Import Carbonara's Data Tools by running the cell below (_ctrl+enter_)

In [1]:
# <-- Imports
import CarbonaraDataTools as CDT
import ipywidgets as widgets
from IPython.display import display, clear_output
import plotly.io as pio
pio.renderers.default='notebook'
import numpy as np
import os, subprocess, shutil

1. Enter the location of your PDB file below

In [2]:
# <-- Run this cell to enter your PDB file location

# Path used until the text box is edited.
pdb_file = 'Examples/Lysozyme/Lysozyme.pdb'

pdbFile_text = widgets.Text(
    value=pdb_file,
    placeholder='Enter your PDB file location:',
    description='Enter your PDB file location:',
    disabled=False,
    layout={'width': '550px','height':'50px'},
)

pdbFile_text.style = {'description_width': '200px','description_height': '25px'}

def pdbFile_handler(change):
    """Store the edited PDB path in the global `pdb_file`.

    change: the change notification passed by `observe`; `change.new` holds
        the new contents of the text box.
    """
    global pdb_file
    pdb_file = change.new
    # `output` is created by a later cell. Editing this box before that cell
    # has run used to raise NameError inside the callback, so the shared
    # output is only cleared once it exists.
    if 'output' in globals():
        with output:
            clear_output(wait=True)

pdbFile_text.observe(pdbFile_handler,names='value')
display(pdbFile_text)

Text(value='Examples/Lysozyme/Lysozyme.pdb', description='Enter your PDB file location:', layout=Layout(height…

2. Enter the location of your corresponding SAXS data file below

In [3]:
# <-- Run this cell to enter your SAXS file location

# Path used until the text box is edited.
SAXS_file = 'Examples/Lysozyme/LysozymeSaxs.dat'

saxsFile_text = widgets.Text(
    value=SAXS_file,
    placeholder='Enter your SAXS file location:',
    description='Enter your SAXS file location:',
    disabled=False,
    layout={'width': '550px','height':'50px'},
)

saxsFile_text.style = {'description_width': '200px','description_height': '25px'}

def saxsFile_handler(change):
    """Store the edited SAXS path in the global `SAXS_file`.

    change: the change notification passed by `observe`; `change.new` holds
        the new contents of the text box.
    """
    global SAXS_file
    SAXS_file = change.new
    # This handler no longer redirects `working_path` to the directory of the
    # SAXS file: the confirmation step owns `working_path`
    # ('newFitData/'+mol_name), and overriding it here made later cells write
    # their files next to the input data whenever the path was edited after
    # confirming.
    #
    # `output` is created by a later cell; skip the clear until it exists so
    # that typing here early does not raise NameError inside the callback.
    if 'output' in globals():
        with output:
            clear_output(wait=True)

saxsFile_text.observe(saxsFile_handler,names='value')
display(saxsFile_text)

Text(value='Examples/Lysozyme/LysozymeSaxs.dat', description='Enter your SAXS file location:', layout=Layout(h…

3. Enter a name for your prediction below (the name of your protein should work!)

In [4]:
# <-- Run this cell to enter a name for your prediction

# Name used until the text box is edited.
mol_name = 'Lysozyme'

molName_text = widgets.Text(
    value=mol_name,
    placeholder='Enter a name for your prediction:',
    description='Enter a name for your prediction:',
    disabled=False,
    layout={'width': '550px','height':'50px'},
)

molName_text.style = {'description_width': '200px','description_height': '25px'}

def molName_handler(change):
    """Store the edited prediction name in the global `mol_name`.

    change: the change notification passed by `observe`; `change.new` holds
        the new contents of the text box.
    """
    global mol_name
    mol_name = change.new
    # `output` is created by a later cell; skip the clear until it exists so
    # that typing here early does not raise NameError inside the callback.
    if 'output' in globals():
        with output:
            clear_output(wait=True)

molName_text.observe(molName_handler,names='value')
display(molName_text)

Text(value='Lysozyme', description='Enter a name for your prediction:', layout=Layout(height='50px', width='55…

4. Confirm your file locations and write them to newFitData

In [5]:
# <-- Run this cell and click the button to confirm file locations

working_path = 'newFitData/'+mol_name
confirm_filelocs_text = 'Confirm file locations'

confirm_filelocs_button = widgets.Button(
    description=confirm_filelocs_text,
    disabled=False,
    button_style='success', # 'success', 'info', 'warning', 'danger' or ''
    tooltip=confirm_filelocs_text,
    layout=widgets.Layout(width=f'{len(confirm_filelocs_text) + 4}ch'),
    icon='check' # (FontAwesome names without the `fa-` prefix)
)
# The button reports into its own widget. The global name `output` is
# re-assigned by other cells, so a handler that looks `output` up at click time
# ends up printing into whichever widget was created last (often one that is
# not displayed at all). `output` is still set here because the text-box
# handlers of the input cells clear it.
filelocs_output = widgets.Output()
output = filelocs_output
confirm_filelocs_button.style = {'description_width': '300px','description_height': '25px'}

def confirm_filelocs_button_clicked(b):
    """Point `working_path` at 'newFitData/<mol_name>' and create that folder.

    The folder is only created, and the confirmation only printed, when both
    `pdb_file` and `SAXS_file` exist on disk; otherwise the missing paths are
    reported instead.

    b: the clicked button (unused).
    """
    global working_path
    working_path = 'newFitData/'+mol_name
    with filelocs_output:
        clear_output(wait=True)
        missing = [path for path in (pdb_file, SAXS_file) if not os.path.isfile(path)]
        if missing:
            print("Could not find: " + ", ".join(missing))
            return
        # exist_ok avoids the check-then-create race of isdir + makedirs.
        os.makedirs(working_path, exist_ok=True)
        print("File locations confirmed.")


confirm_filelocs_button.on_click(confirm_filelocs_button_clicked)
display(confirm_filelocs_button)
display(filelocs_output)

Button(button_style='success', description='Confirm file locations', icon='check', layout=Layout(width='26ch')…

Output()

5. Check how many chains are in your PDB file. (Make sure you've confirmed your file locations above)

In [28]:
# <-- Run this cell to check for any breaks in your backbone

coords_chains_in, sequence_chains_in, secondary_structure_chains_in, missing_residues_chains_in = CDT.pull_structure_from_pdb(pdb_file)

# Check each chain read from the PDB for breaks; a chain with breaks is
# replaced in place by whatever CDT.break_into_chains returns for it
# (handled below as a list of sub-chains).
for j in range(len(coords_chains_in)):
    breaking_indices = CDT.missing_ca_check(coords_chains_in[j])
    if len(breaking_indices) > 0:
        print("We think your PDB file has", len(breaking_indices)+1, 'chains')
        coords_chains_in[j], sequence_chains_in[j] = CDT.break_into_chains(coords_chains_in[j],sequence_chains_in[j],breaking_indices)

# Flatten to one entry per chain: entries that are lists contribute each of
# their elements, everything else is taken as a single chain.
coords_chains,sequence_chains = [],[]
for chain_coords, chain_sequence in zip(coords_chains_in, sequence_chains_in):
    if isinstance(chain_coords,list):
        coords_chains.extend(chain_coords)
        sequence_chains.extend(chain_sequence)
    else:
        coords_chains.append(chain_coords)
        sequence_chains.append(chain_sequence)

# All coordinates of all chains, concatenated in chain order.
coords = np.array([point for chain_coords in coords_chains for point in chain_coords])

# use kappa tau method to identify secondary structure
secondarysplit= [CDT.findSimilar(CDT.getKapTauList(chain_coords),0.2) for chain_coords in coords_chains]
# Writes all the coords, fingerprint, mixture & SAXS in Carbonara's expected input / file struct
CDT.write_fingerprint_file(len(coords_chains), sequence_chains, secondarysplit, working_path)
CDT.write_coordinates_file(coords,working_path)
CDT.write_mixture_file(working_path)
CDT.write_saxs(SAXS_file, working_path)

# 1-based chain numbers offered by the chain selector.
chains = list(range(1, len(coords_chains)+1))
# One empty list per final chain. This used to be sized from the
# `breaking_indices` of the last PDB chain only, which gave the wrong length
# for multi-chain PDB files and raised NameError when no chain was read.
varyingSection_tensor = [[] for _ in coords_chains]

Are sure you have more than one chain - if not this will cause segmentation errors later! You have been warned...


6. Select the range of _q_ you'd like to fit to in this run.

In [29]:
# <-- Run this cell to enter your desired q range for the fit

# Parse the SAXS file once; its first column supplies the default q limits.
saxs_q_values = np.genfromtxt(working_path+'/Saxs.dat')[:,0]

# Values used until a box is edited. An edited value is stored as the raw text
# of the box, so minq/maxq switch from numbers to strings after an edit.
minq = round(saxs_q_values.min(),2)
startq = '0.15'
maxq = round(saxs_q_values.max(),2)

def _q_text_box(label, value):
    """Build one q-range text box using the layout shared by all three.

    label: text used for both the description and the placeholder.
    value: initial contents of the box (a string).
    """
    box = widgets.Text(
        value=value,
        placeholder=label,
        description=label,
        disabled=False,
        layout={'width': '250px','height':'50px'},
    )
    box.style = {'description_width': '150px','description_height': '25px'}
    return box

# The handlers below only record the new value. They used to clear the global
# `output`, which this cell re-created (undisplayed) three times; that replaced
# the widget other cells' buttons print into, so their messages disappeared,
# and an edit here could wipe whatever a later cell was showing.

def minq_handler(change):
    """Store the edited minimum q (as text) in the global `minq`."""
    global minq
    minq = change.new

def startq_handler(change):
    """Store the edited start q (as text) in the global `startq`."""
    global startq
    startq = change.new

def maxq_handler(change):
    """Store the edited maximum q (as text) in the global `maxq`."""
    global maxq
    maxq = change.new

minq_text = _q_text_box('Enter your min q:', str(minq))
minq_text.observe(minq_handler,names='value')
display(minq_text)

startq_text = _q_text_box('Enter your start q:', startq)
startq_text.observe(startq_handler,names='value')
display(startq_text)

maxq_text = _q_text_box('Enter your max q:', str(maxq))
maxq_text.observe(maxq_handler,names='value')
display(maxq_text)

Text(value='0.0', description='Enter your min q:', layout=Layout(height='50px', width='250px'), placeholder='E…

Text(value='0.15', description='Enter your start q:', layout=Layout(height='50px', width='250px'), placeholder…

Text(value='0.34', description='Enter your max q:', layout=Layout(height='50px', width='250px'), placeholder='…

7. View the initial fit to the SAXS data for this _q_ range. If you change q<sub>max</sub> above, rerun this cell to update the fit

In [30]:
# <-- Run this cell to view the initial fit to the scattering

initial_test_script_name = CDT.write_initial_saxs_check_sh(working_path,mol_name,1,1,float(startq))
# The script's stdout/stderr are discarded, so its exit status is the only
# sign that something went wrong; report it instead of silently plotting
# whatever scatter file happens to be on disk.
initial_fit_status = subprocess.call(['sh', initial_test_script_name],stdout=subprocess.DEVNULL,stderr=subprocess.STDOUT)
if initial_fit_status != 0:
    print(f"Warning: {initial_test_script_name} exited with status {initial_fit_status}; "
          "the scatter file plotted below may be missing or left over from an earlier run.")
CDT.SAXS_fit_plotter(working_path+'/Saxs.dat',working_path+'/tmp/mol1_initial_scatter.dat',False)

8. Let us predict which subsections to change during the prediction.

In [31]:
# Run this cell to predict the subsections we think you should vary
fingerprint_path = working_path+'/fingerPrint1.dat'
allowed_linker, linker_indices = CDT.find_non_varying_linkers(working_path+'/coordinates1.dat',fingerprint_path)
ss_len_tensor = [len(sses) for sses in CDT.get_sses(fingerprint_path)]

# Running totals of the entries in ss_len_tensor: group k collects the allowed
# indices that are below cumulative_lengths[k] but not below any earlier total.
cumulative_lengths = np.cumsum(ss_len_tensor)

# One (initially empty) group per cumulative total.
chunked_groups = [[] for _ in range(len(cumulative_lengths))]

for index in allowed_linker:
    # First group whose cumulative total exceeds this index, if there is one.
    group = next(
        (position for position, total in enumerate(cumulative_lengths) if index < total),
        None,
    )
    if group is not None:
        chunked_groups[group].append(index)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 60/60 [00:21<00:00,  2.84it/s]


9. View the recommended varying subsections and change them if you'd like. 
* If you select a different chain from the dropdown, you'll need to rerun the viewer cell.
* Use _ctrl + click_ to select multiple different subsections
* To reset to our predicted subsections, simply rerun the cell.

In [32]:
# <-- Run this cell to select which chain to process

# Chain selected until the dropdown is changed.
chain = chains[0]

def chains_handler(change):
    """Record the chain picked in the dropdown in the global `chain`."""
    global chain
    chain = change.new
    with output:
        # Drop whatever the shared output widget was showing.
        clear_output(wait=True)

chains_Dropdown = widgets.Dropdown(
    options=chains,
    value=chain,
    description='Select which chain to process from the dropdown box ',
    disabled=False,
    layout={'width': '400px','height':'25px'},
)
chains_Dropdown.style = {'description_width': '350px','description_height': '25px'}

chains_Dropdown.observe(chains_handler,names='value')
display(chains_Dropdown)


Dropdown(description='Select which chain to process from the dropdown box ', layout=Layout(height='25px', widt…

In [34]:
#<--- Run this cell to view the recommended varying subsections for the selected chain
fullPoss = CDT.possibleLinkerList(pdb_file,working_path+'/fingerPrint1.dat',chain)
possLinkers = [i[1] for i in fullPoss]

# Pin the chain this viewer was built for. The handlers used to read the global
# `chain` when they fired, so changing the dropdown without re-running this
# cell made a selection for one chain overwrite the group of another.
viewer_chain = chain
# Offset between this chain's own linker numbers and the numbering used in
# `allowed_linker` / `chunked_groups`: zero for the first chain, otherwise the
# cumulative length up to the previous chain.
chain_offset = 0 if viewer_chain == 1 else cumulative_lengths[viewer_chain-2]

# Output widget. The handlers write to `viewer_output`, a name no other cell
# re-assigns, so the structure keeps rendering here after later cells replace
# the shared `output`.
output = widgets.Output()
viewer_output = output

def _is_recommended(linker):
    """Return a truthy value if `linker` (an entry of fullPoss) is recommended.

    A linker is recommended when its number is in `allowed_linker` and
    CDT.linkerLengthCheck accepts its label. The first chain is matched on the
    string form of the number and later chains on the offset integer, exactly
    as before.
    NOTE(review): presumably both tests agree for an integer `allowed_linker`
    and could be merged; confirm its dtype first.
    """
    if viewer_chain == 1:
        in_allowed = linker[0] in allowed_linker.astype(str)
    else:
        in_allowed = int(linker[0])+chain_offset in allowed_linker
    return in_allowed and CDT.linkerLengthCheck(linker[1])

recommended = [i for i in fullPoss if _is_recommended(i)]
recs = [i[1] for i in recommended]
chunked_groups[viewer_chain-1] = [int(i[0])+chain_offset for i in recommended]
recLinkers = [int(i[0]) for i in fullPoss if i[1] in recs]

def render_initial(change=None):
    """Show the structure with the recommended sections highlighted."""
    with viewer_output:
        clear_output(wait=True)
        CDT.highlightVaryingSections(working_path,pdb_file,recLinkers).show("notebook")

def SelectMultiple_handler(change):
    """Record the new selection for this chain and redraw the structure.

    change: the change notification passed by `observe`; `change.new` holds
        the labels currently selected in the list.
    """
    global varyingLinkers
    varyingLinkers = change.new
    with viewer_output:
        clear_output(wait=True)
        selectedLinkers = [int(i[0]) for i in fullPoss if i[1] in varyingLinkers]
        # Stored with the chain offset applied; highlighted with the chain's own numbers.
        chunked_groups[viewer_chain-1] = [i+chain_offset for i in selectedLinkers]
        CDT.highlightVaryingSections(working_path,pdb_file,selectedLinkers).show("notebook")

linker_SelectMultiple = widgets.SelectMultiple(
    options=possLinkers,
    value=recs,
    #rows=10,
    description='Varying Subsections',
    layout={'width': '300px','height':'150px'},
    disabled=False
)
linker_SelectMultiple.style = {'description_width': '150px','description_height': '25px'}
linker_SelectMultiple.observe(SelectMultiple_handler,names='value')
display(linker_SelectMultiple)
display(viewer_output)
render_initial()

SelectMultiple(description='Varying Subsections', index=(0, 1, 2, 7, 10, 11, 12, 13, 15, 16, 19, 25, 28), layo…

Output()

10. Confirm the varying subsections. (Make sure you've checked them for <b>all</b> chains!)

In [12]:
# <-- Run this cell to confirm you are happy with your selected varying sections
varSec_text = 'Confirm varying sections'

varSec_button = widgets.Button(
    description=varSec_text,
    disabled=False,
    button_style='success', # 'success', 'info', 'warning', 'danger' or ''
    tooltip=varSec_text,
    layout=widgets.Layout(width=f'{len(varSec_text) + 4}ch'),
    icon='check' # (FontAwesome names without the `fa-` prefix)
)
# The button reports into its own widget. Re-assigning the shared `output`
# here redirected the subsection viewer's rendering to this cell, and looking
# `output` up at click time sent the confirmation to whichever widget a later
# cell had created.
varSec_output = widgets.Output()
varSec_button.style = {'description_width': '300px','description_height': '25px'}

def on_varSec_button_clicked(b):
    """Write every selected varying section to the working directory.

    The per-chain lists in `chunked_groups` are concatenated in chain order and
    handed to CDT.write_varysections_file.

    b: the clicked button (unused).
    """
    with varSec_output:
        clear_output(wait=True)
        flattened = [item for sublist in chunked_groups for item in sublist]
        CDT.write_varysections_file(flattened,working_path)
        print("Varying sections confirmed.")

varSec_button.on_click(on_varSec_button_clicked)
display(varSec_button)
display(varSec_output)

Button(button_style='success', description='Confirm varying sections', icon='check', layout=Layout(width='28ch…

Output()

11. Add any distance constraints. If you specify ResIDs alone, we will keep these fixed to their initial relative distance.

In [13]:
# <-- Run this cell to add distance constraints

def _constraint_text_box(label):
    """Build one of the (initially empty) constraint text boxes."""
    box = widgets.Text(
        value='',
        placeholder=' ',
        description=label,
        disabled=False
    )
    box.style = {'description_width': '150px','description_height': '25px'}
    return box

def _constraint_button(label, button_style):
    """Build one of the constraint buttons, sized to fit its label."""
    button = widgets.Button(
        description=label,
        disabled=False,
        button_style=button_style, # 'success', 'info', 'warning', 'danger' or ''
        tooltip=label,
        layout=widgets.Layout(width=f'{len(label) + 4}ch'),
        icon='check' # (FontAwesome names without the `fa-` prefix)
    )
    button.style = {'description_width': '300px','description_height': '25px'}
    return button

resA_text = _constraint_text_box('From resID:')
resB_text = _constraint_text_box('To resID:')
dist_text = _constraint_text_box("Distance (Angstroms):")

# Residue pairs added so far, and the distances given explicitly.
# NOTE(review): a pair added without a distance gets no entry in fixedDistSet,
# so the two lists stop lining up once pairs with and without a distance are
# mixed - confirm how CDT.translate_distance_constraints matches them.
contactPreds = []
fixedDistSet=[]

# Current contents of the three text boxes (raw text).
resA = ''
resB = ''
dist = ''

# All status messages of this cell go to one widget that no other cell
# re-assigns. It is displayed once; it used to be displayed four times, which
# repeated every message under each button, and the handlers used the shared
# `output`, which later cells replace with widgets that are never shown.
distCons_output = widgets.Output()

def resA_handler(change):
    """Store the edited 'from' residue ID (as text) and clear the status."""
    global resA
    resA = change.new
    with distCons_output:
        clear_output(wait=True)

def resB_handler(change):
    """Store the edited 'to' residue ID (as text) and clear the status."""
    global resB
    resB = change.new
    with distCons_output:
        clear_output(wait=True)

def dist_handler(change):
    """Store the edited distance (as text) and clear the status."""
    global dist
    dist = change.new
    with distCons_output:
        clear_output(wait=True)

resA_text.observe(resA_handler,names='value')
resB_text.observe(resB_handler,names='value')
dist_text.observe(dist_handler,names='value')
display(resA_text)
display(resB_text)
display(dist_text)

### Add distance constraint button
distCons_text = 'Add distance constraint'
distCons_button = _constraint_button(distCons_text, 'success')

def on_distCons_button_clicked(b):
    """Add the residue pair (and the distance, if one was typed) to the lists.

    All fields are converted before anything is stored, so a typo no longer
    leaves a pair recorded without the distance that was meant to go with it.

    b: the clicked button (unused).
    """
    with distCons_output:
        clear_output(wait=True)
        try:
            pair = [int(resA),int(resB)]
            fixed_distance = float(dist) if dist!='' else None
        except ValueError:
            print('Residue IDs must be whole numbers and the distance, if given, must be a number.')
            return
        contactPreds.append(pair)
        if fixed_distance is not None:
            fixedDistSet.append(fixed_distance)
        print('Distance constraint added')

### Check distance constraint button
check_distCons_text = 'Check distance constraints'
check_distCons_button = _constraint_button(check_distCons_text, 'info')

def check_distCons_button_clicked(b):
    """Print the residue pairs and explicit distances collected so far."""
    with distCons_output:
        clear_output(wait=True)
        print("Distance constraints are: ")
        print(contactPreds)
        print(fixedDistSet)

### Reset distance constraint button
reset_distCons_text = 'Reset distance constraints'
reset_distCons_button = _constraint_button(reset_distCons_text, 'danger')

def reset_distCons_button_clicked(b):
    """Discard every constraint collected so far."""
    global contactPreds
    global fixedDistSet
    contactPreds = []
    fixedDistSet=[]
    with distCons_output:
        clear_output(wait=True)
        # Printed inside the widget so the message shows up with the buttons.
        print("Distance constraints reset.")

### Confirm distance constraint button
confirm_distCons_text = 'Confirm distance constraints'
confirm_distCons_button = _constraint_button(confirm_distCons_text, 'success')

def confirm_distCons_button_clicked(b):
    """Pass the collected constraints to CDT.translate_distance_constraints."""
    with distCons_output:
        clear_output(wait=True)
        CDT.translate_distance_constraints(contactPreds,np.genfromtxt(working_path+'/coordinates1.dat'),working_path,fixedDistSet)
        # Only reported once the call above has returned without raising.
        print("Distance constraints confirmed.")

### Set on click actions
distCons_button.on_click(on_distCons_button_clicked)
check_distCons_button.on_click(check_distCons_button_clicked)
reset_distCons_button.on_click(reset_distCons_button_clicked)
confirm_distCons_button.on_click(confirm_distCons_button_clicked)

### Display all
display(distCons_button)
display(check_distCons_button)
display(reset_distCons_button)
print('Once you have added all your distance constraints, confirm below')
display(confirm_distCons_button)
display(distCons_output)

Text(value='', description='From resID:', placeholder=' ', style=TextStyle(description_width='150px'))

Text(value='', description='To resID:', placeholder=' ', style=TextStyle(description_width='150px'))

Text(value='', description='Distance (Angstroms):', placeholder=' ', style=TextStyle(description_width='150px'…

Button(button_style='success', description='Add distance constraint', icon='check', layout=Layout(width='27ch'…

Output()

Button(button_style='info', description='Check distance constraints', icon='check', layout=Layout(width='30ch'…

Output()

Button(button_style='danger', description='Reset distance constraints', icon='check', layout=Layout(width='30c…

Output()

Once you have added all your distance constraints, confirm below


Button(button_style='success', description='Confirm distance constraints', icon='check', layout=Layout(width='…

Output()

12. Select how many distinct predictions you'd like for this run, and how many fitting steps we should take for each prediction.

In [14]:
# <-- Run this cell to select the number of runs and how many fitting steps per run

# Values used until a box is edited; both are kept as text.
no_runs = '10'
fit_steps = '10000'

# The handlers below only record the new value. They used to clear the global
# `output`, which this cell re-created (undisplayed) twice; that replaced the
# widget the buttons of earlier cells print into, so their messages were lost.

def no_runs_handler(change):
    """Store the edited number of runs (as text) in the global `no_runs`."""
    global no_runs
    no_runs = change.new

def fit_steps_handler(change):
    """Store the edited number of fitting steps (as text) in the global `fit_steps`."""
    global fit_steps
    fit_steps = change.new

no_runs_text = widgets.Text(
    value=no_runs,
    placeholder='Enter the number of runs:',
    description='Enter the number of runs:',
    disabled=False,
    layout={'width': '350px','height':'50px'},
)
no_runs_text.style = {'description_width': '250px','description_height': '25px'}
no_runs_text.observe(no_runs_handler,names='value')
display(no_runs_text)

fit_steps_text = widgets.Text(
    value=fit_steps,
    placeholder='Enter your number of fitting steps:',
    description='Enter your number of fitting steps:',
    disabled=False,
    layout={'width': '350px','height':'50px'},
)
fit_steps_text.style = {'description_width': '250px','description_height': '25px'}
fit_steps_text.observe(fit_steps_handler,names='value')
display(fit_steps_text)

Text(value='10', description='Enter the number of runs:', layout=Layout(height='50px', width='350px'), placeho…

Text(value='10000', description='Enter your number of fitting steps:', layout=Layout(height='50px', width='350…

13. Enter a unique name for the run with these parameters.

In [15]:
# <-- Run this cell to enter a name for this run

# Name used until the text box is edited.
run_name = 'TestRun'

def runName_handler(change):
    """Store the edited run name in the global `run_name`.

    The handler only records the value. It used to clear the global `output`,
    which this cell re-created without displaying it; that replaced the widget
    the buttons of earlier cells print into, so their messages were lost.
    """
    global run_name
    run_name = change.new

runName_text = widgets.Text(
    value=run_name,
    placeholder='Enter a name for this run:',
    description='Enter a name for this run:',
    disabled=False,
    layout={'width': '550px','height':'50px'},
)
runName_text.style = {'description_width': '200px','description_height': '25px'}
runName_text.observe(runName_handler,names='value')
display(runName_text)

Text(value='TestRun', description='Enter a name for this run:', layout=Layout(height='50px', width='550px'), p…

14. Would you like to allow rotations?

In [24]:
# <-- Run this cell to choose if rotations are allowed

# Rotations are allowed until the box is unticked.
rotation = True

def checkbox_handler(change):
    """Mirror the checkbox state into the global `rotation`."""
    global rotation
    rotation = change.new

rotation_checkbox = widgets.Checkbox(
    value=rotation,
    description='Allow rotations?',
    disabled=False,
    indent=False,
)
rotation_checkbox.observe(checkbox_handler, names='value')
display(rotation_checkbox)

Checkbox(value=True, description='Allow rotations?', indent=False)

15. Write the _.sh_ file for this run.

In [35]:
# <-- Press play to write your sh file

# pairedQ is switched on only when at least one explicit distance was entered.
# NOTE(review): residue pairs added without a distance leave fixedDistSet
# empty, so pairedQ stays False for them - confirm that is intended.
run_sh_file = CDT.write_run_sh_file(
    working_path,
    mol_name,
    run_name,
    1,
    minq,
    maxq,
    startq,
    int(fit_steps),   # entered as text
    int(no_runs),     # entered as text
    pairedQ=bool(fixedDistSet),
    rotation=rotation,
)
# Optional cleanup of the initial-fit script and its tmp folder (left disabled):
#os.remove(initial_test_script_name)
#shutil.rmtree(working_path+'/tmp')
print(run_sh_file)

RunMe_IMPA1_dimer_IMPA1_dimer.sh


16. You now need to head to the terminal to set off your run!
* First, navigate to the carbonara directory
```shell
cd {carbonara_directory}
```
* Enter _sh_ followed by the script name printed above and hit enter.
```shell
sh {script_name}
```