# Tag the binding site on all proteins of the dataset (with AlphaFold structures)

## Imports

In [1]:
## Imports and settings
# Data handling, plotting and geometry libraries used throughout the notebook.
import pandas as pd
import numpy as np
import math
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.spatial import ConvexHull
#%matplotlib inline
sns.set_style("darkgrid")

import ipywidgets as widgets
from IPython.display import display, Markdown, clear_output


# Notebook flavour of tqdm (widget progress bars); the generic import is kept for reference.
# from tqdm.auto import tqdm
from tqdm.notebook import tnrange, tqdm

tqdm.pandas()  # activate tqdm progressbar for pandas apply

pd.options.mode.chained_assignment = (
    None  # default='warn', remove pandas warning when adding a new column
)
# Never truncate the column list when a DataFrame is displayed.
pd.set_option("display.max_columns", None)

from IPython.core.interactiveshell import InteractiveShell


# Display the value of every expression statement of a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"
#%config InlineBackend.figure_format ='svg' #better quality figures
# Silence NumPy floating-point warnings for divisions by zero and invalid operations.
np.seterr(divide='ignore', invalid='ignore')

import matplotlib.gridspec as gridspec


# Structure handling / 3D visualisation.
import MDAnalysis as mda
import nglview as nv
import platform

  from pandas import Panel




## Tool functions 

In [2]:
def equality_test(pop1,pop2, ALTERNATIVE="two-sided", pairwised=False):
    """
    Compare two samples and display the outcome of the appropriate test.

    Each sample is first checked for normality (Shapiro-Wilk). If both look
    normal a t-test is used: a paired t-test when `pairwised` is set, otherwise
    an independent t-test whose `equal_var` setting is chosen from a Levene
    variance test. If at least one sample is not normal, a rank-based test is
    used instead: Wilcoxon signed-rank when `pairwised` is set, Mann-Whitney
    otherwise. Every step is rendered as Markdown or printed.

    Args:
        - pop1 (array): sample1 (continuous or discrete data)
        - pop2 (array): sample2 (continuous or discrete data)
        - ALTERNATIVE (str): could be 'two-sided','less','greater'. Default is 'two-sided'
        - pairwised (bool): True when the two samples are paired (same length,
          same order). Default is False
    Return:
        None
    Raises:
        KeyError: if ALTERNATIVE is not one of the three supported values.
    """
    # Local imports keep the helper usable on its own.
    from IPython.display import Markdown, display
    import scipy.stats as stats

    # Single significance threshold used by every decision and every message.
    ALPHA = 0.05

    def printmd(string, color=None):
        """Render `string` as Markdown, optionally coloured."""
        if color is None:
            # Avoid emitting the invalid CSS value "color:None".
            colorstr = "<span>{}</span>".format(string)
        else:
            colorstr = "<span style='color:{}'>{}</span>".format(color, string)
        display(Markdown(colorstr))

    sign = {"two-sided":"≠",
       "less":"<",
       "greater":">"}
    # Fail fast (same KeyError as before) instead of after the tests have run.
    symbol = sign[ALTERNATIVE]
    # The t-tests only receive `alternative` when a one-sided test is requested,
    # so the default call keeps working on SciPy versions without that keyword.
    ttest_kwargs = {} if ALTERNATIVE == "two-sided" else {"alternative": ALTERNATIVE}

    printmd("**STATISTICAL TEST BETWEEN TWO SAMPLES**")
    printmd(f" - ALTERNATIVE HYPOTHESIS = {ALTERNATIVE}")

    printmd("**NORMALITY TEST (shapiro)**")
    normality = True
    printmd("*The two samples should follow a normal law to use a standard t.test*")

    for label, sample in (("Sample 1", pop1), ("Sample 2", pop2)):
        normpval = stats.shapiro(sample).pvalue
        if normpval <= ALPHA:
            printmd(f"---- {label} shapioro test pvalue = {normpval:.2E}, <= {ALPHA}. This sample DO NOT follow a normal law", color='red')
            normality = False
        else:
            printmd(f"---- {label} shapioro test pvalue = {normpval:.2E}, > {ALPHA}. This sample follow a normal law", color='blue')

    if normality:
        printmd("Both samples follow a normal law")

        if pairwised:
            printmd("**TTest_REL Pairwise test **")
            equalstat, equalpval = stats.ttest_rel(pop1, pop2, **ttest_kwargs)
        else:
            print("Performing Variance equality test")
            varstat, varpval = stats.levene(pop1,pop2)
            # Levene, pval <= ALPHA --> variances are not equal.
            # Levene, pval > ALPHA --> not significant, H0 (equal variances) is not rejected.

            printmd("-- Null hypothesis, the the variances are equals")
            print(f"---- Variance test --> stat={varstat:.2E}, p-value={varpval:.3E}")
            if varpval <= ALPHA:
                printmd(f"P value <= {ALPHA}, H0 rejected. the variance are not equal. Performing Welch’s t-test", color="red")
                equal_var = False
            else:
                printmd(f"Pvalue > {ALPHA}, the variances are equal. Performing a standard independent 2 sample test ", color="blue")
                equal_var = True
            equalstat, equalpval = stats.ttest_ind(pop1,
                                       pop2,
                                       equal_var=equal_var,
                                       **ttest_kwargs)

        print(f"t-test --> stat={equalstat:.2E}, p-value={equalpval:.3E}")
        print(f"  Null hypothesis : the two samples average are equal")
        print(f"  Alternative hypothesis : average(sample1) {symbol} average(sample2)")
        if equalpval > ALPHA:
            printmd(f"pvalue > {ALPHA}, we cannot reject the null hypothesis of identical average between both populations.", color="blue")
        else:
            printmd(f"pvalue <= {ALPHA}, the null hypothesis is rejected, the two samples are differents", color="red")
    else:
        printmd("One or both sample(s) doesn't follow a normal law.")
        if pairwised:
            printmd("**WILCOXON SIGNED-RANK TEST**")
            rank_test = stats.wilcoxon
        else:
            printmd("Performing a Wilcoxon Rank sum test with continuity correction")
            printmd("**WILCOXON RANK SUM TEST WITH CONTINUITY CORRECTION (or Mann-Whitney test)**")
            rank_test = stats.mannwhitneyu
        printmd(f"  Null hypothesis : the two distribution are equals")
        printmd(f"  Alternative hypothesis : pop1 {symbol} pop2")
        stat, pval = rank_test(pop1, pop2, alternative=ALTERNATIVE)
        if pval <= ALPHA:
            printmd(f"  pvalue = {pval:.2E} which is <= {ALPHA}. The null hypothesis is rejected. The alternative hypothesis ({ALTERNATIVE}) is valid and the two distrubition are different", color="red")
        else:
            printmd(f"  pvalue = {pval:.2E} which is > {ALPHA}. The null hypothesis not rejected. Both distributions are not statistically differents.", color="blue")

## Parameters

In [3]:
# Run the shared setup notebook before anything else.
# NOTE(review): WORKDIR, FIGURESFOLDER and PEPRMINT_FOLDER are used further down
# but never assigned in this notebook; presumably they are defined by this setup
# notebook — confirm.
%run "./00-SETUP.ipynb"

## Loading datasets

In [4]:
# Load the dataset.
# DATASET and DATASETAF are two separately loaded frames read from the same pickle.
# NOTE(review): unpickling executes arbitrary code; only load files you trust.
DATASET, DATASETAF = (
    pd.read_pickle(f"{WORKDIR}/DATASET_peprmint_d25.pkl"),
    pd.read_pickle(f"{WORKDIR}/DATASET_peprmint_d25.pkl"),
)
# Alternative source kept for reference:
#DATASET = pd.read_pickle(f"{WORKDIR}/DATASET_peprmint.backup")

In [5]:
# Domains of interest; `domainList` is referenced by name inside the queries.
domainList = [
    'PH', 'C2', 'START', 'C1', 'C2DIS',
    'PX', 'ENTH', 'PLD', 'ANNEXIN', 'PLA',
]

# Number of distinct `cathpdb` identifiers among the 'cathpdb' rows of those domains.
len(pd.unique(DATASET.query("data_type == 'cathpdb' and domain in @domainList")["cathpdb"]))

4249

In [6]:

# Number of distinct `cathpdb` identifiers once every non-'prosite' row is kept.
# Relies on `domainList` defined in the previous cell.
len(pd.unique(DATASETAF.query("data_type != 'prosite' and domain in @domainList")["cathpdb"]))

5910

## Define the calculations to perform

In [13]:
# Switches selecting which optional parts of the notebook are executed.

# Gates the cells that run the external `align_on_z.py` script.
DO_ALIGNMENT = False
# Gates the AlphaFold vs CATH comparison on the PH domain.
TEST_SIMILARITIES_AFCATH = True

# Not referenced in the part of the notebook reviewed here.
DO_PICTURES = False
DO_REPORT = False
SHOW_EXAMPLE = False

## Define cluster level here

In [8]:
# Only install the default redundancy levels when no `clusterLevel` is already
# present in the namespace.
# NOTE(review): `unirefLevel` is set only together with `clusterLevel`; a caller
# that pre-defines `clusterLevel` is expected to pre-define `unirefLevel` too.
if 'clusterLevel' not in locals():
    clusterLevel, unirefLevel = "S100", 'uniref100'

# Z value passed as `zaxis` to every tag_ibs call below.
ZAXISLEVEL = 0

# Test how different AF and CATH are for the PH domain

## PH Domain

In [9]:
if DO_ALIGNMENT:
    !cd /Users/thibault/Documents/WORK/peprmint/databases/aligned_cath-AF/PH/
    !/Users/thibault/miniconda3/envs/peprmint/bin/python ~/OneDrive\ -\ University\ of\ Bergen/projects/peprmint/dev/pepermintdataset/scripts/align_on_z.py -d PH -ref 2da0A00 -res1 19 -res2 42 -res3 50 -i 'aligned/output' -o 'zaligned' -dir "aligned_cath-AF"
    #!/Users/thibault/miniconda3/envs/peprmint/bin/python ~/OneDrive\ -\ University\ of\ Bergen/projects/peprmint/dev/pepermintdataset/scripts/align_on_z.py -d PH -ref 1fgyA00 -res1 367 -res2 276 -res3 349 -i 'raw' -o 'zaligned_sansom'

### Canonical orientation

In [10]:
# Prepare the exclusion of PTB and other non-PH domains from the PH superfamily.
# Procedure:
# 1. Get a protein template from Mark Lemmon's review (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1221219/pdf/10926821.pdf) or from the PDB.
# 2. Search for this protein in the RCSB PDB -> Annotation -> click on the PFAM family.
# 3. In the results, display only the PDB IDs.
# 4. For each PDB, get the S60 cluster IDs.
# 5. Remove those S60 cluster IDs from the PH superfamily.

# LIST OF PFAM FAMILIES
# PTB = PF00640
# RANDB = PF00638
# EVH/WH1 = PF00568

# Comma-separated search results, one string per PFAM family. The code that
# follows keeps only the first four characters of each entry as the PDB code.
pdb_PTB = "2YT8_1,2YT7_1,6DM4_2,3O17_2,3D8F_1,2E45_1,2YT1_1,6O5O_1,1NU2_1,3O2M_2,3H8D_2,1NTV_1,5LW1_3,6FUZ_1,2G01_2,6LNM_2,5YI7_1,5YI8_1,4DBB_1,6F5E_3,2NMB_1,2MYQ_1,1N3H_1,3F0W_1,3SUZ_1,3SV1_1,1SHC_1,2L1C_1,1AQC_1,2YSC_1,3D8D_1,2YSZ_1,3D8E_1,1OY2_1,3VUG_2,4JMH_2,5NQH_1,3VUH_2,3VUK_2,3VUI_2,4E73_2,2FPD_1,3VUL_2,2EJ8_1,6ITU_1,1WJ1_1,3OXI_2,5C5B_2,5C5B_1,6F7O_1,2FPF_1,2LSW_1,1WGU_1,4Z88_2,2ELA_1,2ELB_1,2DYQ_1,2FPE_1,3VUM_2,5UWS_4,2GMX_2,6OVF_1,2IDH_1,2OEI_1,4H8S_1,2EAM_1,1OQN_1,1TCE_1,4G1W_2,4XWX_1,5YQG_2,1X11_1,1M7E_1,1MIL_1,4HYS_2,1P3R_1,2Z0N_1,2LMR_1,3QYB_1,5NJK_1,2Q12_1,5NJJ_1,3PTG_2,3DXC_1,3QYE_1,1X45_1,1UKI_2,3DXE_1,1Y7N_1,3SO6_1,2Z0O_1,2KIV_1,2Q13_1,4NC6_1,3DXD_1,1UKH_2,2HO2_1,2YT0_1,2M38_1,2H96_2,5ZRY_1,4HYU_2,1DDM_1,1QG1_2,1U39_1,4IZY_2,3V3V_2,3VUD_2,6KMH_2,2KE7_1,1U3B_1,2ROZ_2,5CZI_2,1U37_1,4H39_2,1U38_1"
pdb_RANDB = "5UWW_2,6KFT_2,1K5G_2,5UWT_2,3UIO_4,5UWI_2,6CIT_2,4HAU_2,4HB2_2,4HAT_2,4HB3_2,4HB4_2,4HAX_3,4HAV_2,4HAW_2,3TJ3_2,4HB0_2,5DIF_2,1XKE_1,5DH9_2,4GMX_2,2CRF_1,6XJT_2,6XJU_2,6XJR_2,6XJS_2,5DHF_2,5DHA_2,4LQW_1,5UWJ_2,3UIP_4,5UWH_2,3UIN_4,5JLJ_2,5ZPU_2,2C1M_2,4GA0_1,6A38_2,2C1T_2,5DI9_2,6A3E_2,6A3C_2,6A3B_2,6A3A_2,4HAZ_2,4HAY_2,4GPT_2,2LAS_2,7L5E_2,4GA1_1,1UN0_2,5UWU_2,1RRP_2,5UWS_2,3OAN_1,5CLL_2,5YRO_2,6XJP_2,4L6E_1,6M6X_2,5CLQ_2,6X2M_2,6X2O_2,5YSU_2,6X2S_2,6X2U_2,5YST_2,6X2V_2,6X2W_2,6X2X_2,6X2Y_2,6X2R_2,6X2P_2,4WVF_2,5YTB_2,2EC1_1,5UWQ_2,1K5D_2,5UWR_2,5UWO_2,5UWP_2,2Y8G_1,3N7C_1,4I9Y_1,3M1I_2,5XZX_2,1Z5S_4,2Y8F_1,3WYF_2,4GA2_1"
pdb_EVH = "1TJ6_1,5N91_1,5ZZ9_1,5N9C_1,1CEE_2,2XQN_2,3RSE_8,6RCJ_1,6RCF_1,3CHW_3,2V8C_2,2PBD_3,6RD2_1,2PAV_3,2IYB_1,6XXR_1,7A5M_1,2OT0_2,1XOD_1,1I2H_1,1EVH_1,6V65_1,1USD_1,1USE_1,7AKI_1,1ZUK_2,4MY6_1,2IFS_1,3SYX_1,1QC6_1,2JP2_1,3CVF_1,1T84_1,5N9P_1,2FF3_2,6UHC_8,1I7A_1,2P8V_1,5NDU_1,2LNH_1,3M3N_2,5NCG_1,5NCF_1,5NEG_1,2A3Z_3,6XVT_1,1MKE_1,2VCP_2,2HO2_2,3CVE_1,1EJ5_1,5NCP_1,5ND0_1,1DDW_1,5NC7_1,1DDV_1,2K42_1,4CC7_2,4CC3_2,1EGX_1,4CC2_2,5NBF_1,5NAJ_1,6V6F_1,5NC2_1,5NBX_1"
# pdb_DCP1="5JP4_1,2QKL_1,4B6H_1,5J3Q_1,5N2V_1,1Q67_1,6Y3Z_2,2QKM_1,5J3Y_1,5LOP_2,5LON_2,5KQ1_1,6AM0_2,5J3T_1,2LYD_1,5KQ4_1"
# Turn each comma-separated string into a list holding only the first four
# characters (the PDB code) of every entry.
pdb_PTB, pdb_RANDB, pdb_EVH = (
    [entry[:4] for entry in entries.split(',')]
    for entries in (pdb_PTB, pdb_RANDB, pdb_EVH)
)
# The DCP1 list is currently disabled:
# pdb_DCP1 = [x[:4] for x in pdb_DCP1.split(',')]

pdbs_to_remove = [*pdb_PTB, *pdb_RANDB, *pdb_EVH]

# S60 clusters of the PH domain that contain at least one of those PDB codes ...
removeS60 = DATASET.query("domain == 'PH' and pdb in @pdbs_to_remove")["S60"].unique()
# ... and every PH entry that belongs to one of these clusters.
cathpdbs_to_remove = DATASET.query("domain == 'PH' and S60 in @removeS60")["cathpdb"].unique()
print(f"{len(cathpdbs_to_remove)} will be removed IN TOTAL")

# Drop these entries from both frames and keep a record of what was removed.
# NOTE(review): DATASET / DATASETAF are overwritten, so re-running this cell
# without reloading them finds nothing left to remove and rewrites the CSV empty.
DATASETAF = DATASETAF.query("cathpdb not in @cathpdbs_to_remove")
DATASET = DATASET.query("cathpdb not in @cathpdbs_to_remove")
pd.DataFrame(cathpdbs_to_remove).to_csv(f"{FIGURESFOLDER}/Fake_PH.csv", index=False, header=False)

142 will be removed IN TOTAL


### Getting what's in common between AF and CATH

In [14]:
if TEST_SIMILARITIES_AFCATH:
    import importlib
    import pepr2ds
    importlib.reload(pepr2ds)
    from pepr2ds.dataset.tagibs import Dataset

    PH = Dataset(DATASET, PEPRMINT_FOLDER)

    # UniProt accessions of the PH entries retained by selectUniquePerCluster
    # (called with 'S100' and `unirefLevel`), once per structure source.
    # NOTE(review): 'S100' is hard-coded here while the tagging cells use
    # `clusterLevel` — confirm this is intended.
    cathcluster_uniprot = PH.selectUniquePerCluster(
        DATASET.query("domain =='PH' and data_type == 'cathpdb'"), 'S100', unirefLevel
    )["uniprot_acc"].unique()
    AFcluster_uniprot = PH.selectUniquePerCluster(
        DATASETAF.query("domain =='PH' and data_type == 'alfafold'"), 'S100', unirefLevel
    )["uniprot_acc"].unique()

    # Accessions available from both sources.
    structures_in_common = [*set(AFcluster_uniprot).intersection(cathcluster_uniprot)]


<module 'pepr2ds' from '/Users/thibault/OneDrive - University of Bergen/projects/peprmint/dev/pepr2ds/pepr2ds/__init__.py'>

  from pandas import Panel


### Preparing Cath DS

In [18]:
if TEST_SIMILARITIES_AFCATH:
    import importlib
    import pepr2ds
    importlib.reload(pepr2ds)
    from pepr2ds.dataset.tagibs import Dataset

    def _tag_ph_for_comparison(dataset_obj, **source_options):
        """Tag the PH IBS on `dataset_obj` with the settings shared by both structure sources.

        `source_options` carries the only arguments that differ between the
        CATH and the AlphaFold runs (`data_type`, `base_folder`). Keeping one
        copy of the shared settings guarantees both runs stay comparable.
        """
        dataset_obj.tag_ibs(DATASETAF,
                    domain = 'PH', #Domain
                    pdbreference = "2da0A00", #PDB Template
                    includeResidueRange = [[20,26],[42,50]], #Include those residues in IBS
                    excludeResidueRange=[], #Exclude those residues from IBS
                    extendSS=False, #Extend the secondary structures
                    withAlignment=False, #restrict the results to pdbs that have a sequence.
                    onlyC=False, #get only COIL in the IBS.
                    cathCluster=clusterLevel, #Structure redundancy filter
                    Uniref=unirefLevel, #Sequence redundancy filter
                    addSequence=False, #add the non structural data in the IBS/NONIBS dataset.
                    extendAlign=False, #Extend the secondary structure instead of a raw "cut" based on the alignment position
                    excludeStrand=False, #Exclude "strand" from secondary structure
                    overide_axis_mode = True, #use the Zaxis instead of the alignment to tag the IBS
                    zaxis=ZAXISLEVEL, #Z axis plane to define "IBS" or not IBS
                    extendCoilOnly = False, #Extend coil only.
                    coordinates_folder_name = 'zaligned', #Where are the PDBs
                    filter_uniprot_acc = structures_in_common, #keep only accessions present in both sources
                    **source_options,
                  )

    # Experimental (CATH) structures.
    PH = Dataset(DATASET,PEPRMINT_FOLDER)
    _tag_ph_for_comparison(PH, data_type = 'cath')

    # AlphaFold models of the same proteins.
    PHAF = Dataset(DATASETAF,PEPRMINT_FOLDER)
    _tag_ph_for_comparison(PHAF, data_type = 'alfafold', base_folder='aligned_cath-AF')

    def get_protrusions(dataset):
        """Return, for each `uniprot_acc`, the number of rows that are CB atoms flagged as hydrophobic protrusions."""
        def return_num_protrusion(group):
            return(len(group.query("is_hydrophobic_protrusion == True and atom_name == 'CB'")))
        return dataset.groupby('uniprot_acc').apply(return_num_protrusion)

    protrusions_per_structure_CATH = get_protrusions(PH.domainDf)
    protrusions_per_structure_AF = get_protrusions(PHAF.domainDf)
    # Sorted so the pairing order is reproducible from run to run.
    uniprot_in_common = sorted(set(protrusions_per_structure_CATH.index).intersection(protrusions_per_structure_AF.index))
    # Paired comparison: same protein, experimental structure vs AlphaFold model.
    equality_test(protrusions_per_structure_CATH[uniprot_in_common],
                  protrusions_per_structure_AF[uniprot_in_common],
                  pairwised=True)
    
    


<module 'pepr2ds' from '/Users/thibault/OneDrive - University of Bergen/projects/peprmint/dev/pepr2ds/pepr2ds/__init__.py'>

Domain= PH
selecting amino acids
/Users/thibault/Documents/WORK/peprmint/databases/cath//domains/PH/zaligned
UPDATING COORDINATES


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=91.0), HTML(value='')))


taggin IBS


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=91.0), HTML(value='')))


len IBS 91
len nonIBS 91
Domain= PH
selecting amino acids
/Users/thibault/Documents/WORK/peprmint/databases/aligned_cath-AF/PH/zaligned
UPDATING COORDINATES


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=91.0), HTML(value='')))


taggin IBS


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=91.0), HTML(value='')))


len IBS 90
len nonIBS 91


<span style='color:None'>**STATISTICAL TEST BETWEEN TWO SAMPLES**</span>

<span style='color:None'> - ALTERNATIVE HYPOTHESIS = two-sided</span>

<span style='color:None'>**NORMALITY TEST (shapiro)**</span>

<span style='color:None'>*The two samples should follow a normal law to use a standard t.test*</span>

<span style='color:red'>---- Sample 1 shapioro test pvalue = 2.44E-04, <= 0.05. This sample DO NOT follow a normal law</span>

<span style='color:red'>---- Sample 1 shapioro test pvalue = 2.73E-04, <= 0.05. This sample DO NOT follow a normal law</span>

<span style='color:None'>One or both sample(s) doesn't follow a normal law.</span>

<span style='color:None'>**WILCOXON SIGNED-RANK TEST**</span>

<span style='color:None'>  Null hypothesis : the two distribution are equals</span>

<span style='color:None'>  Alternative hypothesis : pop1 ≠ pop2</span>

<span style='color:blue'>  pvalue = 2.59E-01 which is > 0.05. The null hypothesis not rejected. Both distributions are not statistically differents.</span>

In [17]:
# Same paired comparison as above, restricted to the rows tagged as IBS.
# Guarded like the cells that define PH, PHAF, get_protrusions and
# uniprot_in_common, so that "Run All" works when the comparison is disabled.
if TEST_SIMILARITIES_AFCATH:
    protrusions_per_structure_CATH = get_protrusions(PH.domainDf.query("IBS == True"))
    protrusions_per_structure_AF = get_protrusions(PHAF.domainDf.query("IBS == True"))
    uniprot_in_common.sort()
    # A structure left without any IBS row is absent from the grouped counts;
    # indexing with its label would fail (or yield NaN on older pandas), so
    # keep only the accessions that still have a count on both sides.
    uniprot_in_common_ibs = [
        acc for acc in uniprot_in_common
        if acc in protrusions_per_structure_CATH.index
        and acc in protrusions_per_structure_AF.index
    ]
    equality_test(protrusions_per_structure_CATH[uniprot_in_common_ibs],
                protrusions_per_structure_AF[uniprot_in_common_ibs],
                pairwised=True)

<span style='color:None'>**STATISTICAL TEST BETWEEN TWO SAMPLES**</span>

<span style='color:None'> - ALTERNATIVE HYPOTHESIS = two-sided</span>

<span style='color:None'>**NORMALITY TEST (shapiro)**</span>

<span style='color:None'>*The two samples should follow a normal law to use a standard t.test*</span>

<span style='color:red'>---- Sample 1 shapioro test pvalue = 1.33E-07, <= 0.05. This sample DO NOT follow a normal law</span>

<span style='color:red'>---- Sample 1 shapioro test pvalue = 1.37E-07, <= 0.05. This sample DO NOT follow a normal law</span>

<span style='color:None'>One or both sample(s) doesn't follow a normal law.</span>

<span style='color:None'>**WILCOXON SIGNED-RANK TEST**</span>

<span style='color:None'>  Null hypothesis : the two distribution are equals</span>

<span style='color:None'>  Alternative hypothesis : pop1 ≠ pop2</span>

<span style='color:red'>  pvalue = 4.72E-02 which is <= 0.05. The null hypothesis is rejected. The alternative hypothesis (ftwo-sided) is valid and the two distrubition are different</span>

# Tagging IBS for datasets

## PH domain

In [14]:
import importlib
import pepr2ds
# Re-execute the package so that edits made since the first import are picked up.
importlib.reload(pepr2ds)
from pepr2ds.dataset.tagibs import Dataset

# Tag the IBS of the PH domain on the combined CATH + AlphaFold structures.
PH = Dataset(DATASET, PEPRMINT_FOLDER)
PH.tag_ibs(
    DATASETAF,
    # --- which structures ---
    domain='PH',                          # domain
    pdbreference="2da0A00",               # PDB template
    data_type='cath+af',
    base_folder='aligned_cath-AF',
    coordinates_folder_name='zaligned',   # where the PDBs are
    # --- redundancy filters ---
    cathCluster=clusterLevel,             # structure redundancy filter
    Uniref=unirefLevel,                   # sequence redundancy filter
    # --- IBS definition ---
    overide_axis_mode=True,               # use the z axis instead of the alignment to tag the IBS
    zaxis=ZAXISLEVEL,                     # z-axis plane separating IBS from non-IBS
    includeResidueRange=[[20,26],[42,50]],  # include those residues in the IBS
    excludeResidueRange=[],               # exclude those residues from the IBS
    # --- options left disabled ---
    extendSS=False,                       # extend the secondary structures
    extendAlign=False,                    # extend the secondary structure instead of a raw "cut" at the alignment position
    extendCoilOnly=False,                 # extend coil only
    excludeStrand=False,                  # exclude "strand" from secondary structure
    onlyC=False,                          # keep only COIL in the IBS
    withAlignment=False,                  # restrict the results to pdbs that have a sequence
    addSequence=False,                    # add the non-structural data to the IBS/NONIBS dataset
)

<module 'pepr2ds' from '/Users/thibault/OneDrive - University of Bergen/projects/peprmint/dev/pepermintdataset/pepr2ds/__init__.py'>

Domain= PH
selecting amino acids
/Users/thibault/Documents/WORK/peprmint/databases/aligned_cath-AF/PH/zaligned
UPDATING COORDINATES


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=876.0), HTML(value='')))


taggin IBS


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=876.0), HTML(value='')))


len IBS 869
len nonIBS 876


## C2 Domain

In [190]:
## Command line for alignment!!!!
# Runs the external align_on_z.py script for the C2 domain (reference 1rsyA00);
# skipped unless DO_ALIGNMENT is set.
# NOTE(review): the interpreter and script paths are hard-coded to one user's machine.
if DO_ALIGNMENT:
    !/Users/thibault/miniconda3/envs/peprmint/bin/python ~/OneDrive\ -\ University\ of\ Bergen/projects/peprmint/dev/pepermintdataset/scripts/align_on_z.py -d C2 -ref 1rsyA00 -res1 169 -res2 178 -res3 237 -i 'aligned/output' -o 'zaligned' -dir "aligned_cath-AF"

rotation matrix: [[ 0.1887791   0.22999595  0.9547064 ]
 [ 0.22999595  0.93479195 -0.27067671]
 [-0.9547064   0.27067671  0.12357105]]
translation vector: [13.591332, -2.3096669, 1.071]
100%|█████████████████████████████████████████| 763/763 [00:29<00:00, 25.63it/s]
Done


In [9]:
import importlib
import pepr2ds
# Re-execute the package so that edits made since the first import are picked up.
importlib.reload(pepr2ds)
from pepr2ds.dataset.tagibs import Dataset

# Tag the IBS of the C2 domain on the combined CATH + AlphaFold structures.
C2 = Dataset(DATASET, PEPRMINT_FOLDER)
C2.tag_ibs(
    DATASETAF,
    # --- which structures ---
    domain='C2',                          # domain
    pdbreference="1rsyA00",               # PDB template
    data_type='cath+af',
    base_folder='aligned_cath-AF',
    coordinates_folder_name='zaligned',   # where the PDBs are
    # --- redundancy filters ---
    cathCluster=clusterLevel,             # structure redundancy filter
    Uniref=unirefLevel,                   # sequence redundancy filter
    # --- IBS definition ---
    overide_axis_mode=True,               # use the z axis instead of the alignment to tag the IBS
    zaxis=ZAXISLEVEL,                     # z-axis plane separating IBS from non-IBS
    includeResidueRange=[[171,177],[232,238]],  # first range starts at 171 (original note: "CHANGE 173 to 171 and run again!")
    excludeResidueRange=[],               # exclude those residues from the IBS
    # --- options left disabled ---
    extendSS=False,                       # extend the secondary structures
    extendAlign=False,                    # extend the secondary structure instead of a raw "cut" at the alignment position
    extendCoilOnly=False,                 # extend coil only
    excludeStrand=False,                  # exclude "strand" from secondary structure
    onlyC=False,                          # keep only COIL in the IBS
    withAlignment=False,                  # restrict the results to pdbs that have a sequence
    addSequence=False,                    # add the non-structural data to the IBS/NONIBS dataset
)


  from pandas import Panel


<module 'Ui' from '/Users/thibault/OneDrive - University of Bergen/projects/peprmint/dev/pepermintdataset/notebooks/Ui.py'>

Domain= C2
selecting amino acids
/Users/thibault/Documents/WORK/peprmint/databases/aligned_cath-AF/C2/zaligned
UPDATING COORDINATES


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=480.0), HTML(value='')))


taggin IBS


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=480.0), HTML(value='')))


len IBS 480
len nonIBS 480


In [10]:
# Visual check on structure 1rlwA00 (the cell output is an NGL widget).
C2.show_structure_and_plane('1rlwA00')

NGLWidget()

## START Domain

### orientation 1

In [148]:
# Runs the external align_on_z.py script for the START domain (reference 2e3mA00),
# writing to the 'orientationA' folder; skipped unless DO_ALIGNMENT is set.
# NOTE(review): the interpreter and script paths are hard-coded to one user's machine.
if DO_ALIGNMENT:
    !/Users/thibault/miniconda3/envs/peprmint/bin/python ~/OneDrive\ -\ University\ of\ Bergen/projects/peprmint/dev/pepermintdataset/scripts/align_on_z.py -d START -ref 2e3mA00 -res1 412 -res2 448 -res3 515 -i 'aligned/output' -o 'orientationA' -dir "aligned_cath-AF"

rotation matrix: [[ 0.86034084  0.01170914 -0.50958466]
 [ 0.01170914  0.9990183   0.04272399]
 [ 0.50958466 -0.04272399  0.85935914]]
translation vector: [-15.211999, 1.289, 3.727]
100%|█████████████████████████████████████████| 520/520 [00:27<00:00, 18.95it/s]
Done


In [178]:
import importlib
import pepr2ds
# Re-execute the package so that edits made since the first import are picked up.
importlib.reload(pepr2ds)
from pepr2ds.dataset.tagibs import Dataset

# Tag the IBS of the START domain (orientation A) on the combined CATH + AlphaFold structures.
STARTo1 = Dataset(DATASET, PEPRMINT_FOLDER)
STARTo1.tag_ibs(
    DATASETAF,
    # --- which structures ---
    domain='START',                       # domain
    pdbreference="2e3mA00",               # PDB template
    data_type='cath+af',
    base_folder='aligned_cath-AF',
    coordinates_folder_name="orientationA",  # where the PDBs are
    # --- redundancy filters ---
    cathCluster=clusterLevel,             # structure redundancy filter
    Uniref=unirefLevel,                   # sequence redundancy filter
    # --- IBS definition ---
    overide_axis_mode=True,               # use the z axis instead of the alignment to tag the IBS
    zaxis=ZAXISLEVEL,                     # z-axis plane separating IBS from non-IBS
    includeResidueRange=[],               # no residue range is forced into the IBS
    excludeResidueRange=[],               # exclude those residues from the IBS
    # --- options left disabled ---
    extendSS=False,                       # extend the secondary structures
    extendAlign=False,                    # extend the secondary structure instead of a raw "cut" at the alignment position
    extendCoilOnly=False,                 # extend coil only
    excludeStrand=False,                  # exclude "strand" from secondary structure
    onlyC=False,                          # keep only COIL in the IBS
    withAlignment=False,                  # restrict the results to pdbs that have a sequence
    addSequence=False,                    # add the non-structural data to the IBS/NONIBS dataset
)

  from pandas import Panel


<module 'Ui' from '/Users/thibault/OneDrive - University of Bergen/projects/peprmint/dev/pepermintdataset/notebooks/Ui.py'>

Domain= START
selecting amino acids
/Users/thibault/Documents/WORK/peprmint/databases/aligned_cath-AF/START/orientationA
UPDATING COORDINATES


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=265.0), HTML(value='')))


taggin IBS


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=265.0), HTML(value='')))


len IBS 265
len nonIBS 265


## C1

In [150]:
# Runs the external align_on_z.py script for the C1 domain (reference 1ptrA00);
# skipped unless DO_ALIGNMENT is set.
# NOTE(review): the interpreter and script paths are hard-coded to one user's machine.
if DO_ALIGNMENT:
    !/Users/thibault/miniconda3/envs/peprmint/bin/python ~/OneDrive\ -\ University\ of\ Bergen/projects/peprmint/dev/pepermintdataset/scripts/align_on_z.py -d C1 -ref 1ptrA00 -res1 243 -res2 257 -res3 237 -i 'aligned/output' -o 'zaligned' -dir "aligned_cath-AF"

rotation matrix: [[-0.12685223 -0.15343192  0.97998324]
 [-0.15343192  0.97910875  0.13343427]
 [-0.97998324 -0.13343427 -0.14774348]]
translation vector: [7.2689996, 2.1923335, 1.1389999]
100%|█████████████████████████████████████████| 333/333 [00:05<00:00, 61.15it/s]
Done


In [179]:
import importlib
import pepr2ds
# Re-execute the package so that edits made since the first import are picked up.
importlib.reload(pepr2ds)
from pepr2ds.dataset.tagibs import Dataset

# Tag the IBS of the C1 domain on the combined CATH + AlphaFold structures.
C1 = Dataset(DATASET, PEPRMINT_FOLDER)
C1.tag_ibs(
    DATASETAF,
    # --- which structures ---
    domain='C1',                          # domain
    pdbreference="1ptrA00",               # PDB template
    data_type='cath+af',
    base_folder='aligned_cath-AF',
    coordinates_folder_name="zaligned",   # where the PDBs are
    # --- redundancy filters ---
    cathCluster=clusterLevel,             # structure redundancy filter
    Uniref=unirefLevel,                   # sequence redundancy filter
    # --- IBS definition ---
    overide_axis_mode=True,               # use the z axis instead of the alignment to tag the IBS
    zaxis=ZAXISLEVEL,                     # z-axis plane separating IBS from non-IBS
    includeResidueRange=[],               # no residue range is forced into the IBS
    excludeResidueRange=[],               # exclude those residues from the IBS
    # --- options left disabled ---
    extendSS=False,                       # extend the secondary structures
    extendAlign=False,                    # extend the secondary structure instead of a raw "cut" at the alignment position
    extendCoilOnly=False,                 # extend coil only
    excludeStrand=False,                  # exclude "strand" from secondary structure
    onlyC=False,                          # keep only COIL in the IBS
    withAlignment=False,                  # restrict the results to pdbs that have a sequence
    addSequence=False,                    # add the non-structural data to the IBS/NONIBS dataset
)

  from pandas import Panel


<module 'Ui' from '/Users/thibault/OneDrive - University of Bergen/projects/peprmint/dev/pepermintdataset/notebooks/Ui.py'>

Domain= C1
selecting amino acids
/Users/thibault/Documents/WORK/peprmint/databases/aligned_cath-AF/C1/zaligned
UPDATING COORDINATES


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=199.0), HTML(value='')))


taggin IBS


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=199.0), HTML(value='')))


len IBS 199
len nonIBS 199


## C2DIS

In [152]:
# Runs the external align_on_z.py script for the C2DIS domain (reference 1czsA00);
# skipped unless DO_ALIGNMENT is set.
# NOTE(review): the interpreter and script paths are hard-coded to one user's machine.
if DO_ALIGNMENT:
    !/Users/thibault/miniconda3/envs/peprmint/bin/python ~/OneDrive\ -\ University\ of\ Bergen/projects/peprmint/dev/pepermintdataset/scripts/align_on_z.py -d C2DIS -ref 1czsA00 -res1 23 -res2 76 -res3 45 -i 'aligned/output' -o 'zaligned' -dir "aligned_cath-AF"

rotation matrix: [[ 0.62704971  0.27073276 -0.73041936]
 [ 0.27073276  0.80346918  0.53022735]
 [ 0.73041936 -0.53022735  0.43051889]]
translation vector: [-8.084334, 5.905, 8.721]
100%|███████████████████████████████████████| 1354/1354 [01:12<00:00, 18.79it/s]
Done


In [180]:
import importlib
import pepr2ds
# Re-execute the package so that edits made since the first import are picked up.
importlib.reload(pepr2ds)
from pepr2ds.dataset.tagibs import Dataset

# Tag the IBS of the C2DIS domain on the combined CATH + AlphaFold structures.
C2DIS = Dataset(DATASET, PEPRMINT_FOLDER)
C2DIS.tag_ibs(
    DATASETAF,
    # --- which structures ---
    domain='C2DIS',                       # domain
    pdbreference="1czsA00",               # PDB template
    data_type='cath+af',
    base_folder='aligned_cath-AF',
    coordinates_folder_name="zaligned",   # where the PDBs are
    # --- redundancy filters ---
    cathCluster=clusterLevel,             # structure redundancy filter
    Uniref=unirefLevel,                   # sequence redundancy filter
    # --- IBS definition ---
    overide_axis_mode=True,               # use the z axis instead of the alignment to tag the IBS
    zaxis=ZAXISLEVEL,                     # z-axis plane separating IBS from non-IBS
    includeResidueRange=[],               # no residue range is forced into the IBS
    excludeResidueRange=[],               # exclude those residues from the IBS
    # --- options left disabled ---
    extendSS=False,                       # extend the secondary structures
    extendAlign=False,                    # extend the secondary structure instead of a raw "cut" at the alignment position
    extendCoilOnly=False,                 # extend coil only
    excludeStrand=False,                  # exclude "strand" from secondary structure
    onlyC=False,                          # keep only COIL in the IBS
    withAlignment=False,                  # restrict the results to pdbs that have a sequence
    addSequence=False,                    # add the non-structural data to the IBS/NONIBS dataset
)

  from pandas import Panel


<module 'Ui' from '/Users/thibault/OneDrive - University of Bergen/projects/peprmint/dev/pepermintdataset/notebooks/Ui.py'>

Domain= C2DIS
selecting amino acids
/Users/thibault/Documents/WORK/peprmint/databases/aligned_cath-AF/C2DIS/zaligned
UPDATING COORDINATES


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=368.0), HTML(value='')))


taggin IBS


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=368.0), HTML(value='')))


len IBS 361
len nonIBS 368


## PX 

In [153]:
# Runs the external align_on_z.py script for the PX domain (reference 1h6hA00);
# skipped unless DO_ALIGNMENT is set.
# NOTE(review): the interpreter and script paths are hard-coded to one user's machine.
if DO_ALIGNMENT:
    !/Users/thibault/miniconda3/envs/peprmint/bin/python ~/OneDrive\ -\ University\ of\ Bergen/projects/peprmint/dev/pepermintdataset/scripts/align_on_z.py -d PX -ref 1h6hA00 -res1 33 -res2 74 -res3 100 -i 'aligned/output' -o 'zaligned' -dir "aligned_cath-AF"

rotation matrix: [[ 0.58488606  0.63813979  0.50068544]
 [ 0.63813979  0.01901057 -0.76968578]
 [-0.50068544  0.76968578 -0.39610338]]
translation vector: [8.763333, -3.8356667, -1.0789999]
100%|█████████████████████████████████████████| 225/225 [00:08<00:00, 27.02it/s]
Done


In [181]:
import importlib
import pepr2ds
# Re-execute the package so that edits made since the first import are picked up.
importlib.reload(pepr2ds)
from pepr2ds.dataset.tagibs import Dataset

# Tag the IBS of the PX domain on the combined CATH + AlphaFold structures.
PX = Dataset(DATASET, PEPRMINT_FOLDER)
PX.tag_ibs(
    DATASETAF,
    # --- which structures ---
    domain='PX',                          # domain
    pdbreference="1h6hA00",               # PDB template
    data_type='cath+af',
    base_folder='aligned_cath-AF',
    coordinates_folder_name="zaligned",   # where the PDBs are
    # --- redundancy filters ---
    cathCluster=clusterLevel,             # structure redundancy filter
    Uniref=unirefLevel,                   # sequence redundancy filter
    # --- IBS definition ---
    overide_axis_mode=True,               # use the z axis instead of the alignment to tag the IBS
    zaxis=ZAXISLEVEL,                     # z-axis plane separating IBS from non-IBS
    includeResidueRange=[],               # no residue range is forced into the IBS
    excludeResidueRange=[],               # exclude those residues from the IBS
    # --- options left disabled ---
    extendSS=False,                       # extend the secondary structures
    extendAlign=False,                    # extend the secondary structure instead of a raw "cut" at the alignment position
    extendCoilOnly=False,                 # extend coil only
    excludeStrand=False,                  # exclude "strand" from secondary structure
    onlyC=False,                          # keep only COIL in the IBS
    withAlignment=False,                  # restrict the results to pdbs that have a sequence
    addSequence=False,                    # add the non-structural data to the IBS/NONIBS dataset
)

  from pandas import Panel


<module 'Ui' from '/Users/thibault/OneDrive - University of Bergen/projects/peprmint/dev/pepermintdataset/notebooks/Ui.py'>

Domain= PX
selecting amino acids
/Users/thibault/Documents/WORK/peprmint/databases/aligned_cath-AF/PX/zaligned
UPDATING COORDINATES


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=155.0), HTML(value='')))


taggin IBS


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=155.0), HTML(value='')))


len IBS 155
len nonIBS 155


## ENTH

In [154]:
# Optional step, executed only when DO_ALIGNMENT is truthy: run the align_on_z.py script
# on the ENTH domain (-d) with 1h0aA00 as reference (-ref) and residues 17, 70 and 116
# (-res1/-res2/-res3). Input folder (-i) is 'aligned/output', output folder (-o) is
# 'zaligned', directory (-dir) is "aligned_cath-AF".
# NOTE(review): presumably the three residues define the orientation used for the
# alignment on z -- confirm in align_on_z.py.
# NOTE(review): the interpreter and script paths are absolute and machine-specific.
if DO_ALIGNMENT:
    !/Users/thibault/miniconda3/envs/peprmint/bin/python ~/OneDrive\ -\ University\ of\ Bergen/projects/peprmint/dev/pepermintdataset/scripts/align_on_z.py -d ENTH -ref 1h0aA00 -res1 17 -res2 70 -res3 116 -i 'aligned/output' -o 'zaligned' -dir "aligned_cath-AF"

rotation matrix: [[ 0.48232716  0.80931378 -0.33521889]
 [ 0.80931378 -0.26525627  0.5240709 ]
 [ 0.33521889 -0.5240709  -0.7829291 ]]
translation vector: [-8.942667, 11.059001, -3.0606668]
100%|█████████████████████████████████████████| 190/190 [00:08<00:00, 23.25it/s]
Done


In [182]:
# Reload the pepr2ds package, then build the ENTH dataset and tag its IBS / non-IBS residues.
import importlib
import pepr2ds
importlib.reload(pepr2ds)
from pepr2ds.dataset.tagibs import Dataset

ENTH = Dataset(DATASET, PEPRMINT_FOLDER)
ENTH.tag_ibs(
    DATASETAF,
    domain="ENTH",  # domain to tag
    pdbreference="1h0aA00",  # reference structure
    includeResidueRange=[],  # empty include range
    excludeResidueRange=[],  # residues to exclude from the IBS (none here)
    extendSS=False,  # flag: extend the secondary structures
    withAlignment=False,  # flag: restrict the results to PDBs that have a sequence
    onlyC=False,  # flag: keep only COIL in the IBS
    cathCluster=clusterLevel,  # structure redundancy filter
    Uniref=unirefLevel,  # sequence redundancy filter
    addSequence=False,  # flag: add the non-structural data to the IBS/NONIBS dataset
    extendAlign=False,  # flag: extend the secondary structure instead of a raw "cut" at the alignment position
    excludeStrand=False,  # flag: exclude "strand" from the secondary structure
    overide_axis_mode=True,  # use the Z axis instead of the alignment to tag the IBS
    zaxis=ZAXISLEVEL,  # Z-axis plane that defines IBS vs. non-IBS
    extendCoilOnly=False,  # flag: extend coil only
    coordinates_folder_name="zaligned",
    data_type="cath+af",
    base_folder="aligned_cath-AF",
)

  from pandas import Panel


<module 'Ui' from '/Users/thibault/OneDrive - University of Bergen/projects/peprmint/dev/pepermintdataset/notebooks/Ui.py'>

Domain= ENTH
selecting amino acids
/Users/thibault/Documents/WORK/peprmint/databases/aligned_cath-AF/ENTH/zaligned
UPDATING COORDINATES


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98.0), HTML(value='')))


taggin IBS


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=98.0), HTML(value='')))


len IBS 98
len nonIBS 98


## PLD

In [53]:
# Cast the domain labels to plain strings, then relabel every "PIPLC" row as "PLD".
DATASET["domain"] = DATASET["domain"].astype(str)
DATASET.loc[DATASET["domain"] == "PIPLC", "domain"] = "PLD"


In [155]:
if DO_ALIGNMENT:
    !/Users/thibault/miniconda3/envs/peprmint/bin/python ~/OneDrive\ -\ University\ of\ Bergen/projects/peprmint/dev/pepermintdataset/scripts/align_on_z.py -d PLD -ref 3rlhA00 -res1 59 -res2 205 -res3 198 -i 'aligned/output' -o 'orientationOPM' -dir "aligned_cath-AF"
    !/Users/thibault/miniconda3/envs/peprmint/bin/python ~/OneDrive\ -\ University\ of\ Bergen/projects/peprmint/dev/pepermintdataset/scripts/align_on_z.py -d PLD -ref 3rlhA00 -res1 53 -res2 41 -res3 99 -i -i 'aligned/output' -o 'orientationCAGE' -dir "aligned_cath-AF"



rotation matrix: [[ 0.94999119 -0.14647713  0.27579192]
 [-0.14647713  0.57096459  0.80780189]
 [-0.27579192 -0.80780189  0.52095578]]
translation vector: [-0.85166645, 8.145667, 8.153333]
100%|█████████████████████████████████████████| 198/198 [00:13<00:00, 14.96it/s]
Done
usage: CalcAngle Through Trajectory [-h] -d DOMAIN -ref REF -res1 RES1 -res2
                                    RES2 -res3 RES3 [-dir DIRECTORY]
                                    [-i INPUTFOLDER] [-o OUTPUTFOLDER]
CalcAngle Through Trajectory: error: argument -i/--inputfolder: expected one argument


### Orientation OPM

In [183]:
# Reload the pepr2ds package, then build the PLD dataset (coordinates taken from the
# "orientationOPM" folder) and tag its IBS / non-IBS residues.
import importlib
import pepr2ds
importlib.reload(pepr2ds)
from pepr2ds.dataset.tagibs import Dataset

PLDopm = Dataset(DATASET, PEPRMINT_FOLDER)
PLDopm.tag_ibs(
    DATASETAF,
    domain="PLD",  # domain to tag
    pdbreference="3rlhA00",  # reference structure
    includeResidueRange=[],  # empty include range
    excludeResidueRange=[],  # residues to exclude from the IBS (none here)
    extendSS=False,  # flag: extend the secondary structures
    withAlignment=False,  # flag: restrict the results to PDBs that have a sequence
    onlyC=False,  # flag: keep only COIL in the IBS
    cathCluster=clusterLevel,  # structure redundancy filter
    Uniref=unirefLevel,  # sequence redundancy filter
    addSequence=False,  # flag: add the non-structural data to the IBS/NONIBS dataset
    extendAlign=False,  # flag: extend the secondary structure instead of a raw "cut" at the alignment position
    excludeStrand=False,  # flag: exclude "strand" from the secondary structure
    overide_axis_mode=True,  # use the Z axis instead of the alignment to tag the IBS
    zaxis=ZAXISLEVEL,  # Z-axis plane that defines IBS vs. non-IBS
    extendCoilOnly=False,  # flag: extend coil only
    coordinates_folder_name="orientationOPM",
    data_type="cath+af",
    base_folder="aligned_cath-AF",
)


  from pandas import Panel


<module 'Ui' from '/Users/thibault/OneDrive - University of Bergen/projects/peprmint/dev/pepermintdataset/notebooks/Ui.py'>

Domain= PLD
selecting amino acids
/Users/thibault/Documents/WORK/peprmint/databases/aligned_cath-AF/PLD/orientationOPM
UPDATING COORDINATES


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=111.0), HTML(value='')))


taggin IBS


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=111.0), HTML(value='')))


len IBS 111
len nonIBS 111


## ANNEXIN

In [156]:
# Optional step, executed only when DO_ALIGNMENT is truthy: run the align_on_z.py script
# on the ANNEXIN domain (-d) with 1a8aA01 as reference (-ref) and residues 25, 68 and 77
# (-res1/-res2/-res3). Input folder (-i) is 'aligned/output', output folder (-o) is
# 'zaligned', directory (-dir) is "aligned_cath-AF".
# NOTE(review): presumably the three residues define the orientation used for the
# alignment on z -- confirm in align_on_z.py.
# NOTE(review): the interpreter and script paths are absolute and machine-specific.
if DO_ALIGNMENT:
    !/Users/thibault/miniconda3/envs/peprmint/bin/python ~/OneDrive\ -\ University\ of\ Bergen/projects/peprmint/dev/pepermintdataset/scripts/align_on_z.py -d ANNEXIN -ref 1a8aA01 -res1 25 -res2 68 -res3 77 -i 'aligned/output' -o 'zaligned' -dir "aligned_cath-AF"


rotation matrix: [[ 0.34064928  0.05187281 -0.93875837]
 [ 0.05187281  0.99591903  0.07385452]
 [ 0.93875837 -0.07385452  0.33656832]]
translation vector: [-7.088, -0.97699994, -1.3423334]
100%|█████████████████████████████████████████| 451/451 [00:10<00:00, 41.12it/s]
Done


In [59]:
# Reload the pepr2ds package, then build the ANNEXIN dataset and tag its IBS / non-IBS residues.
import importlib
import pepr2ds
importlib.reload(pepr2ds)
from pepr2ds.dataset.tagibs import Dataset

ANNEXIN = Dataset(DATASET, PEPRMINT_FOLDER)
ANNEXIN.tag_ibs(
    DATASETAF,
    domain="ANNEXIN",  # domain to tag
    pdbreference="1a8aA01",  # reference structure
    includeResidueRange=[],  # empty include range
    excludeResidueRange=[],  # residues to exclude from the IBS (none here)
    extendSS=False,  # flag: extend the secondary structures
    withAlignment=False,  # flag: restrict the results to PDBs that have a sequence
    onlyC=False,  # flag: keep only COIL in the IBS
    cathCluster=clusterLevel,  # structure redundancy filter
    Uniref=unirefLevel,  # sequence redundancy filter
    addSequence=False,  # flag: add the non-structural data to the IBS/NONIBS dataset
    extendAlign=False,  # flag: extend the secondary structure instead of a raw "cut" at the alignment position
    excludeStrand=False,  # flag: exclude "strand" from the secondary structure
    overide_axis_mode=True,  # use the Z axis instead of the alignment to tag the IBS
    zaxis=ZAXISLEVEL,  # Z-axis plane that defines IBS vs. non-IBS
    extendCoilOnly=False,  # flag: extend coil only
    coordinates_folder_name="zaligned",
    data_type="cath+af",
    base_folder="aligned_cath-AF",
)

  from pandas import Panel


<module 'Ui' from '/Users/thibault/OneDrive - University of Bergen/projects/peprmint/dev/pepermintdataset/notebooks/Ui.py'>

selecting amino acids
UPDATING COORDINATES


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=122.0), HTML(value='')))


taggin IBS


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=122.0), HTML(value='')))


len IBS 122
len nonIBS 122


## PLA

In [200]:
# Optional step, executed only when DO_ALIGNMENT is truthy: run the align_on_z.py script
# on the PLA domain (-d) with 1pocA00 as reference (-ref) and residues 7, 92 and 76
# (-res1/-res2/-res3). Input folder (-i) is 'aligned/output', output folder (-o) is
# 'zaligned', directory (-dir) is "aligned_cath-AF".
# NOTE(review): presumably the three residues define the orientation used for the
# alignment on z -- confirm in align_on_z.py.
# NOTE(review): the interpreter and script paths are absolute and machine-specific.
if DO_ALIGNMENT:
    !/Users/thibault/miniconda3/envs/peprmint/bin/python ~/OneDrive\ -\ University\ of\ Bergen/projects/peprmint/dev/pepermintdataset/scripts/align_on_z.py -d PLA -ref 1pocA00 -res1 7 -res2 92 -res3 76 -i 'aligned/output' -o 'zaligned' -dir "aligned_cath-AF"

rotation matrix: [[ 0.85333715 -0.09698638  0.51225907]
 [-0.09698638  0.93586407  0.33875077]
 [-0.51225907 -0.33875077  0.78920122]]
translation vector: [4.733333, 2.7543333, 2.415]
100%|█████████████████████████████████████████| 539/539 [00:24<00:00, 21.69it/s]
Done


In [206]:
# Reload the pepr2ds package, then build the PLA dataset and tag its IBS / non-IBS residues.
import importlib
import pepr2ds
importlib.reload(pepr2ds)
from pepr2ds.dataset.tagibs import Dataset

PLA = Dataset(DATASET, PEPRMINT_FOLDER)
PLA.tag_ibs(
    DATASETAF,
    domain="PLA",  # domain to tag
    pdbreference="1pocA00",  # reference structure
    includeResidueRange=[],  # empty include range
    excludeResidueRange=[],  # residues to exclude from the IBS (none here)
    extendSS=False,  # flag: extend the secondary structures
    withAlignment=False,  # flag: restrict the results to PDBs that have a sequence
    onlyC=False,  # flag: keep only COIL in the IBS
    cathCluster=clusterLevel,  # structure redundancy filter
    Uniref=unirefLevel,  # sequence redundancy filter
    addSequence=False,  # flag: add the non-structural data to the IBS/NONIBS dataset
    extendAlign=False,  # flag: extend the secondary structure instead of a raw "cut" at the alignment position
    excludeStrand=False,  # flag: exclude "strand" from the secondary structure
    overide_axis_mode=True,  # use the Z axis instead of the alignment to tag the IBS
    zaxis=ZAXISLEVEL,  # Z-axis plane that defines IBS vs. non-IBS
    extendCoilOnly=False,  # flag: extend coil only
    coordinates_folder_name="zaligned",
    data_type="cath+af",
    base_folder="aligned_cath-AF",
)

  from pandas import Panel


<module 'Ui' from '/Users/thibault/OneDrive - University of Bergen/projects/peprmint/dev/pepermintdataset/notebooks/Ui.py'>

Domain= PLA
selecting amino acids
/Users/thibault/Documents/WORK/peprmint/databases/aligned_cath-AF/PLA/zaligned
UPDATING COORDINATES


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=191.0), HTML(value='')))


taggin IBS


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=171.0), HTML(value='')))


len IBS 141
len nonIBS 170


## MERGED DATASET

In [64]:
def merge_dataset(**kwargs):
    """
    Merge several tagged domain datasets into a single Dataset object.

    Args:
        - **kwargs: one keyword per domain, given as ``label=dataset``. Every dataset
          must expose ``ibs``, ``nonibs`` and ``domainDf`` attributes, which are
          concatenated with ``pd.concat``.
    Return:
        Dataset built from DATASET and PEPRMINT_FOLDER whose ``ibs``, ``nonibs`` and
        ``domainDf`` are the concatenation of the inputs (in keyword order) and whose
        ``domainLabel`` is the keyword names joined with "+".
    Raises:
        ValueError: if no dataset is given.
    """
    # Fail early and explicitly: with nothing to merge, pd.concat would only raise an
    # opaque "No objects to concatenate" ValueError after the package reload below.
    if not kwargs:
        raise ValueError("merge_dataset needs at least one dataset (e.g. PH=PH)")

    importlib.reload(pepr2ds)
    merged = Dataset(DATASET, PEPRMINT_FOLDER)

    datasets = list(kwargs.values())
    merged.ibs = pd.concat([ds.ibs for ds in datasets])
    merged.nonibs = pd.concat([ds.nonibs for ds in datasets])
    merged.domainDf = pd.concat([ds.domainDf for ds in datasets])
    # Iterating a dict yields its keys, i.e. the domain labels, in call order.
    merged.domainLabel = "+".join(kwargs)
    return merged
    

# Build the merged dataset and its report only when DO_REPORT is truthy.
if DO_REPORT:
    # Each keyword name is the domain label of the dataset passed as its value.
    # NOTE(review): ENTH is disabled below although the saved output of this cell still
    # lists ENTH in the merged label -- re-run the cell or re-enable ENTH to reconcile.
    MERGED = merge_dataset(
        PH=PH,
        C2=C2,
        START=STARTo1,
        C1=C1,
        C2DIS=C2DIS,
        PX=PX,
        # ENTH=ENTH,
        PLD=PLDopm,
        ANNEXIN=ANNEXIN,
        PLA=PLA,
    )
    MERGED.analysis.report(displayHTML=False)

  from pandas import Panel


making report for PH+C2+START+C1+C2DIS+PX+ENTH+PLD+ANNEXIN+PLA


  plt.tight_layout()
  plt.tight_layout()
