### Initialize ImageJ

In [None]:
# Initialize ImageJ from a local Fiji installation.
# NOTE: the application path is machine-specific and must be adapted per workstation.
import imagej
ij = imagej.init('/Applications/Fiji_MARS-beta18.app')
# echo the ImageJ version as cell output
ij.getVersion()

### Imports

In [None]:
import sys
sys.path.insert(0, '..')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
import awesome_data
import scipy.stats
import itertools
from marspy.convert.archive import DnaMoleculeArchive, instantiate_archive, describe_archives
from marspy.stats import bootstrap, calc_ci, significance
from tqdm.notebook import tqdm
from functools import reduce
from scipy.stats import norm
from pingouin import welch_anova, pairwise_gameshowell


### Select archives

In [None]:
# Archive names handed one by one to instantiate_archive() further down.
selected_archives = ['Cohesin-MCM_wt_highsalt.yama',
                     'Cohesin-MCM_wt_lowsalt.yama',
                     'Cohesin-MCM_YDF_lowsalt.yama',
                     'Cohesin-only_lowsalt.yama']

### Figure style and output directory

In [None]:
from figure_style import *

In [None]:
# apply the plotting style (set_style_paper presumably comes from the figure_style star import - confirm)
set_style_paper()

In [None]:
# preview the colors in `palette`, which the figure cells index into
sns.palplot(palette)

In [None]:
# change to desired output directory
# (file names are appended by plain string concatenation, so keep the trailing slash)
dir_out = '/Users/matze/Documents/PhD/Manuscripts/MCM-cohesin_sliding/Figures/SVGs/no_statistics/'
dir_out

### Instantiate selected archives

In [None]:
# Instantiate every selected archive; the cells below access the results
# through DnaMoleculeArchive.instances.
for archive in tqdm(selected_archives):
    instantiate_archive(archive, awesome_data.datasets)

### Archives Overview

In [None]:
# overview of all instantiated archives
describe_archives(DnaMoleculeArchive.instances)


### Load segment tables, apply filter & detect pauses

In [None]:
# Attach the segment tables to every archive, then flag pause segments.
for archive in tqdm(DnaMoleculeArchive.instances):
    archive.add_segments_tables()
    # detection parameters are passed through unchanged; their units are defined by detect_pauses
    archive.detect_pauses(thresh=200, sigma_max=30, length=1)
    

### Add df_noidle to all molecules

In [None]:
# Add df_noidle to the molecules of every archive for the cohesin prefix;
# calc_msd and calc_ars1_encounters below read molecule.df_noidle.
for archive in tqdm(DnaMoleculeArchive.instances):
    
    archive.add_df_noidle(prefix='Cohesin_1_')

### Functions

In [None]:
def calc_msd(df, prefix, msd_col, time_col, tau=1, start=np.nan, end=np.nan):
    """
    Calculates mean square displacement of molecules.
    Rows are paired by index label (row, row + tau). A pair is skipped when one
    of its rows is missing, when the frame counter prefix+'T' does not advance by
    exactly tau, or when either value is NaN. Do not drop NaN values beforehand!
    *df: dataframe
    *prefix: protein prefix
    *msd_col: column in df to calculate msd (without protein prefix!)
    *time_col: time column in df (without protein prefix!)
    *tau: index / frame offset between the two rows of a pair
    *start: earliest time allowed for the first row of a pair (default: minimum of time column)
    *end: latest time allowed for the second row of a pair (default: maximum of time column)

    returns (msd, dtime)
    msd: mean squared displacement
    dtime: mean time difference underlying msd calculation
    Both are NaN when no valid pair exists.
    """
    time_key = prefix + time_col
    value_key = prefix + msd_col
    frame_key = prefix + 'T'

    # if no start / end was specified by the user, use the full time range
    if np.isnan(start):
        start = df[time_key].min()

    if np.isnan(end):
        end = df[time_key].max()

    diffs = []
    dtimes = []
    for row in df.index:
        try:
            t_first = df.loc[row, time_key]
            t_second = df.loc[row + tau, time_key]
            frame_step = df.loc[row + tau, frame_key] - df.loc[row, frame_key]
            x_first = df.loc[row, value_key]
            x_second = df.loc[row + tau, value_key]
        except KeyError:
            # Partner row does not exist (end of df or a gap in the index).
            # Skip only this pair: stopping here would discard every pair after the first gap.
            continue

        # pair has to lie inside the requested time window
        if not (t_first >= start and t_second <= end):
            continue

        # frames have to be exactly tau apart and both values present
        if frame_step != tau or np.isnan(x_first) or np.isnan(x_second):
            continue

        diffs.append(x_second - x_first)
        dtimes.append(t_second - t_first)

    if not diffs:
        return np.nan, np.nan

    return np.mean(np.square(diffs)), np.mean(dtimes)

In [None]:
def calc_ars1_encounters(molecule):
    """
    Calculates encounters and encounter outcomes with ARS1 for a molecule object (dynamic threshold).

    Walks through consecutive rows (row, row + 1) of molecule.df_noidle and classifies each step:
    * pass:  cohesin moves from one side of ARS1 to more than thresh2 beyond it
    * block: cohesin is within thresh1 of ARS1 but does not pass
    The last row (no successor) is never classified.

    Results are stored on the molecule (nothing is returned):
    * ars1_encounters_vis: number of passes + blocks
    * ars1_pass / ars1_block: number of passing / blocking events
    * pass_times: time of the row following each passage
    * pass_diff: differences between consecutive pass_times
    """
    position_col = 'Cohesin_1_Position_on_DNA'
    time_col = 'Cohesin_1_Time_(s)'

    # defining dynamic thresholds based on DNA length
    # NOTE(review): the mean DNA length is quoted as 35.96 px in the original comment and
    # as 35.95 px in the df_pause cell, while 35.94 is used here - confirm the intended value
    mean_dna_length = 35.94
    _dna_length = molecule.calc_length_dna()
    thresh1 = mean_dna_length / _dna_length * 1500
    thresh2 = mean_dna_length / _dna_length * 500

    # reset in case it reruns
    molecule.ars1_encounters_vis = 0
    molecule.ars1_pass = 0
    molecule.ars1_block = 0
    molecule.pass_times = []

    # ARS1 position: median MCM position if MCM is present, fixed default otherwise
    try:
        if molecule.proteins['MCM'] > 0:
            pos_ars1 = molecule.df['MCM_1_Position_on_DNA'].median()
        else:
            pos_ars1 = 5557.5
    except KeyError:
        pos_ars1 = 5557.5

    track = molecule.df_noidle
    for row in track.index:
        try:
            here = track.loc[row, position_col]
            there = track.loc[row + 1, position_col]
        except KeyError:
            # no successor row (last row reached or gap in the index), skip
            continue

        passed = (here < pos_ars1 < there - thresh2) or (here > pos_ars1 > there + thresh2)

        if passed:
            molecule.ars1_encounters_vis += 1
            molecule.ars1_pass += 1
            # The time column is 'Cohesin_1_Time_(s)', as everywhere else in this file.
            # The former 'Cohesin_1_Time (s)' raised a KeyError that was silently
            # swallowed, leaving pass_times empty.
            molecule.pass_times.append(track.loc[row + 1, time_col])
        elif abs(here - pos_ars1) < thresh1:
            # direct collision without passage
            molecule.ars1_encounters_vis += 1
            molecule.ars1_block += 1

    # calculate difference between passage times
    molecule.pass_diff = np.diff(molecule.pass_times, n=1)

### Generate main df

In [None]:
def setup_pandas_df(archive_instances):
    '''Returns an empty df whose columns form a 2-level (molecule, properties) index.'''
    # rows are added later, indexed by molecule UID
    # outer level: every prefix occurring in any of the archives, alphabetically sorted
    molecule_level = sorted(set().union(*(archive.prefixes for archive in archive_instances)))
    # inner level: per-protein properties
    property_level = ['lifetime', 'initial_intensity', 'msd', 'dtime', 'd_coeff']

    columns = pd.MultiIndex.from_product([molecule_level, property_level],
                                         names=['molecule', 'properties'])
    return pd.DataFrame(columns=columns)

In [None]:
# empty main dataframe; rows (one per molecule UID) are filled in by the next cell
df = setup_pandas_df(DnaMoleculeArchive.instances)

# sort MultiIndexCols for performance
df.sort_index(axis=1, inplace=True)
df.head()

In [None]:
# Fill the main dataframe: one row per molecule (indexed by UID) with general
# metadata, ARS1 encounter counts and per-protein properties (intensity, MSD,
# diffusion coefficient, pauses).
for archive in tqdm(DnaMoleculeArchive.instances):

    for molecule in archive.molecules:
        # general columns first
        df.loc[molecule.uid,'dna_length'] = molecule.calc_length_dna()
        df.loc[molecule.uid,'nucleotide'] = archive.nucleotide
        df.loc[molecule.uid,'nacl'] = archive.nacl

        # assign MCM variant and change to n/a for molecules without MCM on DNA
        try:
            if molecule.proteins['MCM'] > 0:
                df.loc[molecule.uid,'MCM_variant'] = archive.mcm
            else:
                df.loc[molecule.uid,'MCM_variant'] = 'n/a'
        except KeyError:
            # archive has 'n/a' by default
            df.loc[molecule.uid,'MCM_variant'] = archive.mcm

        try:
            df.loc[molecule.uid,'MCM_bleaching_steps'] = molecule.params['MCM_bleaching_steps']
        except KeyError:
            # parameter not available for this molecule, leave the cell empty
            pass

        # tags are stored as one comma-separated string and split again after the loop;
        # join (unlike reduce without initial value) also works for molecules without tags
        df.loc[molecule.uid,'tags'] = ','.join(molecule.tags)

        # calculate encounters between Cohesin and ARS1 here
        calc_ars1_encounters(molecule)
        df.loc[molecule.uid,'Visualized encounters'] = molecule.ars1_encounters_vis
        df.loc[molecule.uid,'ARS1 pass'] = molecule.ars1_pass
        df.loc[molecule.uid,'ARS1 block'] = molecule.ars1_block

        if ((molecule.ars1_pass + molecule.ars1_block) != molecule.ars1_encounters_vis):
            print(molecule.uid,'NO MATCH UP IN COLLISIONS BEFORE PAUSE')

        # general proteins
        for protein in molecule.proteins:
            #store protein number
            df.loc[molecule.uid,'number_'+protein] = molecule.params['Number_'+protein]
            #assign label for each protein
            df.loc[molecule.uid,'label_'+protein] = archive.labels[protein]

        # every specific protein
        for prefix in molecule.prefixes:

            #take mean intensity from the first 5 rows (maybe need try block later if shorter traces)
            df.loc[molecule.uid,(prefix,'initial_intensity')] = molecule.df.iloc[:5][prefix+'Intensity'].mean()

            #observation time (in s) defined by tracking length
            df.loc[molecule.uid,(prefix,'timespan_(s)')] = molecule.df.filter(regex=prefix).dropna()[prefix+'Time_(s)'].max()

            # average position on DNA (median, less sensitive to noise than the mean)
            df.loc[molecule.uid,(prefix,'avg_position_on_dna')] = molecule.df[prefix+'Position_on_DNA'].median()

            # MSD (kbp^2) & dtime (s)
            msd, dtime = calc_msd(df=molecule.df_noidle, prefix=prefix, msd_col='Position_on_DNA', time_col='Time_(s)',
                                  tau=1)
            df.loc[molecule.uid,(prefix,'msd')] = msd/(1000**2)
            df.loc[molecule.uid,(prefix,'dtime')] = dtime

            # diffusion coefficient (kbp^2/s)
            df.loc[molecule.uid,(prefix,'d_coeff')] = df.loc[molecule.uid,(prefix,'msd')] / (2*df.loc[molecule.uid,(prefix,'dtime')])

            # pauses:
            for seg_df in filter(lambda sdf: sdf.type == 'rate' and sdf.prefix == prefix, molecule.seg_dfs):
                # rows of the segment table flagged as pause
                pauses = seg_df.df[seg_df.df['pause_B']]
                n_pauses = len(pauses)

                # number of pauses
                df.loc[molecule.uid,(prefix,'number_pauses')] = n_pauses

                # count each pause as one encounter which was blocked
                df.loc[molecule.uid,'Visualized encounters'] = df.loc[molecule.uid,'Visualized encounters'].item() + n_pauses
                df.loc[molecule.uid,'ARS1 block'] = df.loc[molecule.uid,'ARS1 block'].item() + n_pauses

                # cumulative pause duration
                df.loc[molecule.uid,(prefix,'cum_pause_duration_(s)')] = (pauses['X2'] - pauses['X1']).sum()

                # fraction of trajectory protein being idle
                df.loc[molecule.uid,(prefix,'fraction_idle')] = (df.loc[molecule.uid,(prefix,'cum_pause_duration_(s)')] /
                                                                 df.loc[molecule.uid,(prefix,'timespan_(s)')])

            if (df.loc[molecule.uid,'Visualized encounters'].item() !=  (df.loc[molecule.uid,'ARS1 block'].item() + df.loc[molecule.uid,'ARS1 pass'].item())):
                print(molecule.uid,'NO MATCH UP IN COLLISIONS AFTER PAUSE')


# convert tags back to list (an empty tag string becomes an empty list)
df['tags'] = df['tags'].apply(lambda tags: tags.split(',') if tags else [])

# replace NaN values for number_MCM with 0, then derive the boolean MCM_at_origin column
df['number_MCM'] = df['number_MCM'].apply(lambda value: 0 if np.isnan(value) else value)
df['MCM_at_origin'] = df['number_MCM'].apply(lambda value: value>=1)

# again sort MultiIndexCols for performance
df.sort_index(axis=1, inplace=True)

# fix data types
df = df.infer_objects()


In [None]:
# display the assembled per-molecule dataframe
df

In [None]:
# fraction of visualized encounters that ended in a passage;
# molecules with 0 visualized encounters give 0/0 here and are filtered out in df2 below
df['ARS1 passing probability'] = df['ARS1 pass'] / df['Visualized encounters']


In [None]:
# df with all molecules removed with 0 visualized encounters
df2 = df.copy()

df2 = df2[(df2['Visualized encounters']!=0)]

## setup df_pause

In [None]:
# Empty frame that collects one row per detected cohesin pause (filled by the next cell).
# The column 'Cohesin_pause_pass' is not listed here; it is introduced by the concat in the next cell.
df_pause = pd.DataFrame(columns=['MCM_at_origin', 'MCM_bleaching_steps', 'MCM_variant', 'nacl', 
                                 'pause_duration','MCM_position_on_dna', 'pause_position_on_dna', 'pause_at_MCM', 'Cohesin_turnaround'])   
df_pause

In [None]:
# Build df_pause: one row per pause segment of Cohesin_1_ in every molecule, with
# MCM metadata, pause duration / position, whether cohesin turned around at the
# pause and whether the pause ended with a passage of the origin.
for archive in tqdm(DnaMoleculeArchive.instances):

    for molecule in archive.molecules:
        
        # defining dynamic thresholds based on DNA length
        # mean DNA length from analysis: 35.95 px
        # NOTE(review): calc_ars1_encounters uses 35.94 for the same constant - confirm which is intended
        mean_dna_length = 35.95
        _dna_length = molecule.calc_length_dna()
        thresh1 = mean_dna_length / _dna_length * 1500
        thresh2 = mean_dna_length / _dna_length * 500
        
        
        # ARS1 position: median MCM position if MCM is present, fixed default otherwise
        try:
            if molecule.proteins['MCM'] > 0:
                pos_ars1 = molecule.df['MCM_1_Position_on_DNA'].median()
            else:
                pos_ars1 = 5557.5
        except KeyError:
            pos_ars1 = 5557.5

        # pauses:
        for seg_df in filter(lambda sdf: sdf.type == 'rate' and sdf.prefix == 'Cohesin_1_', molecule.seg_dfs):
            # NOTE: iterrows() yields (index label, row Series), so `row` holds the
            # index label and `index` the (unused) row Series - the names are swapped
            for row, index in seg_df.df[seg_df.df['pause_B']].iterrows():
                pause_start = seg_df.df[seg_df.df['pause_B']].loc[row, 'X1']
                pause_end = seg_df.df[seg_df.df['pause_B']].loc[row, 'X2']
                # temporary single-row df for this pause
                _temp_df = pd.DataFrame()
                try:
                    _temp_df.loc[0,'MCM_at_origin'] = molecule.proteins['MCM'] > 0
                    _temp_df.loc[0,'MCM_bleaching_steps'] = molecule.params['MCM_bleaching_steps']
                    _temp_df.loc[0,'MCM_variant'] = archive.mcm
                    _temp_df.loc[0,'MCM_position_on_dna'] = molecule.df['MCM_1_Position_on_DNA'].median()
                except KeyError:
                    # any missing key above (not only a missing 'MCM' entry) ends up here
                    # and marks the pause as "no MCM"
                    _temp_df.loc[0,'MCM_at_origin'] = False
                    _temp_df.loc[0,'MCM_variant'] = 'n/a'

                _temp_df.loc[0,'nacl'] = archive.nacl
                _temp_df.loc[0,'pause_duration'] = pause_end - pause_start

                # median cohesin position over all rows whose x value lies within the pause
                _temp_df.loc[0,'pause_position_on_dna'] = molecule.df[(molecule.df[seg_df.prefix+seg_df.col_x] >= pause_start) & 
                                                                (molecule.df[seg_df.prefix+seg_df.col_x] <= pause_end)]['Cohesin_1_Position_on_DNA'].median()

                # Turnaround: the position one row before the pause start and the position one
                # row after the pause end lie on the same side of the respective pause boundary.
                # NOTE(review): .item() raises ValueError (not KeyError) unless exactly one row
                # has a time equal to pause_start / pause_end - confirm this always holds.
                try:
                    # before and after pause frames higher DNA position
                    if ((molecule.df.loc[molecule.df.index[molecule.df['Cohesin_1_Time_(s)'] == pause_start].item()-1,'Cohesin_1_Position_on_DNA'] > 
                         molecule.df.loc[molecule.df.index[molecule.df['Cohesin_1_Time_(s)'] == pause_start].item(),'Cohesin_1_Position_on_DNA']) and
                        (molecule.df.loc[molecule.df.index[molecule.df['Cohesin_1_Time_(s)'] == pause_end].item()+1,'Cohesin_1_Position_on_DNA'] > 
                         molecule.df.loc[molecule.df.index[molecule.df['Cohesin_1_Time_(s)'] == pause_end].item(),'Cohesin_1_Position_on_DNA'])):

                        _temp_df.loc[0,'Cohesin_turnaround'] = True

                    # before and after pause frames lower DNA position
                    elif ((molecule.df.loc[molecule.df.index[molecule.df['Cohesin_1_Time_(s)'] == pause_start].item()-1,'Cohesin_1_Position_on_DNA'] < 
                         molecule.df.loc[molecule.df.index[molecule.df['Cohesin_1_Time_(s)'] == pause_start].item(),'Cohesin_1_Position_on_DNA']) and
                        (molecule.df.loc[molecule.df.index[molecule.df['Cohesin_1_Time_(s)'] == pause_end].item()+1,'Cohesin_1_Position_on_DNA'] < 
                         molecule.df.loc[molecule.df.index[molecule.df['Cohesin_1_Time_(s)'] == pause_end].item(),'Cohesin_1_Position_on_DNA'])):
                          _temp_df.loc[0,'Cohesin_turnaround'] = True

                    else:
                          _temp_df.loc[0,'Cohesin_turnaround'] = False
                
                # a neighbouring row does not exist (e.g. pause starting from first frame):
                # 'Cohesin_turnaround' stays unset for this pause
                except KeyError:
                    pass
                

                # Passage: compares the position at the pause start with the position one row
                # after the pause end. 'Cohesin_pause_pass' stays unset when the pause is
                # outside the origin range and no passage happened.
                try:
                    #pause happened in origin range
                    if abs(molecule.df.loc[molecule.df.index[molecule.df['Cohesin_1_Time_(s)'] == pause_start].item(),'Cohesin_1_Position_on_DNA'] - pos_ars1) < thresh1:

                        #passed
                        if ((molecule.df.loc[molecule.df.index[molecule.df['Cohesin_1_Time_(s)'] == pause_start].item(),'Cohesin_1_Position_on_DNA'] < 
                             pos_ars1 < 
                             molecule.df.loc[molecule.df.index[molecule.df['Cohesin_1_Time_(s)'] == pause_end].item()+1,'Cohesin_1_Position_on_DNA']-thresh2) 
                            or 
                            (molecule.df.loc[molecule.df.index[molecule.df['Cohesin_1_Time_(s)'] == pause_start].item(),'Cohesin_1_Position_on_DNA'] > 
                             pos_ars1 > 
                             molecule.df.loc[molecule.df.index[molecule.df['Cohesin_1_Time_(s)'] == pause_end].item()+1,'Cohesin_1_Position_on_DNA']+thresh2)):
                            
                            _temp_df.loc[0,'Cohesin_pause_pass'] = True

                        #blocked
                        else:
                            _temp_df.loc[0,'Cohesin_pause_pass'] = False

                    #no direct collision
                    else:

                        if ((molecule.df.loc[molecule.df.index[molecule.df['Cohesin_1_Time_(s)'] == pause_start].item(),'Cohesin_1_Position_on_DNA'] < 
                             pos_ars1 < 
                             molecule.df.loc[molecule.df.index[molecule.df['Cohesin_1_Time_(s)'] == pause_end].item()+1,'Cohesin_1_Position_on_DNA']-thresh2) 
                            or 
                            (molecule.df.loc[molecule.df.index[molecule.df['Cohesin_1_Time_(s)'] == pause_start].item(),'Cohesin_1_Position_on_DNA'] > 
                             pos_ars1 > 
                             molecule.df.loc[molecule.df.index[molecule.df['Cohesin_1_Time_(s)'] == pause_end].item()+1,'Cohesin_1_Position_on_DNA']+thresh2)):

                            _temp_df.loc[0,'Cohesin_pause_pass'] = True

                except KeyError:
                    # no row after the pause end (last row reached), skip
                    pass


                # pause counts as "at MCM" when its median position is within thresh1 of the median MCM position
                try:
                    _temp_df.loc[0,'pause_at_MCM'] = abs(molecule.df['MCM_1_Position_on_DNA'].median() - _temp_df.loc[0,'pause_position_on_dna']) <= thresh1
                except KeyError:
                    # molecule has no MCM position column
                    pass

                # append this pause; every appended row keeps index label 0
                df_pause = pd.concat([df_pause, _temp_df])
            
# sort columns alphabetically
df_pause.sort_index(axis=1, inplace=True)

# fix data types
df_pause = df_pause.infer_objects()

In [None]:
# first rows of the per-pause dataframe
df_pause.head()

## EDA

In [None]:
# where does cohesin accumulate in regard to MCM positions
sns.distplot(df[('MCM_1_','avg_position_on_dna')])
sns.distplot(df[('Cohesin_1_','avg_position_on_dna')])

In [None]:
# summary statistics of the ARS1 passing probability, grouped by NaCl condition and number of MCM
df2.groupby(['nacl','number_MCM']).describe().filter(regex='ARS1 passing probability')

In [None]:
# first rows of the dataframe restricted to molecules with visualized encounters
df2.head()

## Figures

In [None]:
# color reference for the figure cells below, which index into `palette`
sns.palplot(palette)

### Example MCM bleaching trajectories

In [None]:
# UIDs of the molecules shown in the bleaching figure; they are looked up in
# DnaMoleculeArchive.instances[0] and fill the 3 x 2 panel grid in this order
example_molecules = ['bfhq3KJhprt6Uk9xqN2RVw', 'qf26JqfJC5CXLBifQZa1MZ','3T4CJeXAeAJusyfK3Co8B6','x2yJf3Rz9b4VZHFeJhm2F2'
                     , 'ctWAySLeuyw2muGU2mM3vG', 'o5DmPxZvHpJSh87zn5f68s']

In [None]:
# NOTE: `molecule` is whatever loop variable was assigned last in a previously executed cell
molecule.df

In [None]:
# 2 step photobleaching MCM example molecule: ySey5qnA9nZEdm66fh6Ku
# Grid of MCM intensity traces (one panel per example molecule) with the
# bleaching segments drawn on top.
rows = 3
cols = 2

fig,axes=plt.subplots(rows,cols,figsize=(3,3),sharex=True, sharey=True)

# One panel per molecule, filled row by row. Pairing molecules with panels
# directly keeps the layout correct even if a molecule has no or several
# bleaching segment tables.
for uid, ax in zip(example_molecules, axes.flat):
    molecule = DnaMoleculeArchive.instances[0].get_molecule_by_uid(uid)
    sns.lineplot(x='MCM_1_Time_(s)', y='MCM_1_Intensity',data=molecule.df,
                 ax=ax,color='m',lw=0.25)
    for seg_df in filter(lambda sdf: sdf.type == 'bleaching', molecule.seg_dfs):
        for row in seg_df.df.index:
            # each segment is drawn as a straight line from (X1, Y1) to (X2, Y2)
            temp_df = pd.DataFrame(data=[seg_df.df.loc[row,['X1','Y1']].values, seg_df.df.loc[row,['X2','Y2']].values],columns=['X','Y'])
            sns.lineplot(x='X', y='Y',data=temp_df,ax=ax,color='k',lw=1)

# shared axis labels for the whole figure instead of one label per panel
fig.text(0.5, 0.04, 'Time (s)', ha='center')
fig.text(0.04, 0.5, 'MCM fluorescence (AU)', va='center', rotation='vertical')

for ax in axes.flat:
    ax.set_xlabel(None)
    ax.set_ylabel(None)


plt.xticks([0,50,100,150,200])
plt.yticks([0,5000,10000,15000,20000])
# tick labels show the intensity divided by 10000
axes[0,0].set_yticklabels([0,0.5,1.0,1.5,2.0])
plt.xlim(0,220)
plt.ylim([-3000,20000])
#plt.ticklabel_format(axis="y", style="sci",scilimits=(0,0))
#fig.suptitle('Photobleaching confirms MCM DH formation')

sns.despine()

fig.tight_layout(rect=[0.05, 0.05, 0.95, 0.95])

plt.savefig(dir_out+"MCM_DH_bleaching.svg",transparent=True)

### 1 - Cohesin diffusion coefficient (pause segments and YDF data excluded)

In [None]:
# Box + strip plot of the cohesin diffusion coefficient per NaCl concentration,
# split by presence of MCM at the origin (YDF data excluded), annotated with the
# number of molecules and the bootstrapped mean per group.
fig,axes = plt.subplots()
data = df[df['MCM_variant'] != 'ydf']
#setup style

boxprops = {'edgecolor': '.15', 'linewidth': 1, 'facecolor':'w'}
lineprops = {'color': '.15', 'linewidth': 1}
kwargs = {'hue_order': [False, True]}

boxplot_kwargs = dict({'boxprops': boxprops, 'medianprops': lineprops,
                       'whiskerprops': lineprops, 'capprops': lineprops,
                       'width': 0.75},
                      **kwargs)

stripplot_kwargs = dict({'linewidth': 0.3, 'size': 2, 'alpha': 0.4},
                        **kwargs)

sns.boxplot(x='nacl', y=('Cohesin_1_','d_coeff'), hue='MCM_at_origin',data=data, ax=axes,
           order=['150 mM', '500 mM'], fliersize=0, palette=palette[2::3], **boxplot_kwargs)
sns.stripplot(x='nacl', y=('Cohesin_1_','d_coeff'), hue='MCM_at_origin',data=data, ax=axes,
           order=['150 mM', '500 mM'], dodge=True, jitter=0.2, palette=palette[2::3], **stripplot_kwargs)

axes.set_xlim(-0.5,1.5)
axes.set_ylim(-5,100)
axes.set_xlabel('NaCl concentration')
axes.set_ylabel('Cohesin diffusion coefficient (kbp2/s)')

#calculate number of observations
# grouped by the plotted hue (MCM_at_origin), so there are exactly two counts per
# NaCl concentration even if number_MCM takes values above 1
nobs = data.groupby(['nacl','MCM_at_origin']).size()
nobs = [f"n = {count}" for count in nobs]

# calculate mean and 95 % ci by bootstrapping (more accurate than 1.96xsem)
# group order (nacl sorted, then False / True) matches the order of nobs above

bootstrap_mean = []
bootstrap_95_ci_lower = []
bootstrap_95_ci_upper = []

for nacl in data['nacl'].sort_values().unique():
    for ori_mcm in data['MCM_at_origin'].sort_values().unique():
        bootstrap_data = data[(data['nacl'] == nacl) & (data['MCM_at_origin']==ori_mcm)][('Cohesin_1_','d_coeff')]
        bootstrap_means = bootstrap(bootstrap_data,n_boot=10000)
        bootstrap_mean.append(np.mean(bootstrap_means))
        ci_lower, ci_upper = calc_ci(bootstrap_means,ci=95)
        bootstrap_95_ci_lower.append(ci_lower)
        bootstrap_95_ci_upper.append(ci_upper)

bootstrap_mean = [round(num,1) for num in bootstrap_mean]
# half width of the confidence interval
ci_95_range = [round((upper - lower)/2, 1)
               for lower, upper in zip(bootstrap_95_ci_lower, bootstrap_95_ci_upper)]

diffusion = [f'{mean}\n+ {half_width}' for mean, half_width in zip(bootstrap_mean, ci_95_range)]

#put nobs and diff values into plot
# two groups per tick: index 2*tick is drawn left of the tick, 2*tick+1 right of it
for tick in range(len(axes.get_xticklabels())):
    left, right = 2*tick, 2*tick+1
    axes.text(tick-.2, 110, nobs[left], horizontalalignment='center', verticalalignment='top', color='.15')
    axes.text(tick+.2, 110, nobs[right], horizontalalignment='center', verticalalignment='top', color='.15')
    axes.text(tick-.2, 100, diffusion[left], horizontalalignment='center', verticalalignment='top', color='.15')
    axes.text(tick+.2, 100, diffusion[right], horizontalalignment='center', verticalalignment='top', color='.15')

# alpha for boxplot filling (not required here)
for patch in axes.artists:
    r, g, b, a = patch.get_facecolor()
    patch.set_facecolor((r, g, b, .2))

# color box outlines and their lines alternately with the two hue colors
for i, artist in enumerate(axes.artists):
    col = palette[2] if i % 2 == 0 else palette[5]

    # This sets the color for the main box
    artist.set_edgecolor(col)
    # Each box has 6 associated Line2D objects (to make the whiskers, fliers, etc.)
    # Loop over them here, and use the same colour as above
    for line in axes.lines[i*6:i*6+6]:
        line.set_color(col)
        line.set_mfc(col)
        line.set_mec(col)

# Fix the legend: drop the first two handles and keep the remaining ones
handles, labels = axes.get_legend_handles_labels()
lgd = axes.legend(handles[2:], ['No', 'Yes'], frameon=False, title='MCM at origin?',
               loc='center left', bbox_to_anchor=[1, 0.5],handletextpad=0.5)

# enlarge the legend markers
lgd.legendHandles[0]._sizes = [40]
lgd.legendHandles[1]._sizes = [40]


sns.despine()

plt.savefig(dir_out+'Cohesin_diffusion_coefficient.svg',transparent=True)

### 2 - MCM barrier for cohesin translocation

### a - 150 mM NaCl 

In [None]:
# Bar plot with error bars plus the per-molecule values of the ARS1 passing
# probability at 150 mM NaCl, split by MCM variant (transparent bars, colored outlines).
fig,axes = plt.subplots()
nacl_conc = '150 mM'
data = df2[df2['nacl'] == nacl_conc]

barprops = {'edgecolor': palette[2::2], 'linewidth': 1, 'facecolor':(1, 1, 1, 0),
            'errwidth':1, 'capsize':.3, 'n_boot':10000, 'seed': 42, 'ci':95, 'errcolor':'.15'}

# category order shared by both plot layers and by the tick labels below
kwargs = {'order': ['n/a', 'wt', 'ydf']}

barplot_kwargs = dict(barprops, **kwargs)

stripplot_kwargs = dict({'linewidth': 0.3, 'size': 3, 'alpha': 0.4},
                        **kwargs)

sns.barplot(x='MCM_variant', y='ARS1 passing probability',data=data, ax=axes,
            palette=palette[2::2], **barplot_kwargs)

sns.stripplot(x='MCM_variant', y='ARS1 passing probability',data=data, ax=axes,
           dodge=True, jitter=0.25, palette=palette[2::2], **stripplot_kwargs)

axes.set_xlim(-0.5, 2.5)
axes.set_ylim(-0.05,1)
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Probability of cohesin bypassing origin')

# change errcolor: give the lines of each error bar the edge color of their bar
patches = axes.patches
lines_per_err = 3

for i, line in enumerate(axes.get_lines()):
    newcolor = patches[i // lines_per_err].get_edgecolor()
    line.set_color(newcolor)

#calculate nobs
# aligned explicitly to the plotted order; a variant without molecules counts as 0
nobs = data.groupby('MCM_variant').size().reindex(kwargs['order'], fill_value=0)
nobs = [f"n = {count}" for count in nobs]

#put nobs into plot together with xlabel
axes.set_xticklabels([f'{label}\n({count})'
                      for label, count in zip(['No', 'Wildtype', 'Mcm3-YDF'], nobs)])

sns.despine()

plt.savefig(dir_out+'Cohesin_origin_bypass_150mM.svg',transparent=True)

In [None]:
# Same plot as 'Cohesin_origin_bypass_150mM.svg' (transparent bars, colored
# outlines), drawn at a fixed figure size of 3.66 x 1.98.
fig,axes = plt.subplots(figsize=(3.66,1.98))
nacl_conc = '150 mM'
data = df2[df2['nacl'] == nacl_conc]

barprops = {'edgecolor': palette[2::2], 'linewidth': 1, 'facecolor':(1, 1, 1, 0),
            'errwidth':1, 'capsize':.3, 'n_boot':10000, 'seed': 42, 'ci':95, 'errcolor':'.15'}

# category order shared by both plot layers and by the tick labels below
kwargs = {'order': ['n/a', 'wt', 'ydf']}

barplot_kwargs = dict(barprops, **kwargs)

stripplot_kwargs = dict({'linewidth': 0.3, 'size': 3, 'alpha': 0.4},
                        **kwargs)

sns.barplot(x='MCM_variant', y='ARS1 passing probability',data=data, ax=axes,
            palette=palette[2::2], **barplot_kwargs)

sns.stripplot(x='MCM_variant', y='ARS1 passing probability',data=data, ax=axes,
           dodge=True, jitter=0.25, palette=palette[2::2], **stripplot_kwargs)

axes.set_xlim(-0.5, 2.5)
axes.set_ylim(-0.05,1)
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Probability of cohesin bypassing origin')

# change errcolor: give the lines of each error bar the edge color of their bar
patches = axes.patches
lines_per_err = 3

for i, line in enumerate(axes.get_lines()):
    newcolor = patches[i // lines_per_err].get_edgecolor()
    line.set_color(newcolor)

#calculate nobs
# aligned explicitly to the plotted order; a variant without molecules counts as 0
nobs = data.groupby('MCM_variant').size().reindex(kwargs['order'], fill_value=0)
nobs = [f"n = {count}" for count in nobs]

#put nobs into plot together with xlabel
axes.set_xticklabels([f'{label}\n({count})'
                      for label, count in zip(['No', 'Wildtype', 'Mcm3-YDF'], nobs)])

sns.despine()

plt.savefig(dir_out+'Cohesin_origin_bypass_150mM_BartSize.svg',transparent=True)

In [None]:
# Variant v2 of the 150 mM bypass plot: transparent bars with dark ('.15')
# outlines and smaller strip plot markers.
fig,axes = plt.subplots()
nacl_conc = '150 mM'
data = df2[df2['nacl'] == nacl_conc]

barprops = {'edgecolor': '.15', 'linewidth': 1, 'facecolor':(1, 1, 1, 0),
            'errwidth':1, 'capsize':.3, 'n_boot':10000, 'seed': 42, 'ci':95, 'errcolor':'.15'}

# category order shared by both plot layers and by the tick labels below
kwargs = {'order': ['n/a', 'wt', 'ydf']}

barplot_kwargs = dict(barprops, **kwargs)

stripplot_kwargs = dict({'linewidth': 0.3, 'size': 2, 'alpha': 0.4},
                        **kwargs)

sns.barplot(x='MCM_variant', y='ARS1 passing probability',data=data, ax=axes,
            palette=palette[2::2], **barplot_kwargs)

sns.stripplot(x='MCM_variant', y='ARS1 passing probability',data=data, ax=axes,
           dodge=True, jitter=0.25, palette=palette[2::2], **stripplot_kwargs)

axes.set_xlim(-0.5, 2.5)
axes.set_ylim(-0.05,1)
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Probability of cohesin bypassing origin')

# change errcolor: give the lines of each error bar the edge color of their bar
patches = axes.patches
lines_per_err = 3

for i, line in enumerate(axes.get_lines()):
    newcolor = patches[i // lines_per_err].get_edgecolor()
    line.set_color(newcolor)

#calculate nobs
# aligned explicitly to the plotted order; a variant without molecules counts as 0
nobs = data.groupby('MCM_variant').size().reindex(kwargs['order'], fill_value=0)
nobs = [f"n = {count}" for count in nobs]

#put nobs into plot together with xlabel
axes.set_xticklabels([f'{label}\n({count})'
                      for label, count in zip(['No', 'Wildtype', 'Mcm3-YDF'], nobs)])

sns.despine()

plt.savefig(dir_out+'Cohesin_origin_bypass_150mM_v2.svg',transparent=True)

In [None]:
# Variant v3 of the 150 mM bypass plot: bars filled with the palette colors at
# reduced opacity, dark ('.15') outlines.
fig,axes = plt.subplots()
nacl_conc = '150 mM'
data = df2[df2['nacl'] == nacl_conc]

barprops = {'edgecolor': '.15', 'linewidth': 1,
            'errwidth':1, 'capsize':.3, 'n_boot':10000, 'seed': 42, 'ci':95, 'errcolor':'.15'}

# category order shared by both plot layers and by the tick labels below
kwargs = {'order': ['n/a', 'wt', 'ydf']}

barplot_kwargs = dict(barprops, **kwargs)

stripplot_kwargs = dict({'linewidth': 0.3, 'size': 2, 'alpha': 0.4},
                        **kwargs)

sns.barplot(x='MCM_variant', y='ARS1 passing probability',data=data, ax=axes,
            palette=palette[2::2], **barplot_kwargs)

# make the bar filling semi-transparent (done before the strip plot is added,
# so only the bar patches are affected)
for patch in axes.patches:
    r, g, b, a = patch.get_facecolor()
    patch.set_facecolor((r, g, b, .4))

sns.stripplot(x='MCM_variant', y='ARS1 passing probability',data=data, ax=axes,
           dodge=True, jitter=0.25, palette=palette[2::2], **stripplot_kwargs)

axes.set_xlim(-0.5, 2.5)
axes.set_ylim(-0.05,1)
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Probability of cohesin bypassing origin')

# change errcolor: give the lines of each error bar the edge color of their bar
patches = axes.patches
lines_per_err = 3

for i, line in enumerate(axes.get_lines()):
    newcolor = patches[i // lines_per_err].get_edgecolor()
    line.set_color(newcolor)

#calculate nobs
# aligned explicitly to the plotted order; a variant without molecules counts as 0
nobs = data.groupby('MCM_variant').size().reindex(kwargs['order'], fill_value=0)
nobs = [f"n = {count}" for count in nobs]

#put nobs into plot together with xlabel
axes.set_xticklabels([f'{label}\n({count})'
                      for label, count in zip(['No', 'Wildtype', 'Mcm3-YDF'], nobs)])

sns.despine()

plt.savefig(dir_out+'Cohesin_origin_bypass_150mM_v3.svg',transparent=True)

In [None]:
# Paper-sized version (3.66 x 1.98 in) of the 150 mM origin-bypass bar plot:
# bars with bootstrapped 95 % CI plus the individual observations as a strip plot.
fig, axes = plt.subplots(figsize=(3.66, 1.98))
nacl_conc = '150 mM'
data = df2[df2['nacl'] == nacl_conc]

# category order shared by both plot layers
kwargs = {'order': ['n/a', 'wt', 'ydf']}

barprops = {
    'edgecolor': '.15',
    'linewidth': 1,
    'errwidth': 1,
    'capsize': .3,
    'n_boot': 10000,
    'seed': 42,
    'ci': 95,
    'errcolor': '.15',
}
barplot_kwargs = {**barprops, **kwargs}
stripplot_kwargs = {'linewidth': 0.3, 'size': 2, 'alpha': 0.4, **kwargs}

sns.barplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
            palette=palette[2::2], **barplot_kwargs)

# keep each bar's RGB fill but make it semi-transparent (alpha 0.4)
for patch in axes.patches:
    red, green, blue, _ = patch.get_facecolor()
    patch.set_facecolor((red, green, blue, .4))

sns.stripplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
              dodge=True, jitter=0.25, palette=palette[2::2], **stripplot_kwargs)

axes.set_xlim(-0.5, 2.5)
axes.set_ylim(-0.05, 1)
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Probability of cohesin bypassing origin')

# give every error-bar line the edge color of the bar it belongs to;
# the code assumes three Line2D objects per bar
patches = axes.patches
lines_per_err = 3
for idx, err_line in enumerate(axes.get_lines()):
    err_line.set_color(patches[idx // lines_per_err].get_edgecolor())

# number of observations per MCM variant, formatted as "n = <count>";
# positions rely on groupby's key order matching `order` above
nobs = [f'n = {count}' for count in data.groupby('MCM_variant').size()]

# tick labels: condition name plus sample size on a second line
axes.set_xticklabels([
    f'No\n({nobs[0]})',
    f'Wildtype\n({nobs[1]})',
    f'Mcm3-YDF\n({nobs[2]})',
])

sns.despine()

plt.savefig(dir_out + 'Cohesin_origin_bypass_150mM_v3_PAPER.svg', transparent=True)

In [None]:
# Origin bypass at 150 mM NaCl without bars: marker-less point plot for the
# 95 % CI, a horizontal line at each group mean, and a strip plot of the data.
fig, axes = plt.subplots()
nacl_conc = '150 mM'
data = df2[df2['nacl'] == nacl_conc]

# category order shared by all plot layers
kwargs = {'order': ['n/a', 'wt', 'ydf']}

barprops = {
    'edgecolor': '.15',
    'linewidth': 1,
    'facecolor': (1, 1, 1, 0),
    'errwidth': 1,
    'capsize': .2,
    'n_boot': 10000,
    'seed': 42,
    'ci': 95,
    'errcolor': '.15',
}
barplot_kwargs = {**barprops, **kwargs}
stripplot_kwargs = {'linewidth': 0.3, 'size': 2, 'alpha': 0.4, **kwargs}

# 95 % confidence interval (no markers, no connecting line)
sns.pointplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
              markers='', join=False, color='.15', **barplot_kwargs)

# group means drawn as horizontal bars of half-width 0.3 at x = 0, 1, 2
means = data.groupby('MCM_variant')['ARS1 passing probability'].mean().values
plt.errorbar(range(0, 3), means, xerr=0.3, fmt='none', ecolor='.15', elinewidth=1)

sns.stripplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
              dodge=True, jitter=0.25, palette=palette[2::2], **stripplot_kwargs)

axes.set_xlim(-0.5, 2.5)
axes.set_ylim(-0.05, 1)
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Probability of cohesin bypassing origin')

# number of observations per MCM variant, formatted as "n = <count>";
# positions rely on groupby's key order matching `order` above
nobs = [f'n = {count}' for count in data.groupby('MCM_variant').size()]

# tick labels: condition name plus sample size on a second line
axes.set_xticklabels([
    f'No\n({nobs[0]})',
    f'Wildtype\n({nobs[1]})',
    f'Mcm3-YDF\n({nobs[2]})',
])

sns.despine()

plt.savefig(dir_out + 'Cohesin_origin_bypass_150mM_v4.svg', transparent=True)

In [None]:
# Violin-plot version of the 150 mM origin-bypass figure: white violins with
# colored outlines, swarm of observations, 95 % CI and group means.
# NOTE(review): the figure is created wide (17 in) and shrunk before saving,
# presumably to control how the swarm points are laid out; confirm.
fig, axes = plt.subplots(figsize=(17, 2))
nacl_conc = '150 mM'
data = df2[df2['nacl'] == nacl_conc]

# category order shared by all plot layers
kwargs = {'order': ['n/a', 'wt', 'ydf']}

vioprops = {
    'edgecolor': palette[2::2],
    'linewidth': 1,
    'facecolor': (1, 1, 1, 0),
    'errwidth': 1,
    'capsize': .2,
    'n_boot': 10000,
    'seed': 42,
    'ci': 95,
    'errcolor': '.15',
}
vioplot_kwargs = {**vioprops, **kwargs}
swarmplot_kwargs = {'linewidth': 0.3, 'size': 3, 'alpha': 0.2, **kwargs}

sns.violinplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
               palette=None, color='w', inner=None, **vioplot_kwargs)

# outline the three violin bodies in the palette colors and fade them
for idx in range(3):
    violin_body = axes.collections[idx]
    violin_body.set_edgecolor(palette[2 + idx * 2])
    violin_body.set_alpha(0.5)

sns.swarmplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
              dodge=True, palette=palette[2::2], **swarmplot_kwargs)

# 95 % confidence interval (marker-less point plot)
sns.pointplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
              palette=palette[2::2], markers='', **vioplot_kwargs)

# group means drawn as horizontal bars of half-width 0.3 at x = 0, 1, 2
means = data.groupby('MCM_variant')['ARS1 passing probability'].mean().values
plt.errorbar(range(0, 3), means, xerr=0.3, fmt='none', ecolor=palette[2::2], elinewidth=1)

axes.set_xlim(-0.5, 2.5)
axes.set_ylim(-0.05, 1)
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Probability of cohesin bypassing origin')

# number of observations per MCM variant, formatted as "n = <count>";
# positions rely on groupby's key order matching `order` above
nobs = [f'n = {count}' for count in data.groupby('MCM_variant').size()]

# tick labels: condition name plus sample size on a second line
axes.set_xticklabels([
    f'No\n({nobs[0]})',
    f'Wildtype\n({nobs[1]})',
    f'Mcm3-YDF\n({nobs[2]})',
])

# final figure size used for the exported SVG
fig.set_size_inches(2.67, 2)
sns.despine()

plt.savefig(dir_out + 'Cohesin_origin_bypass_150mM_violin.svg', transparent=True)

In [None]:
# Violin plot v2 of the 150 mM origin-bypass data: white violins with colored
# outlines, swarm of observations, dark 95 % CI and dark group-mean bars.
# NOTE(review): the figure is created wide (12 in) and shrunk before saving,
# presumably to control how the swarm points are laid out; confirm.
fig, axes = plt.subplots(figsize=(12, 2))
nacl_conc = '150 mM'
data = df2[df2['nacl'] == nacl_conc]

# category order shared by all plot layers
kwargs = {'order': ['n/a', 'wt', 'ydf']}

vioprops = {
    'edgecolor': palette[2::2],
    'linewidth': 1,
    'facecolor': (1, 1, 1, 0),
    'errwidth': 1,
    'capsize': .2,
    'n_boot': 10000,
    'seed': 42,
    'ci': 95,
    'errcolor': '.15',
}
vioplot_kwargs = {**vioprops, **kwargs}
swarmplot_kwargs = {'linewidth': 0.3, 'size': 2, 'alpha': 0.2, **kwargs}

sns.violinplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
               palette=None, color='w', inner=None, **vioplot_kwargs)

# outline the three violin bodies in the palette colors and fade them
for idx in range(3):
    violin_body = axes.collections[idx]
    violin_body.set_edgecolor(palette[2 + idx * 2])
    violin_body.set_alpha(0.5)

sns.swarmplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
              dodge=True, palette=palette[2::2], **swarmplot_kwargs)

# 95 % confidence interval (no markers, no connecting line)
sns.pointplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
              markers='', join=False, color='.15', **vioplot_kwargs)

# group means drawn as horizontal bars of half-width 0.3 at x = 0, 1, 2
means = data.groupby('MCM_variant')['ARS1 passing probability'].mean().values
plt.errorbar(range(0, 3), means, xerr=0.3, fmt='none', ecolor='.15', elinewidth=1)

axes.set_xlim(-0.5, 2.5)
axes.set_ylim(-0.05, 1)
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Probability of cohesin bypassing origin')

# number of observations per MCM variant, formatted as "n = <count>";
# positions rely on groupby's key order matching `order` above
nobs = [f'n = {count}' for count in data.groupby('MCM_variant').size()]

# tick labels: condition name plus sample size on a second line
axes.set_xticklabels([
    f'No\n({nobs[0]})',
    f'Wildtype\n({nobs[1]})',
    f'Mcm3-YDF\n({nobs[2]})',
])

# final figure size used for the exported SVG
fig.set_size_inches(2.67, 2)
sns.despine()

plt.savefig(dir_out + 'Cohesin_origin_bypass_150mM_violin-v2.svg', transparent=True)

In [None]:
# Violin plot v3 of the 150 mM origin-bypass data: palette-filled violins,
# swarm of observations, dark 95 % CI and dark group-mean bars.
# NOTE(review): the figure is created wide (12 in) and shrunk before saving,
# presumably to control how the swarm points are laid out; confirm.
fig, axes = plt.subplots(figsize=(12, 2))
nacl_conc = '150 mM'
data = df2[df2['nacl'] == nacl_conc]

# category order shared by all plot layers
kwargs = {'order': ['n/a', 'wt', 'ydf']}

vioprops = {
    'edgecolor': palette[2::2],
    'linewidth': 1,
    'errwidth': 1,
    'capsize': .2,
    'n_boot': 10000,
    'seed': 42,
    'ci': 95,
    'errcolor': '.15',
}
vioplot_kwargs = {**vioprops, **kwargs}
swarmplot_kwargs = {'linewidth': 0.3, 'size': 2, 'alpha': 0.4, **kwargs}

sns.violinplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
               palette=palette[2::2], inner=None, **vioplot_kwargs)

# outline the three violin bodies in the palette colors and fade them
for idx in range(3):
    violin_body = axes.collections[idx]
    violin_body.set_edgecolor(palette[2 + idx * 2])
    violin_body.set_alpha(0.4)

sns.swarmplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
              dodge=True, palette=palette[2::2], **swarmplot_kwargs)

# 95 % confidence interval (no markers, no connecting line)
sns.pointplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
              markers='', join=False, color='.15', **vioplot_kwargs)

# group means drawn as horizontal bars of half-width 0.3 at x = 0, 1, 2
means = data.groupby('MCM_variant')['ARS1 passing probability'].mean().values
plt.errorbar(range(0, 3), means, xerr=0.3, fmt='none', ecolor='.15', elinewidth=1)

axes.set_xlim(-0.5, 2.5)
axes.set_ylim(-0.05, 1)
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Probability of cohesin bypassing origin')

# number of observations per MCM variant, formatted as "n = <count>";
# positions rely on groupby's key order matching `order` above
nobs = [f'n = {count}' for count in data.groupby('MCM_variant').size()]

# tick labels: condition name plus sample size on a second line
axes.set_xticklabels([
    f'No\n({nobs[0]})',
    f'Wildtype\n({nobs[1]})',
    f'Mcm3-YDF\n({nobs[2]})',
])

# final figure size used for the exported SVG
fig.set_size_inches(2.67, 2)
sns.despine()

plt.savefig(dir_out + 'Cohesin_origin_bypass_150mM_violin-v3.svg', transparent=True)

In [None]:
# Violin plot v3 of the 150 mM origin-bypass data, exported at 3.66 x 1.98 in:
# palette-filled violins, swarm of observations, 95 % CI and group-mean bars.
# NOTE(review): the figure is created wide (11.5 in) and shrunk before saving,
# presumably to control how the swarm points are laid out; confirm.
fig, axes = plt.subplots(figsize=(11.5, 1.98))
nacl_conc = '150 mM'
data = df2[df2['nacl'] == nacl_conc]

# category order shared by all plot layers
kwargs = {'order': ['n/a', 'wt', 'ydf']}

vioprops = {
    'edgecolor': palette[2::2],
    'linewidth': 1,
    'errwidth': 1,
    'capsize': .2,
    'n_boot': 10000,
    'seed': 42,
    'ci': 95,
    'errcolor': '.15',
}
vioplot_kwargs = {**vioprops, **kwargs}
swarmplot_kwargs = {'linewidth': 0.3, 'size': 2, 'alpha': 0.4, **kwargs}

sns.violinplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
               palette=palette[2::2], inner=None, **vioplot_kwargs)

# outline the three violin bodies in the palette colors and fade them
for idx in range(3):
    violin_body = axes.collections[idx]
    violin_body.set_edgecolor(palette[2 + idx * 2])
    violin_body.set_alpha(0.4)

sns.swarmplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
              dodge=True, palette=palette[2::2], **swarmplot_kwargs)

# 95 % confidence interval (no markers, no connecting line)
sns.pointplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
              markers='', join=False, color='.15', **vioplot_kwargs)

# group means drawn as horizontal bars of half-width 0.3 at x = 0, 1, 2
means = data.groupby('MCM_variant')['ARS1 passing probability'].mean().values
plt.errorbar(range(0, 3), means, xerr=0.3, fmt='none', ecolor='.15', elinewidth=1)

axes.set_xlim(-0.5, 2.5)
axes.set_ylim(-0.05, 1)
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Probability of cohesin bypassing origin')

# number of observations per MCM variant, formatted as "n = <count>";
# positions rely on groupby's key order matching `order` above
nobs = [f'n = {count}' for count in data.groupby('MCM_variant').size()]

# tick labels: condition name plus sample size on a second line
axes.set_xticklabels([
    f'No\n({nobs[0]})',
    f'Wildtype\n({nobs[1]})',
    f'Mcm3-YDF\n({nobs[2]})',
])

# final figure size used for the exported SVG
fig.set_size_inches(3.66, 1.98)
sns.despine()

plt.savefig(dir_out + 'Cohesin_origin_bypass_150mM_violin-v3_BartSize.svg', transparent=True)

In [None]:
# Violin plot v4 of the 150 mM origin-bypass data: palette-filled violins with
# dark ('.15') outlines, swarm of observations, 95 % CI and group-mean bars.
# NOTE(review): the figure is created wide (12 in) and shrunk before saving,
# presumably to control how the swarm points are laid out; confirm.
fig, axes = plt.subplots(figsize=(12, 2))
nacl_conc = '150 mM'
data = df2[df2['nacl'] == nacl_conc]

# category order shared by all plot layers
kwargs = {'order': ['n/a', 'wt', 'ydf']}

vioprops = {
    'edgecolor': palette[2::2],
    'linewidth': 1,
    'errwidth': 1,
    'capsize': .2,
    'n_boot': 10000,
    'seed': 42,
    'ci': 95,
    'errcolor': '.15',
}
vioplot_kwargs = {**vioprops, **kwargs}
swarmplot_kwargs = {'linewidth': 0.3, 'size': 2, 'alpha': 0.4, **kwargs}

sns.violinplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
               palette=palette[2::2], inner=None, **vioplot_kwargs)

# give the three violin bodies a dark outline and fade them
for idx in range(3):
    violin_body = axes.collections[idx]
    violin_body.set_edgecolor('.15')
    violin_body.set_alpha(0.4)

sns.swarmplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
              dodge=True, palette=palette[2::2], **swarmplot_kwargs)

# 95 % confidence interval (no markers, no connecting line)
sns.pointplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
              markers='', join=False, color='.15', **vioplot_kwargs)

# group means drawn as horizontal bars of half-width 0.3 at x = 0, 1, 2
means = data.groupby('MCM_variant')['ARS1 passing probability'].mean().values
plt.errorbar(range(0, 3), means, xerr=0.3, fmt='none', ecolor='.15', elinewidth=1)

axes.set_xlim(-0.5, 2.5)
axes.set_ylim(-0.05, 1)
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Probability of cohesin bypassing origin')

# number of observations per MCM variant, formatted as "n = <count>";
# positions rely on groupby's key order matching `order` above
nobs = [f'n = {count}' for count in data.groupby('MCM_variant').size()]

# tick labels: condition name plus sample size on a second line
axes.set_xticklabels([
    f'No\n({nobs[0]})',
    f'Wildtype\n({nobs[1]})',
    f'Mcm3-YDF\n({nobs[2]})',
])

# final figure size used for the exported SVG
fig.set_size_inches(2.67, 2)
sns.despine()

plt.savefig(dir_out + 'Cohesin_origin_bypass_150mM_violin-v4.svg', transparent=True)

In [None]:
# Histograms of the origin-bypass probability at 150 mM NaCl, one panel per
# MCM variant, to inspect the shape of the underlying distributions.
#
# The 150 mM subset is selected explicitly here: previously this cell reused
# whatever `data` the last executed cell had left behind, so running it after
# e.g. a 500 mM cell silently plotted the wrong data set.
nacl_conc = '150 mM'
data = df2[df2['nacl'] == nacl_conc]

fig, axes = plt.subplots(1, 3, figsize=(7, 2), sharey=True)

# identical binning for all panels so they are directly comparable
bins = np.linspace(0, 1, 20)

# (value in 'MCM_variant', legend label, color) for each panel, left to right
panels = [
    ('n/a', 'No', palette[2]),
    ('wt', 'Wildtype', palette[4]),
    ('ydf', 'Mcm3-YDF', palette[6]),
]

for ax, (variant, label, color) in zip(axes, panels):
    sns.histplot(data=data[data['MCM_variant'] == variant], x='ARS1 passing probability',
                 bins=bins, stat='probability', color=color, ax=ax, label=label)
    ax.set_xlim(0, 1)

# y axis is shared, so setting the limit on the first panel is sufficient
axes[0].set_ylim(0, 0.8)

fig.legend(frameon=False, title='MCM at origin?')
sns.despine(offset=5)

plt.savefig(dir_out + 'Cohesin_origin_bypass_DistProblem.svg', transparent=True)

### b - 500 mM NaCl

In [None]:
# Origin bypass at 500 mM NaCl: unfilled bars with colored outlines
# (bootstrapped 95 % CI error bars) plus a strip plot of the observations.
fig, axes = plt.subplots(figsize=(1.78, 2))
nacl_conc = '500 mM'
data = df2[df2['nacl'] == nacl_conc]

# category order shared by both plot layers
kwargs = {'order': ['n/a', 'wt']}

barprops = {
    'edgecolor': palette[2::2],
    'linewidth': 1,
    'facecolor': (1, 1, 1, 0),
    'errwidth': 1,
    'capsize': .3,
    'n_boot': 10000,
    'seed': 42,
    'ci': 95,
    'errcolor': '.15',
}
barplot_kwargs = {**barprops, **kwargs}
stripplot_kwargs = {'linewidth': 0.3, 'size': 3, 'alpha': 0.4, **kwargs}

sns.barplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
            palette=palette[2::2], **barplot_kwargs)

sns.stripplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
              dodge=True, jitter=0.25, palette=palette[2::2], **stripplot_kwargs)

axes.set_xlim(-0.5, 1.5)
axes.set_ylim(-0.05, 1)
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Probability of cohesin bypassing origin')

# give every error-bar line the edge color of the bar it belongs to;
# the code assumes three Line2D objects per bar
patches = axes.patches
lines_per_err = 3
for idx, err_line in enumerate(axes.get_lines()):
    err_line.set_color(patches[idx // lines_per_err].get_edgecolor())

# number of observations per MCM variant, formatted as "n = <count>";
# positions rely on groupby's key order matching `order` above
nobs = [f'n = {count}' for count in data.groupby('MCM_variant').size()]

# tick labels: condition name plus sample size on a second line
axes.set_xticklabels([
    f'No\n({nobs[0]})',
    f'Wildtype\n({nobs[1]})',
])

sns.despine()

plt.savefig(dir_out + 'Cohesin_origin_bypass_500mM.svg', transparent=True)


In [None]:
# Origin bypass at 500 mM NaCl, exported at 1.91 x 1.52 in: unfilled bars with
# colored outlines (bootstrapped 95 % CI) plus a strip plot of the observations.
fig, axes = plt.subplots(figsize=(1.91, 1.52))
nacl_conc = '500 mM'
data = df2[df2['nacl'] == nacl_conc]

# category order shared by both plot layers
kwargs = {'order': ['n/a', 'wt']}

barprops = {
    'edgecolor': palette[2::2],
    'linewidth': 1,
    'facecolor': (1, 1, 1, 0),
    'errwidth': 1,
    'capsize': .3,
    'n_boot': 10000,
    'seed': 42,
    'ci': 95,
    'errcolor': '.15',
}
barplot_kwargs = {**barprops, **kwargs}
stripplot_kwargs = {'linewidth': 0.3, 'size': 3, 'alpha': 0.4, **kwargs}

sns.barplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
            palette=palette[2::2], **barplot_kwargs)

sns.stripplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
              dodge=True, jitter=0.25, palette=palette[2::2], **stripplot_kwargs)

axes.set_xlim(-0.5, 1.5)
axes.set_ylim(-0.05, 1)
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Probability of cohesin bypassing origin')

# give every error-bar line the edge color of the bar it belongs to;
# the code assumes three Line2D objects per bar
patches = axes.patches
lines_per_err = 3
for idx, err_line in enumerate(axes.get_lines()):
    err_line.set_color(patches[idx // lines_per_err].get_edgecolor())

# number of observations per MCM variant, formatted as "n = <count>";
# positions rely on groupby's key order matching `order` above
nobs = [f'n = {count}' for count in data.groupby('MCM_variant').size()]

# tick labels: condition name plus sample size on a second line
axes.set_xticklabels([
    f'No\n({nobs[0]})',
    f'Wildtype\n({nobs[1]})',
])

sns.despine()

plt.savefig(dir_out + 'Cohesin_origin_bypass_500mM_BartSize.svg', transparent=True)

In [None]:
# Violin-plot version of the 500 mM origin-bypass figure: palette-filled
# violins, swarm of observations, dark 95 % CI and dark group-mean bars.
# NOTE(review): the figure is created at 3 x 2 in and shrunk before saving,
# presumably to control how the swarm points are laid out; confirm.
fig, axes = plt.subplots(figsize=(3, 2))
nacl_conc = '500 mM'
data = df2[df2['nacl'] == nacl_conc]

# category order shared by all plot layers
kwargs = {'order': ['n/a', 'wt']}

vioprops = {
    'edgecolor': palette[2::2],
    'linewidth': 1,
    'errwidth': 1,
    'capsize': .2,
    'n_boot': 10000,
    'seed': 42,
    'ci': 95,
    'errcolor': '.15',
}
vioplot_kwargs = {**vioprops, **kwargs}
swarmplot_kwargs = {'linewidth': 0.3, 'size': 2, 'alpha': 0.4, **kwargs}

sns.violinplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
               palette=palette[2::2], inner=None, **vioplot_kwargs)

# outline the two violin bodies in the palette colors and fade them
for idx in range(2):
    violin_body = axes.collections[idx]
    violin_body.set_edgecolor(palette[2 + idx * 2])
    violin_body.set_alpha(0.4)

sns.swarmplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
              dodge=True, palette=palette[2::2], **swarmplot_kwargs)

# 95 % confidence interval (no markers, no connecting line)
sns.pointplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
              markers='', join=False, color='.15', **vioplot_kwargs)

# group means drawn as horizontal bars of half-width 0.3 at x = 0, 1
means = data.groupby('MCM_variant')['ARS1 passing probability'].mean().values
plt.errorbar(range(0, 2), means, xerr=0.3, fmt='none', ecolor='.15', elinewidth=1)

axes.set_xlim(-0.5, 1.5)
axes.set_ylim(-0.05, 1)
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Probability of cohesin bypassing origin')

# number of observations per MCM variant, formatted as "n = <count>";
# positions rely on groupby's key order matching `order` above
nobs = [f'n = {count}' for count in data.groupby('MCM_variant').size()]

# tick labels: condition name plus sample size on a second line
axes.set_xticklabels([
    f'No\n({nobs[0]})',
    f'Wildtype\n({nobs[1]})',
])

# final figure size used for the exported SVG
fig.set_size_inches(1.78, 2)
sns.despine()

plt.savefig(dir_out + 'Cohesin_origin_bypass_500mM_violin-v3.svg', transparent=True)

In [None]:
# Violin-plot version of the 500 mM origin-bypass figure at 1.91 x 1.52 in:
# palette-filled violins, swarm of observations, 95 % CI and group-mean bars.
fig, axes = plt.subplots(figsize=(1.91, 1.52))
nacl_conc = '500 mM'
data = df2[df2['nacl'] == nacl_conc]

# category order shared by all plot layers
kwargs = {'order': ['n/a', 'wt']}

vioprops = {
    'edgecolor': palette[2::2],
    'linewidth': 1,
    'errwidth': 1,
    'capsize': .2,
    'n_boot': 10000,
    'seed': 42,
    'ci': 95,
    'errcolor': '.15',
}
vioplot_kwargs = {**vioprops, **kwargs}
swarmplot_kwargs = {'linewidth': 0.3, 'size': 2, 'alpha': 0.4, **kwargs}

sns.violinplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
               palette=palette[2::2], inner=None, **vioplot_kwargs)

# outline the two violin bodies in the palette colors and fade them
for idx in range(2):
    violin_body = axes.collections[idx]
    violin_body.set_edgecolor(palette[2 + idx * 2])
    violin_body.set_alpha(0.4)

sns.swarmplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
              dodge=True, palette=palette[2::2], **swarmplot_kwargs)

# 95 % confidence interval (no markers, no connecting line)
sns.pointplot(x='MCM_variant', y='ARS1 passing probability', data=data, ax=axes,
              markers='', join=False, color='.15', **vioplot_kwargs)

# group means drawn as horizontal bars of half-width 0.3 at x = 0, 1
means = data.groupby('MCM_variant')['ARS1 passing probability'].mean().values
plt.errorbar(range(0, 2), means, xerr=0.3, fmt='none', ecolor='.15', elinewidth=1)

axes.set_xlim(-0.5, 1.5)
axes.set_ylim(-0.05, 1)
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Probability of cohesin bypassing origin')

# number of observations per MCM variant, formatted as "n = <count>";
# positions rely on groupby's key order matching `order` above
nobs = [f'n = {count}' for count in data.groupby('MCM_variant').size()]

# tick labels: condition name plus sample size on a second line
axes.set_xticklabels([
    f'No\n({nobs[0]})',
    f'Wildtype\n({nobs[1]})',
])

# re-apply the export size (same as at creation)
fig.set_size_inches(1.91, 1.52)
sns.despine()

plt.savefig(dir_out + 'Cohesin_origin_bypass_500mM_violin-v3_BartSize.svg', transparent=True)

### Check DNA length distribution

In [None]:
# Normalized histogram of DNA lengths over all molecules in df2, drawn with
# seaborn's distplot using fit=norm (no KDE).
fig, axes = plt.subplots(figsize=(1.78, 1.33))

data = df2

# 1-px wide bins between 25 and 50 px
length_bins = np.linspace(25, 50, 26)
sns.distplot(data['dna_length'], bins=length_bins, kde=False, fit=norm,
             color='.15', norm_hist=True)

axes.set_xlim(25, 50)
axes.set_ylim(0, 0.25)
# NOTE(review): the tick list goes up to 0.3, above the y-limit of 0.25 set on
# the previous line - confirm which upper limit is intended for the figure.
y_ticks = [0, 0.1, 0.2, 0.3]
axes.set_yticks(y_ticks)
axes.set_yticklabels(y_ticks)
axes.set_xlabel('DNA length (px)')
axes.set_ylabel('Probability density')

# sample size in the upper right corner (axes coordinates)
axes.text(0.95, 0.95, f"n = {len(data['dna_length'])}",
          transform=axes.transAxes,
          horizontalalignment='right', verticalalignment='top')

sns.despine()

plt.savefig(dir_out + 'DNA_lengths.svg', transparent=True)

In [None]:
# Mean of the 'dna_length' column over all rows of df2, shown as the cell output.
# NOTE(review): unit presumably pixels, as in the 'DNA length (px)' axis label of the histogram - confirm.
df2['dna_length'].mean()

### 3 - MCM barrier for cohesin translocation vs #MCM

### a - 150 mM NaCl - wildtype MCM

In [None]:
# Origin bypass vs. number of MCM bleaching steps (wildtype MCM, 150 mM NaCl):
# unfilled bars with bootstrapped 95 % CI plus a strip plot of the observations.
fig, axes = plt.subplots()
nacl_conc = '150 mM'
data = df2[(df2['nacl'] == nacl_conc) & (df2['MCM_variant'] == 'wt')]

# no extra shared keyword arguments for this figure
kwargs = {}

barprops = {
    'edgecolor': palette[3:],
    'linewidth': 1,
    'facecolor': (1, 1, 1, 0),
    'errwidth': 1,
    'capsize': .3,
    'n_boot': 10000,
    'seed': 42,
    'ci': 95,
    'errcolor': '.15',
}
barplot_kwargs = {**barprops, **kwargs}
stripplot_kwargs = {'linewidth': 0.3, 'size': 3, 'alpha': 0.4, **kwargs}

sns.barplot(x='MCM_bleaching_steps', y='ARS1 passing probability', data=data, ax=axes,
            palette=palette[3:], **barplot_kwargs)

sns.stripplot(x='MCM_bleaching_steps', y='ARS1 passing probability', data=data, ax=axes,
              dodge=True, jitter=0.25, palette=palette[3:], **stripplot_kwargs)

axes.set_ylim(-0.05, 1)
axes.set_xlabel('# of MCM at origin (bleaching steps)')
axes.set_ylabel('Probability of cohesin bypassing origin')
# label the categories 1..4 (assumes four bleaching-step groups are plotted)
axes.set_xticklabels(range(1, 5))

# give every error-bar line the edge color of the bar it belongs to;
# the code assumes three Line2D objects per bar
patches = axes.patches
lines_per_err = 3
for idx, err_line in enumerate(axes.get_lines()):
    err_line.set_color(patches[idx // lines_per_err].get_edgecolor())

# number of observations per bleaching-step group, formatted as "n = <count>"
nobs = [f'n = {count}' for count in data.groupby('MCM_bleaching_steps').size()]

# append the sample size to each existing tick label on a second line
tick_texts = [tick.get_text() for tick in axes.get_xticklabels()]
labels = [f'{text}\n({nobs[pos]})' for pos, text in enumerate(tick_texts)]
axes.set_xticklabels(labels)

sns.despine()

plt.savefig(dir_out + 'Cohesin_origin_bypass_vs#MCM-wt_150mM.svg', transparent=True)

### b - 150 mM NaCl - YDF MCM

In [None]:
# Origin bypass vs. number of MCM bleaching steps (Mcm3-YDF, 150 mM NaCl):
# unfilled bars with bootstrapped 95 % CI plus a strip plot of the observations.
fig, axes = plt.subplots()
nacl_conc = '150 mM'
data = df2[(df2['nacl'] == nacl_conc) & (df2['MCM_variant'] == 'ydf')]

# no extra shared keyword arguments for this figure
kwargs = {}

barprops = {
    'edgecolor': palette[3:],
    'linewidth': 1,
    'facecolor': (1, 1, 1, 0),
    'errwidth': 1,
    'capsize': .3,
    'n_boot': 10000,
    'seed': 42,
    'ci': 95,
    'errcolor': '.15',
}
barplot_kwargs = {**barprops, **kwargs}
stripplot_kwargs = {'linewidth': 0.3, 'size': 3, 'alpha': 0.4, **kwargs}

sns.barplot(x='MCM_bleaching_steps', y='ARS1 passing probability', data=data, ax=axes,
            palette=palette[3:], **barplot_kwargs)

sns.stripplot(x='MCM_bleaching_steps', y='ARS1 passing probability', data=data, ax=axes,
              dodge=True, jitter=0.25, palette=palette[3:], **stripplot_kwargs)

axes.set_ylim(-0.05, 1)
axes.set_xlabel('# of MCM at origin (bleaching steps)')
axes.set_ylabel('Probability of cohesin bypassing origin')
# label the categories 1..4 (assumes four bleaching-step groups are plotted)
axes.set_xticklabels(range(1, 5))

# give every error-bar line the edge color of the bar it belongs to;
# the code assumes three Line2D objects per bar
patches = axes.patches
lines_per_err = 3
for idx, err_line in enumerate(axes.get_lines()):
    err_line.set_color(patches[idx // lines_per_err].get_edgecolor())

# number of observations per bleaching-step group, formatted as "n = <count>"
nobs = [f'n = {count}' for count in data.groupby('MCM_bleaching_steps').size()]

# append the sample size to each existing tick label on a second line
tick_texts = [tick.get_text() for tick in axes.get_xticklabels()]
labels = [f'{text}\n({nobs[pos]})' for pos, text in enumerate(tick_texts)]
axes.set_xticklabels(labels)

sns.despine()

plt.savefig(dir_out + 'Cohesin_origin_bypass_vs#MCM-YDF_150mM.svg', transparent=True)

### c - 500 mM NaCl - wildtype MCM

In [None]:
# Origin bypass vs. number of MCM bleaching steps (wildtype MCM, 500 mM NaCl):
# unfilled bars with bootstrapped 95 % CI plus a strip plot of the observations.
fig, axes = plt.subplots()
nacl_conc = '500 mM'
data = df2[(df2['nacl'] == nacl_conc) & (df2['MCM_variant'] == 'wt')]

# no extra shared keyword arguments for this figure
kwargs = {}

barprops = {
    'edgecolor': palette[3:],
    'linewidth': 1,
    'facecolor': (1, 1, 1, 0),
    'errwidth': 1,
    'capsize': .3,
    'n_boot': 10000,
    'seed': 42,
    'ci': 95,
    'errcolor': '.15',
}
barplot_kwargs = {**barprops, **kwargs}
stripplot_kwargs = {'linewidth': 0.3, 'size': 3, 'alpha': 0.4, **kwargs}

sns.barplot(x='MCM_bleaching_steps', y='ARS1 passing probability', data=data, ax=axes,
            palette=palette[3:], **barplot_kwargs)

sns.stripplot(x='MCM_bleaching_steps', y='ARS1 passing probability', data=data, ax=axes,
              dodge=True, jitter=0.25, palette=palette[3:], **stripplot_kwargs)

axes.set_ylim(-0.05, 1)
axes.set_xlabel('# of MCM at origin (bleaching steps)')
axes.set_ylabel('Probability of cohesin bypassing origin')
# label the categories 1..4 (assumes four bleaching-step groups are plotted)
axes.set_xticklabels(range(1, 5))

# give every error-bar line the edge color of the bar it belongs to;
# the code assumes three Line2D objects per bar
patches = axes.patches
lines_per_err = 3
for idx, err_line in enumerate(axes.get_lines()):
    err_line.set_color(patches[idx // lines_per_err].get_edgecolor())

# number of observations per bleaching-step group, formatted as "n = <count>"
nobs = [f'n = {count}' for count in data.groupby('MCM_bleaching_steps').size()]

# append the sample size to each existing tick label on a second line
tick_texts = [tick.get_text() for tick in axes.get_xticklabels()]
labels = [f'{text}\n({nobs[pos]})' for pos, text in enumerate(tick_texts)]
axes.set_xticklabels(labels)

sns.despine()

plt.savefig(dir_out + 'Cohesin_origin_bypass_vs#MCM-wt_500mM.svg', transparent=True)

### 4 - Pauses in cohesin translocation (150 mM data only)

### a - Cohesin translocation: fraction idle

In [None]:
# Fraction of cohesin pausing (column ('Cohesin_1_', 'fraction_idle')) per MCM
# variant at 150 mM NaCl: bars with bootstrapped 95 % CI plus a strip plot.
fig, axes = plt.subplots(figsize=[2.67, 2.36])
nacl_conc = '150 mM'
data = df2[df2['nacl'] == nacl_conc]

# category order shared by both plot layers
kwargs = {'order': ['n/a', 'wt', 'ydf']}

barprops = {
    'edgecolor': palette[2::2],
    'linewidth': 1,
    'facecolor': (1, 1, 1, 0),
    'errwidth': 1,
    'capsize': .3,
    'n_boot': 10000,
    'seed': 42,
    'ci': 95,
    'errcolor': '.15',
}
barplot_kwargs = {**barprops, **kwargs}
stripplot_kwargs = {'linewidth': 0.3, 'size': 3, 'alpha': 0.4, **kwargs}

sns.barplot(x='MCM_variant', y=('Cohesin_1_', 'fraction_idle'), data=data, ax=axes,
            palette=palette[2::2], **barplot_kwargs)

sns.stripplot(x='MCM_variant', y=('Cohesin_1_', 'fraction_idle'), data=data, ax=axes,
              dodge=True, jitter=0.25, palette=palette[2::2], **stripplot_kwargs)

axes.set_ylim(-0.05, 1.2)
axes.set_yticks(np.arange(0, 1.2, 0.2))
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Fraction of cohesin pausing')

# keep each bar's RGB fill but set its alpha to 0.4
for patch in axes.patches:
    red, green, blue, _ = patch.get_facecolor()
    patch.set_facecolor((red, green, blue, .4))

# give every error-bar line the edge color of the bar it belongs to;
# the code assumes three Line2D objects per bar
patches = axes.patches
lines_per_err = 3
for idx, err_line in enumerate(axes.get_lines()):
    err_line.set_color(patches[idx // lines_per_err].get_edgecolor())

# number of observations per MCM variant, formatted as "n = <count>";
# positions rely on groupby's key order matching `order` above
nobs = [f'n = {count}' for count in data.groupby('MCM_variant').size()]

# tick labels: condition name plus sample size on a second line
axes.set_xticklabels([
    f'No\n({nobs[0]})',
    f'Wildtype\n({nobs[1]})',
    f'Mcm3-YDF\n({nobs[2]})',
])

sns.despine()

plt.savefig(dir_out + 'Cohesin_trajectory_idle_150mM.svg', transparent=True)


In [None]:
# Paper-sized version (3.66 x 2.36 in) of the cohesin-pausing figure at 150 mM
# NaCl: palette-filled bars with bootstrapped 95 % CI plus a strip plot.
fig, axes = plt.subplots(figsize=(3.66, 2.36))
nacl_conc = '150 mM'
data = df2[df2['nacl'] == nacl_conc]

# category order shared by both plot layers
kwargs = {'order': ['n/a', 'wt', 'ydf']}

barprops = {
    'edgecolor': '.15',
    'linewidth': 1,
    'errwidth': 1,
    'capsize': .3,
    'n_boot': 10000,
    'seed': 42,
    'ci': 95,
    'errcolor': '.15',
}
barplot_kwargs = {**barprops, **kwargs}
stripplot_kwargs = {'linewidth': 0.3, 'size': 2, 'alpha': 0.4, **kwargs}

sns.barplot(x='MCM_variant', y=('Cohesin_1_', 'fraction_idle'), data=data, ax=axes,
            palette=palette[2::2], **barplot_kwargs)

# keep each bar's RGB fill but make it semi-transparent (alpha 0.4)
for patch in axes.patches:
    red, green, blue, _ = patch.get_facecolor()
    patch.set_facecolor((red, green, blue, .4))

sns.stripplot(x='MCM_variant', y=('Cohesin_1_', 'fraction_idle'), data=data, ax=axes,
              dodge=True, jitter=0.25, palette=palette[2::2], **stripplot_kwargs)

axes.set_ylim(-0.05, 1.2)
axes.set_yticks(np.arange(0, 1.2, 0.2))
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Fraction of cohesin pausing')

# give every error-bar line the edge color of the bar it belongs to;
# the code assumes three Line2D objects per bar
patches = axes.patches
lines_per_err = 3
for idx, err_line in enumerate(axes.get_lines()):
    err_line.set_color(patches[idx // lines_per_err].get_edgecolor())

# number of observations per MCM variant, formatted as "n = <count>";
# positions rely on groupby's key order matching `order` above
nobs = [f'n = {count}' for count in data.groupby('MCM_variant').size()]

# tick labels: condition name plus sample size on a second line
axes.set_xticklabels([
    f'No\n({nobs[0]})',
    f'Wildtype\n({nobs[1]})',
    f'Mcm3-YDF\n({nobs[2]})',
])

sns.despine()

plt.savefig(dir_out + 'Cohesin_trajectory_idle_150mM_PAPER.svg', transparent=True)

In [None]:
# Cohesin-pausing figure at 150 mM NaCl, exported at 3.66 x 2.36 in: bars with
# colored outlines (bootstrapped 95 % CI) plus a strip plot of the observations.
fig, axes = plt.subplots(figsize=[3.66, 2.36])
nacl_conc = '150 mM'
data = df2[df2['nacl'] == nacl_conc]

# category order shared by both plot layers
kwargs = {'order': ['n/a', 'wt', 'ydf']}

barprops = {
    'edgecolor': palette[2::2],
    'linewidth': 1,
    'facecolor': (1, 1, 1, 0),
    'errwidth': 1,
    'capsize': .3,
    'n_boot': 10000,
    'seed': 42,
    'ci': 95,
    'errcolor': '.15',
}
barplot_kwargs = {**barprops, **kwargs}
stripplot_kwargs = {'linewidth': 0.3, 'size': 3, 'alpha': 0.4, **kwargs}

sns.barplot(x='MCM_variant', y=('Cohesin_1_', 'fraction_idle'), data=data, ax=axes,
            palette=palette[2::2], **barplot_kwargs)

sns.stripplot(x='MCM_variant', y=('Cohesin_1_', 'fraction_idle'), data=data, ax=axes,
              dodge=True, jitter=0.25, palette=palette[2::2], **stripplot_kwargs)

axes.set_ylim(-0.05, 1.2)
axes.set_yticks(np.arange(0, 1.2, 0.2))
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Fraction of cohesin pausing')

# keep each bar's RGB fill but set its alpha to 0.4
for patch in axes.patches:
    red, green, blue, _ = patch.get_facecolor()
    patch.set_facecolor((red, green, blue, .4))

# give every error-bar line the edge color of the bar it belongs to;
# the code assumes three Line2D objects per bar
patches = axes.patches
lines_per_err = 3
for idx, err_line in enumerate(axes.get_lines()):
    err_line.set_color(patches[idx // lines_per_err].get_edgecolor())

# number of observations per MCM variant, formatted as "n = <count>";
# positions rely on groupby's key order matching `order` above
nobs = [f'n = {count}' for count in data.groupby('MCM_variant').size()]

# tick labels: condition name plus sample size on a second line
axes.set_xticklabels([
    f'No\n({nobs[0]})',
    f'Wildtype\n({nobs[1]})',
    f'Mcm3-YDF\n({nobs[2]})',
])

sns.despine()

plt.savefig(dir_out + 'Cohesin_trajectory_idle_150mM_BartSize.svg', transparent=True)

In [None]:
# Violin-plot version of the cohesin-pausing figure at 150 mM NaCl:
# palette-filled violins, swarm of observations, 95 % CI and group-mean bars.
# NOTE(review): the figure is created wide (21 in) and shrunk before saving,
# presumably to control how the swarm points are laid out; confirm.
fig, axes = plt.subplots(figsize=(21, 2.36))
nacl_conc = '150 mM'
data = df2[df2['nacl'] == nacl_conc]

# category order shared by all plot layers
kwargs = {'order': ['n/a', 'wt', 'ydf']}

vioprops = {
    'edgecolor': palette[2::2],
    'linewidth': 1,
    'errwidth': 1,
    'capsize': .2,
    'n_boot': 10000,
    'seed': 42,
    'ci': 95,
    'errcolor': '.15',
}
vioplot_kwargs = {**vioprops, **kwargs}
swarmplot_kwargs = {'linewidth': 0.3, 'size': 2, 'alpha': 0.4, **kwargs}

sns.violinplot(x='MCM_variant', y=('Cohesin_1_', 'fraction_idle'), data=data, ax=axes,
               palette=palette[2::2], inner=None, cut=2, **vioplot_kwargs)

# recolor the outlines of the first two collections and fade them
# NOTE(review): only two collections are touched, starting at palette[4],
# although three variants are plotted - presumably the 'n/a' group produces no
# violin body here; confirm this is intended and not a copy-paste slip.
for idx in range(2):
    violin_body = axes.collections[idx]
    violin_body.set_edgecolor(palette[4 + idx * 2])
    violin_body.set_alpha(0.4)

sns.swarmplot(x='MCM_variant', y=('Cohesin_1_', 'fraction_idle'), data=data, ax=axes,
              dodge=True, palette=palette[2::2], **swarmplot_kwargs)

# 95 % confidence interval (no markers, no connecting line)
sns.pointplot(x='MCM_variant', y=('Cohesin_1_', 'fraction_idle'), data=data, ax=axes,
              markers='', join=False, color='.15', **vioplot_kwargs)

# group means drawn as horizontal bars of half-width 0.3 at x = 0, 1, 2
means = data.groupby('MCM_variant').mean()[('Cohesin_1_','fraction_idle')].values
plt.errorbar(range(0, 3), means, xerr=0.3, fmt='none', ecolor='.15', elinewidth=1)

axes.set_ylim(-0.05, 1.2)
axes.set_yticks(np.arange(0, 1.2, 0.2))
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Fraction of cohesin pausing')

# number of observations per MCM variant, formatted as "n = <count>";
# positions rely on groupby's key order matching `order` above
nobs = [f'n = {count}' for count in data.groupby('MCM_variant').size()]

# tick labels: condition name plus sample size on a second line
axes.set_xticklabels([
    f'No\n({nobs[0]})',
    f'Wildtype\n({nobs[1]})',
    f'Mcm3-YDF\n({nobs[2]})',
])

# final figure size used for the exported SVG
fig.set_size_inches(3.66, 1.98)
sns.despine()

plt.savefig(dir_out + 'Cohesin_trajectory_idle_150mM_violin_Bartsize.svg', transparent=True)

### b - Cohesin translocation: fraction idle vs #MCM(wt)

In [None]:
# Bar + strip plot: fraction of cohesin pausing vs. number of MCM bleaching steps
# for wild-type MCM at 150 mM NaCl.
fig,axes = plt.subplots(figsize=([2.67, 2.36]))
nacl_conc = '150 mM'
is_selected = (df2['nacl'] == nacl_conc) & (df2['MCM_variant'] == 'wt')
data = df2[is_selected]

barprops = {'edgecolor': palette[3::], 'linewidth': 1, 'facecolor':(1, 1, 1, 0),
            'errwidth':1, 'capsize':.3, 'n_boot':10000, 'seed': 42, 'ci':95, 'errcolor':'.15'}

kwargs = {}

barplot_kwargs = {**barprops, **kwargs}

stripplot_kwargs = {'linewidth': 0.3, 'size': 3, 'alpha': 0.4, **kwargs}

# arguments shared by the bar layer and the strip layer
shared_args = dict(x='MCM_bleaching_steps', y=('Cohesin_1_','fraction_idle'), data=data, ax=axes,
                   palette=palette[3::])

sns.barplot(**shared_args, **barplot_kwargs)

sns.stripplot(**shared_args, dodge=True, jitter=0.25, **stripplot_kwargs)

axes.set_ylim(-0.05,1.2)
axes.set_yticks(np.arange(0,1.2,0.2))
axes.set_xlabel('# of MCM at origin (bleaching steps)')
axes.set_ylabel('Fraction of cohesin pausing')
axes.set_xticklabels(range(1,5))

# give every bar face an alpha of 0.4 while keeping its RGB values
for bar in axes.patches:
    bar.set_facecolor((*bar.get_facecolor()[:3], .4))

# recolor the error-bar lines with the edge color of the bar they belong to
# (three line artists are assumed per bar, hence the integer division)
patches = axes.patches
lines_per_err = 3

for idx, err_line in enumerate(axes.get_lines()):
    err_line.set_color(patches[idx // lines_per_err].get_edgecolor())

# sample size per bleaching-step group, formatted as "n = <count>"
nobs = [f"n = {count}" for count in data.groupby('MCM_bleaching_steps').size()]

# append the sample size below each existing tick label
labels = [f"{tick.get_text()}\n({nobs[pos]})" for pos, tick in enumerate(axes.get_xticklabels())]
axes.set_xticklabels(labels)

sns.despine()

plt.savefig(dir_out+'Cohesin_trajectory_idle_vs#MCM_150mM.svg',transparent=True)

### c - Cohesin translocation: fraction idle vs #MCM(YDF)

In [None]:
# Bar + strip plot: fraction of cohesin pausing vs. number of MCM bleaching steps
# for the Mcm3-YDF variant at 150 mM NaCl.
fig,axes = plt.subplots(figsize=([2.67, 2.36]))
nacl_conc = '150 mM'
is_selected = (df2['nacl'] == nacl_conc) & (df2['MCM_variant'] == 'ydf')
data = df2[is_selected]

barprops = {'edgecolor': palette[3::], 'linewidth': 1, 'facecolor':(1, 1, 1, 0),
            'errwidth':1, 'capsize':.3, 'n_boot':10000, 'seed': 42, 'ci':95, 'errcolor':'.15'}

kwargs = {}

barplot_kwargs = {**barprops, **kwargs}

stripplot_kwargs = {'linewidth': 0.3, 'size': 3, 'alpha': 0.4, **kwargs}

# arguments shared by the bar layer and the strip layer
shared_args = dict(x='MCM_bleaching_steps', y=('Cohesin_1_','fraction_idle'), data=data, ax=axes,
                   palette=palette[3::])

sns.barplot(**shared_args, **barplot_kwargs)

sns.stripplot(**shared_args, dodge=True, jitter=0.25, **stripplot_kwargs)

axes.set_ylim(-0.05,1.2)
axes.set_yticks(np.arange(0,1.2,0.2))
axes.set_xlabel('# of MCM at origin (bleaching steps)')
axes.set_ylabel('Fraction of cohesin pausing')
axes.set_xticklabels(range(1,5))

# give every bar face an alpha of 0.4 while keeping its RGB values
for bar in axes.patches:
    bar.set_facecolor((*bar.get_facecolor()[:3], .4))

# recolor the error-bar lines with the edge color of the bar they belong to
# (three line artists are assumed per bar, hence the integer division)
patches = axes.patches
lines_per_err = 3

for idx, err_line in enumerate(axes.get_lines()):
    err_line.set_color(patches[idx // lines_per_err].get_edgecolor())

# sample size per bleaching-step group, formatted as "n = <count>"
nobs = [f"n = {count}" for count in data.groupby('MCM_bleaching_steps').size()]

# append the sample size below each existing tick label
labels = [f"{tick.get_text()}\n({nobs[pos]})" for pos, tick in enumerate(axes.get_xticklabels())]
axes.set_xticklabels(labels)

sns.despine()

plt.savefig(dir_out+'Cohesin_trajectory_idle_vs#MCM-YDF_150mM.svg',transparent=True)

### d - Cohesin translocation: Pause durations

In [None]:
# Histogram of all cohesin pause durations, normalised to probability
fig,axes = plt.subplots()

data = df_pause
durations = data['pause_duration']

# 25 equal-width bins between 0 and 250
sns.histplot(durations, bins=np.linspace(0,250,26), color=palette[2], stat='probability', alpha=0.5)

axes.set_xlim(0,225)
axes.set_ylim(0, 0.5)
axes.set_xlabel('Cohesin pause duration (s)')
axes.set_ylabel('Probability')

# number of pauses in the top-right corner (axes coordinates)
axes.text(0.95, 0.95, f"n = {len(durations)}",
          va='top', ha='right', transform=axes.transAxes)

sns.despine(offset=dict(left=5))

plt.savefig(dir_out+'Cohesin_pause_durations.svg',transparent=True)

In [None]:
from scipy.optimize import curve_fit

In [None]:
# pause durations of at least 10, used as input for the fit below
long_enough = df_pause['pause_duration'] >= 10
data = df_pause.loc[long_enough, 'pause_duration']
# 211 evenly spaced thresholds from 10 to 220 (step of 1)
X = np.linspace(10, 220, num=211)
X

In [None]:
# number of pauses strictly longer than each threshold in X ...
survivors = [(data > threshold).sum().item() for threshold in X]
# ... expressed as a fraction of the count at the first threshold
y = [count / survivors[0] for count in survivors]

In [None]:
# quick look at the normalised counts y against the thresholds X
plt.plot(X, y)

In [None]:
def func(x, t):
    """Return ``1 - 0.5 ** (x / t)``.

    The value is 0 at ``x = 0`` and 0.5 at ``x = t``, so ``t`` acts as the
    half-time of the curve. Used as the model function for ``curve_fit``.
    """
    remaining = 0.5 ** (x / t)
    return 1 - remaining

In [None]:
# Least-squares fit of func to (X, y); pars holds the fitted t, cov its covariance matrix.
# NOTE(review): y as built above is a non-increasing fraction starting at 1, whereas
# func rises from 0 towards 1 - confirm that this model is the intended one.
pars, cov = curve_fit(func, X, y)

# standard deviations of the parameters (square roots of the diagonal of the covariance)
stdevs = np.sqrt(cov.diagonal())

# residuals between the data and the fitted curve
res = np.asarray(y) - func(X, *pars)

In [None]:
# display the standard deviation(s) of the fitted parameter(s)
stdevs

In [None]:
# display the fitted parameter(s) (t of func)
pars

In [None]:
# Side-by-side pause-duration histograms: wild-type MCM (left) and Mcm3-YDF (right)
fig,axes = plt.subplots(1,2, sharey=True, sharex=True)

data = df_pause

# (MCM_variant value, histogram color, legend label) for the left and right panel
panels = [('wt', palette[4], 'Wildtype'), ('ydf', palette[6], 'Mcm3-YDF')]

for panel, (variant, color, label) in zip(axes, panels):
    sns.histplot(data[data['MCM_variant'] == variant]['pause_duration'], bins=np.linspace(0,250,13),
                 color=color, stat='probability', alpha=0.5, ax=panel, label=label)

for ax in axes:
    ax.set_xlim(0,225)
    ax.set_ylim(0, 0.7)
    # the x label is drawn once for the whole figure further down
    ax.set_xlabel(None)
    ax.set_ylabel('Probability')

# number of pauses per variant in the top-right corner of each panel
for panel, (variant, _color, _label) in zip(axes, panels):
    panel.text(0.95, 0.95, f"n = {len(data[data['MCM_variant'] == variant])}",
               verticalalignment='top', horizontalalignment='right',
               transform=panel.transAxes)

# common x label centred below both panels
fig.text(0.5, -0.04, 'Cohesin pause duration (s)', ha='center')
fig.legend(frameon=False, loc=5, title='MCM at origin')

sns.despine(offset=dict(left=5))

plt.savefig(dir_out+'Cohesin_pause_durations_hue-MCMvariant.svg',transparent=True)

In [None]:
# Complementary ECDF of pause durations of at least 10.
# Reads df_pause directly instead of the leftover global `data`, whose content
# depends on which cell was executed last.
sns.ecdfplot(df_pause[df_pause['pause_duration']>=10]['pause_duration'], color=palette[2],complementary=True)

In [None]:
# display the table of detected pauses
df_pause

In [None]:
# Bar (median with bootstrapped 95 % CI) + strip plot of cohesin pause durations
# for wild-type and Mcm3-YDF MCM.
fig,axes = plt.subplots(figsize=(1.78,2))

data = df_pause

# categories shown on the x axis, in plotting order
order = ['wt', 'ydf']

barprops = {'edgecolor': palette[4::2], 'linewidth': 1, 'facecolor':(1, 1, 1, 0),
            'errwidth':1, 'capsize':.3, 'n_boot':10000, 'seed': 42, 'ci':95, 'errcolor':'.15'}

kwargs = {'order': order}

barplot_kwargs = dict(barprops, **kwargs)
stripplot_kwargs = dict({'linewidth': 0.3, 'size': 3, 'alpha': 0.4},
                        **kwargs)

sns.barplot(x='MCM_variant', y='pause_duration', data=data, ax=axes,estimator=np.median,
            palette=palette[4::2], **barplot_kwargs)
sns.stripplot(x='MCM_variant', y='pause_duration', data=data, ax=axes,
           dodge=True, jitter=0.25, palette=palette[4::2], **stripplot_kwargs)


axes.set_ylim(-12.5,250)
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Cohesin pause duration (s)')
# The tick labels are set once, below, together with the sample sizes. The earlier
# three-label call ('No', 'Wildtype', 'Mcm3-YDF') did not match the two plotted categories.

# alpha for boxplot filling (not required here)
for patch in axes.patches:
    r, g, b, a = patch.get_facecolor()
    patch.set_facecolor((r, g, b, .4))

# change errcolor: each bar is assumed to own three error-bar line artists
patches = axes.patches
lines_per_err = 3

for i, line in enumerate(axes.get_lines()):
    newcolor = patches[i // lines_per_err].get_edgecolor()
    line.set_color(newcolor)
    
# number of observations per plotted category, aligned with the plotting order
# (a category without rows is reported as n = 0 instead of shifting the labels)
counts = data.groupby('MCM_variant').size().reindex(order, fill_value=0)
nobs = [f"n = {count}" for count in counts]
   
#put nobs into plot together with xlabel
axes.set_xticklabels([f'Wildtype\n({nobs[0]})', f'Mcm3-YDF\n({nobs[1]})'])
    
sns.despine()

plt.savefig(dir_out+'Cohesin_pause_durations_hueMCM.svg',transparent=True)

In [None]:
# summary statistics of the pause duration per MCM variant
df_pause.groupby('MCM_variant')['pause_duration'].describe()

### e - Cohesin translocation: Pause position vs MCM position

In [None]:
# Mirrored histograms: MCM position (drawn downwards) vs. cohesin pause position
# (drawn upwards) along the DNA, for pauses with MCM_at_origin set.
fig,axes = plt.subplots()

data = df_pause[df_pause['MCM_at_origin']]

# settings shared by both histograms; positions are divided by 1000 for the kb axis
hist_style = dict(bins=np.linspace(0,22,23), stat='probability', alpha=0.5)

sns.histplot(data['MCM_position_on_dna']/1000, color=palette[5], label='MCM', **hist_style)

# flip everything drawn so far below the x axis; this must happen before the
# second histogram is added so that only the MCM artists are mirrored
for bar in axes.patches:
    bar.set_height(-bar.get_height())
for curve in axes.lines:
    curve.set_ydata(-curve.get_ydata())

sns.histplot(data['pause_position_on_dna']/1000, color=palette[2], label='Cohesin', **hist_style)

axes.set_ylim(-0.4, 0.4)
axes.set_xlabel('Position on DNA (kb)')
axes.set_ylabel('Probability')
axes.legend(frameon=False, loc=4)
# ticks at -0.4 ... 0.4, labelled with their absolute values on both sides of zero
axes.set_yticks(np.arange(-0.4,0.5,0.2))
axes.set_yticklabels([0.4, 0.2, 0, 0.2, 0.4])
# x axis runs through y = 0, between the two histograms
axes.spines['bottom'].set_position('zero')

# number of pauses in the top-right corner (axes coordinates)
axes.text(0.95, 0.95, f"n = {len(data['pause_duration'])}",
          va='top', ha='right', transform=axes.transAxes)

sns.despine()

plt.savefig(dir_out+'Cohesin_pause_positions.svg',transparent=True)

### calculate collision outcome cohesin-MCM for each molecule

In [None]:
# Per-molecule outcome percentages relative to the number of visualized encounters.
encounters = df2['Visualized encounters']
pauses = df2[('Cohesin_1_','number_pauses')]

# pauses are subtracted from the block count before normalising
df2['ARS1 block (%)'] = 100 * (df2['ARS1 block'] - pauses) / encounters
df2['ARS1 pass (%)'] = 100 * df2['ARS1 pass'] / encounters
df2['ARS1 pause (%)'] = 100 * pauses / encounters
df2

In [None]:
# Reshape the three outcome-percentage columns of df2 into long format:
# one row per molecule and outcome, keeping MCM_variant and nacl for grouping.
# All pieces are concatenated in a single call instead of growing a frame inside
# the loop, which also avoids concatenating with an empty DataFrame.
_plot_df = pd.concat([
    pd.DataFrame({'probability': df2[column],
                  'outcome': column,
                  'MCM_variant': df2['MCM_variant'],
                  'nacl': df2['nacl']})
    for column in ['ARS1 pass (%)', 'ARS1 block (%)', 'ARS1 pause (%)']
])

_plot_df

In [None]:
# summary statistics per salt condition, outcome and MCM variant
_plot_df.groupby(['nacl','outcome','MCM_variant']).describe()

In [None]:
# Grouped bar + strip plot of encounter outcomes at 150 mM NaCl:
# MCM variant on the x axis, outcome (pass / block / pause) as hue.
fig,axes = plt.subplots(figsize=(5.34, 2.36))
data = _plot_df[_plot_df['nacl']=='150 mM']

barprops = {'edgecolor': 'k','linewidth': 1, 'facecolor':(1, 1, 1, 0),
            'errwidth':1, 'capsize':.1, 'n_boot':10000, 'seed': 42, 'ci':95, 'errcolor':'.15'}

kwargs = {'order': ['n/a', 'wt', 'ydf'], 'hue_order': ['ARS1 pass (%)', 'ARS1 block (%)', 'ARS1 pause (%)']}

barplot_kwargs = {**barprops, **kwargs}
stripplot_kwargs = {'linewidth': 0.3, 'size': 2.5, 'alpha': 0.4, **kwargs}

# arguments shared by the bar layer and the strip layer
shared_args = dict(x='MCM_variant', y='probability', hue='outcome', data=data, ax=axes,
                   palette=palette_qual)

sns.barplot(**shared_args, **barplot_kwargs)

sns.stripplot(**shared_args, dodge=True, jitter=0.25, **stripplot_kwargs)

axes.set_ylim(-6,120)
axes.set_yticks(np.arange(0,120,20))
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Probability (%)')

# give every bar face an alpha of 0.4 while keeping its RGB values
for bar in axes.patches:
    bar.set_facecolor((*bar.get_facecolor()[:3], .4))

# recolor the error-bar lines with the edge color of the bar they belong to
# (three line artists are assumed per bar, hence the integer division)
patches = axes.patches
lines_per_err = 3

for idx, err_line in enumerate(axes.get_lines()):
    err_line.set_color(patches[idx // lines_per_err].get_edgecolor())

# molecules per MCM variant at 150 mM NaCl (counted on df2, i.e. once per molecule)
nobs = [f"n = {count}" for count in df2[df2['nacl']=='150 mM'].groupby('MCM_variant').size()]

# tick labels show the condition name with its sample size underneath
tick_names = ('No', 'Wildtype', 'Mcm3-YDF')
axes.set_xticklabels([f'{name}\n({nobs[pos]})' for pos, name in enumerate(tick_names)])

# rebuild the legend from the first three handles only, with short outcome names
handles, labels = axes.get_legend_handles_labels()
lgd = axes.legend(handles[:3], ['pass', 'block', 'pause'], frameon=False, title='Outcome origin encounter',
                  loc='center left', bbox_to_anchor=[1, 0.5], handletextpad=0.5)

# set the marker size of the three legend handles
for k in range(3):
    lgd.legendHandles[k]._sizes = [40]

sns.despine()

plt.savefig(dir_out+'Cohesin_MCM_encounter_outcomes.svg',transparent=True)

In [None]:
# Grouped bar + strip plot of encounter outcomes at 150 mM NaCl:
# outcome (pass / block / pause) on the x axis, MCM variant as hue.
fig,axes = plt.subplots(figsize=(5.34, 2.36))
data = _plot_df[_plot_df['nacl']=='150 mM']

barprops = {'edgecolor': 'k','linewidth': 1, 'facecolor':(1, 1, 1, 0),
            'errwidth':1, 'capsize':.1, 'n_boot':10000, 'seed': 42, 'ci':95, 'errcolor':'.15'}

kwargs = {'hue_order': ['n/a', 'wt', 'ydf'], 'order': ['ARS1 pass (%)', 'ARS1 block (%)', 'ARS1 pause (%)']}

barplot_kwargs = {**barprops, **kwargs}
stripplot_kwargs = {'linewidth': 0.3, 'size': 2.5, 'alpha': 0.4, **kwargs}

# arguments shared by the bar layer and the strip layer
shared_args = dict(x='outcome', y='probability', hue='MCM_variant', data=data, ax=axes,
                   palette=palette[2::2])

sns.barplot(**shared_args, **barplot_kwargs)

sns.stripplot(**shared_args, dodge=True, jitter=0.25, **stripplot_kwargs)

axes.set_ylim(-6,120)
axes.set_yticks(np.arange(0,120,20))
axes.set_xlabel('Outcome origin encounter')
axes.set_ylabel('Probability (%)')

# give every bar face an alpha of 0.4 while keeping its RGB values
for bar in axes.patches:
    bar.set_facecolor((*bar.get_facecolor()[:3], .4))

# recolor the error-bar lines with the edge color of the bar they belong to
# (three line artists are assumed per bar, hence the integer division)
patches = axes.patches
lines_per_err = 3

for idx, err_line in enumerate(axes.get_lines()):
    err_line.set_color(patches[idx // lines_per_err].get_edgecolor())

# molecules per MCM variant at 150 mM NaCl (counted on df2, i.e. once per molecule)
nobs = [f"n = {count}" for count in df2[df2['nacl']=='150 mM'].groupby('MCM_variant').size()]

# short outcome names on the x axis
axes.set_xticklabels(['Pass', 'Block', 'Pause'])

# rebuild the legend from the first three handles only; sample sizes go into the labels
handles, labels = axes.get_legend_handles_labels()
legend_names = ('No', 'Wildtype', 'Mcm3-YDF')
lgd = axes.legend(handles[:3], [f'{name} ({nobs[pos]})' for pos, name in enumerate(legend_names)],
                  frameon=False, title='MCM at origin?',
                  loc='center left', bbox_to_anchor=[1, 0.5], handletextpad=0.5)

# set the marker size of the three legend handles
for k in range(3):
    lgd.legendHandles[k]._sizes = [40]

sns.despine()

plt.savefig(dir_out+'Cohesin_MCM_encounter_outcomes_v2.svg',transparent=True)

In [None]:
# display the table of detected pauses
df_pause

### Cohesin direction after pause

In [None]:
# Bar plot (mean with bootstrapped 95 % CI) of the 'Cohesin_turnaround' column of
# df_pause per MCM variant.
fig,axes = plt.subplots()

data = df_pause

# categories shown on the x axis, in plotting order
order = ['n/a', 'wt', 'ydf']

barprops = {'edgecolor': palette[2::2], 'linewidth': 1,
            'errwidth':1, 'capsize':.3, 'n_boot':10000, 'seed': 42, 'ci':95, 'errcolor':'.15'}

kwargs = {'order': order}

barplot_kwargs = dict(barprops, **kwargs)


sns.barplot(x='MCM_variant', y='Cohesin_turnaround', data=data, ax=axes,
            palette=palette[2::2], **barplot_kwargs)

axes.set_ylim(-0.05,1)
axes.set_xlabel('MCM at origin?')
axes.set_ylabel('Probability direction change after pause')

# alpha for boxplot filling (not required here)
for patch in axes.patches:
    r, g, b, a = patch.get_facecolor()
    patch.set_facecolor((r, g, b, .4))

# change errcolor: each bar is assumed to own three error-bar line artists
patches = axes.patches
lines_per_err = 3

for i, line in enumerate(axes.get_lines()):
    newcolor = patches[i // lines_per_err].get_edgecolor()
    line.set_color(newcolor)
    
# Number of pauses per plotted category, aligned with the plotting order.
# Categories without any pause get n = 0, so the counts can no longer be shifted
# onto the wrong label when a category is missing from (or added to) df_pause.
counts = data.groupby('MCM_variant').size().reindex(order, fill_value=0)
nobs = [f"n = {count}" for count in counts]
   
#put nobs into plot together with xlabel
axes.set_xticklabels([f'No\n({nobs[0]})', f'Wildtype\n({nobs[1]})', f'Mcm3-YDF\n({nobs[2]})'])
    
sns.despine()

plt.savefig(dir_out+'Cohesin_pause_turnaround.svg',transparent=True)

In [None]:
# Bar plot (mean with bootstrapped 95 % CI) of the 'Cohesin_pause_pass' column of
# df_pause per MCM variant.
fig,axes = plt.subplots()

data = df_pause

# categories shown on the x axis, in plotting order
order = ['n/a', 'wt', 'ydf']

barprops = {'edgecolor': palette[2::2], 'linewidth': 1,
            'errwidth':1, 'capsize':.3, 'n_boot':10000, 'seed': 42, 'ci':95, 'errcolor':'.15'}

kwargs = {'order': order}

barplot_kwargs = dict(barprops, **kwargs)


sns.barplot(x='MCM_variant', y='Cohesin_pause_pass', data=data, ax=axes,
            palette=palette[2::2], **barplot_kwargs)

axes.set_ylim(-0.05,1)
axes.set_xlabel('MCM at origin?')
# The previous label ('Probability direction change after pause') was carried over
# from the turnaround plot; this figure shows the pause-pass column.
# NOTE(review): wording inferred from the column and file name - confirm.
axes.set_ylabel('Probability bypass after pause')

# alpha for boxplot filling (not required here)
for patch in axes.patches:
    r, g, b, a = patch.get_facecolor()
    patch.set_facecolor((r, g, b, .4))

# change errcolor: each bar is assumed to own three error-bar line artists
patches = axes.patches
lines_per_err = 3

for i, line in enumerate(axes.get_lines()):
    newcolor = patches[i // lines_per_err].get_edgecolor()
    line.set_color(newcolor)
    
# Number of pauses per plotted category, aligned with the plotting order.
# Categories without any pause get n = 0, so the counts can no longer be shifted
# onto the wrong label when a category is missing from (or added to) df_pause.
counts = data.groupby('MCM_variant').size().reindex(order, fill_value=0)
nobs = [f"n = {count}" for count in counts]
   
#put nobs into plot together with xlabel
axes.set_xticklabels([f'No\n({nobs[0]})', f'Wildtype\n({nobs[1]})', f'Mcm3-YDF\n({nobs[2]})'])
    
sns.despine()

plt.savefig(dir_out+'Cohesin_pause_bypass.svg',transparent=True)


In [None]:
# Actual numbers of encounters (for figure legend)

In [None]:
# total number of visualized encounters per salt condition and MCM variant
df2.groupby(['nacl','MCM_variant'])['Visualized encounters'].sum()

In [None]:
# summed 'ARS1 pass' counts per salt condition and MCM variant
df2.groupby(['nacl','MCM_variant'])['ARS1 pass'].sum()

In [None]:
# summed 'ARS1 block' counts per salt condition and MCM variant
df2.groupby(['nacl','MCM_variant'])['ARS1 block'].sum()

In [None]:
# summary statistics of the per-molecule ARS1 passing probability per condition
df2.groupby(['nacl','MCM_variant'])['ARS1 passing probability'].describe()

In [None]:
# strong barrier = 5 fold reduction in passing probability compared to cohesin alone
# => 500 mM NaCl: 0.115
# => 150 mM NaCl: 0.074

In [None]:
# molecules with a passing probability below 0.115 (the cutoff noted for 500 mM NaCl);
# the filter is applied to all salt conditions before grouping
df2[df2['ARS1 passing probability'] < 0.115].groupby(['nacl','MCM_variant'])['ARS1 passing probability'].describe()

In [None]:
# molecules with a passing probability below 0.074 (the cutoff noted for 150 mM NaCl);
# the filter is applied to all salt conditions before grouping
df2[df2['ARS1 passing probability'] < 0.074].groupby(['nacl','MCM_variant'])['ARS1 passing probability'].describe()

In [None]:
# molecules whose passing probability is exactly 0, summarised per condition
df2[df2['ARS1 passing probability'] == 0].groupby(['nacl','MCM_variant'])['ARS1 passing probability'].describe()

In [None]:
# summary statistics of the cohesin idle fraction per salt condition and MCM variant
df2.groupby(['nacl','MCM_variant']).describe()[('Cohesin_1_','fraction_idle')]

In [None]:
# same idle-fraction summary, additionally split by the number of MCM bleaching steps
df2.groupby(['nacl','MCM_variant','MCM_bleaching_steps']).describe()[('Cohesin_1_','fraction_idle')]

In [None]:
# passing-probability summary split by salt condition, MCM variant and bleaching steps
df2.groupby(['nacl','MCM_variant','MCM_bleaching_steps']).describe()['ARS1 passing probability']