In [1]:
import numpy as np
import seaborn as sns
import pandas as pd
import scipy.stats as sp

# Part 1: Load in data for S5A

In [2]:
# Read the Fig. S5A table from disk, then echo the frame as the cell output.
full_df = pd.read_csv(filepath_or_buffer='data/FigS5A.csv')
full_df

Unnamed: 0.1,Unnamed: 0,Toehold,Toehold.1,Toehold.2,Toehold.3,Toehold.4,Shuffled Toehold k-mers,Shuffled Toehold k-mers.1,Shuffled Toehold k-mers.2,Shuffled Toehold k-mers.3,Shuffled Toehold k-mers.4,Scrambled Toehold Sequence,Scrambled Toehold Sequence.1,Scrambled Toehold Sequence.2,Scrambled Toehold Sequence.3,Scrambled Toehold Sequence.4
0,ON,0.368617,0.424466,0.423166,0.41176,0.385163,0.065189,0.078228,0.091717,0.098821,0.079979,0.072227,0.066992,0.074219,0.073462,0.080744
1,ON / OFF,0.271824,0.270961,0.289993,0.298607,0.287319,0.016207,0.045088,0.035757,0.040464,0.038282,0.0243,0.024051,0.011376,0.0307,0.048583
2,OFF,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Part 2: Statistical testing for S5A (two-sample t-tests: real toeholds vs. shuffled and scrambled controls)

In [3]:
# mcc ON 10% thresh
# Row 0 of the table in `full_df` is the 'ON' row. Column 0 holds the row label;
# columns 1-5, 6-10 and 11-15 hold the five replicates for the real toeholds,
# the shuffled k-mer controls and the scrambled-sequence controls, respectively.
# A row taken across the string label column and the numeric columns comes back
# as an object-dtype Series, so cast each group to float explicitly instead of
# relying on SciPy to coerce it.
on_realtoehold = full_df.iloc[0, 1:6].astype(float)
on_shufftoehold = full_df.iloc[0, 6:11].astype(float)
on_scramtoehold = full_df.iloc[0, 11:16].astype(float)

print('ON pred')
# Independent two-sample t-tests (SciPy default: equal variances assumed),
# comparing the real toeholds against each control group in turn.
print(sp.ttest_ind(on_realtoehold, on_shufftoehold))
print(sp.ttest_ind(on_realtoehold, on_scramtoehold))

ON pred
Ttest_indResult(statistic=25.612298100043418, pvalue=5.789968032669404e-09)
Ttest_indResult(statistic=29.19876592870007, pvalue=2.0497397208142245e-09)


In [4]:
# mcc ON/OFF 10% thresh
# Row 1 of the table in `full_df` is the 'ON / OFF' row. Column 0 holds the row
# label; columns 1-5, 6-10 and 11-15 hold the five replicates for the real
# toeholds, the shuffled k-mer controls and the scrambled-sequence controls.
# The row slice is object dtype (it is drawn from a frame that mixes a string
# column with numeric ones), so convert each group to float before testing.
onoff_realtoehold = full_df.iloc[1, 1:6].astype(float)
onoff_shufftoehold = full_df.iloc[1, 6:11].astype(float)
onoff_scramtoehold = full_df.iloc[1, 11:16].astype(float)

print('ON/OFF pred')
# Independent two-sample t-tests (SciPy default: equal variances assumed),
# real toeholds vs. each control group.
print(sp.ttest_ind(onoff_realtoehold, onoff_shufftoehold))
print(sp.ttest_ind(onoff_realtoehold, onoff_scramtoehold))

ON/OFF pred
Ttest_indResult(statistic=33.91742343781733, pvalue=6.237343444602218e-10)
Ttest_indResult(statistic=31.567676637776742, pvalue=1.1035066773642516e-09)


In [5]:
# mcc OFF 10% thresh
# Row 2 of the table in `full_df` is the 'OFF' row. Column 0 holds the row
# label; columns 1-5, 6-10 and 11-15 hold the five replicates for the real
# toeholds, the shuffled k-mer controls and the scrambled-sequence controls.
# The row slice is object dtype (string label column + numeric columns in the
# same frame), so convert each group to float before testing.
off_realtoehold = full_df.iloc[2, 1:6].astype(float)
off_shufftoehold = full_df.iloc[2, 6:11].astype(float)
off_scramtoehold = full_df.iloc[2, 11:16].astype(float)

print('OFF pred')
# Independent two-sample t-tests (SciPy default: equal variances assumed).
# NOTE: in the S5A table as loaded in this notebook every OFF value is 0.0, so
# both groups have zero variance and identical means; the t statistic is 0/0
# and ttest_ind reports nan for the statistic and the p-value. That nan means
# "test undefined for constant data", not a failed computation.
print(sp.ttest_ind(off_realtoehold, off_shufftoehold))
print(sp.ttest_ind(off_realtoehold, off_scramtoehold))

OFF pred
Ttest_indResult(statistic=nan, pvalue=nan)
Ttest_indResult(statistic=nan, pvalue=nan)


# Part 3: Load in data for S5B

In [6]:
# Read the Fig. S5B table from disk (this rebinds `full_df`, replacing the
# previously loaded frame), then echo it as the cell output.
full_df = pd.read_csv(filepath_or_buffer='data/FigS5B.csv')
full_df

Unnamed: 0.1,Unnamed: 0,Toehold,Toehold.1,Toehold.2,Toehold.3,Toehold.4,Shuffled Toehold k-mers,Shuffled Toehold k-mers.1,Shuffled Toehold k-mers.2,Shuffled Toehold k-mers.3,Shuffled Toehold k-mers.4,Scrambled Toehold Sequence,Scrambled Toehold Sequence.1,Scrambled Toehold Sequence.2,Scrambled Toehold Sequence.3,Scrambled Toehold Sequence.4
0,ON,0.720682,0.704709,0.707211,0.70923,0.707355,0.189873,0.215132,0.216082,0.200787,0.205594,0.109024,0.127012,0.117571,0.114119,0.111898
1,ON / OFF,0.528955,0.528488,0.527344,0.522644,0.520776,0.111081,0.137318,0.141937,0.1349,0.140129,0.055276,0.045229,0.048286,0.0496,0.053966
2,OFF,0.55926,0.56603,0.565606,0.565512,0.566069,0.172267,0.175266,0.166109,0.17129,0.171274,0.089023,0.083355,0.07824,0.077234,0.076748


# Part 4: Statistical testing for S5B (two-sample t-tests: real toeholds vs. shuffled and scrambled controls)

In [7]:
# mcc ON 50% thresh
# Row 0 of the table in `full_df` is the 'ON' row. Column 0 holds the row label;
# columns 1-5, 6-10 and 11-15 hold the five replicates for the real toeholds,
# the shuffled k-mer controls and the scrambled-sequence controls, respectively.
# A row taken across the string label column and the numeric columns comes back
# as an object-dtype Series, so cast each group to float explicitly instead of
# relying on SciPy to coerce it.
on_realtoehold = full_df.iloc[0, 1:6].astype(float)
on_shufftoehold = full_df.iloc[0, 6:11].astype(float)
on_scramtoehold = full_df.iloc[0, 11:16].astype(float)

print('ON pred')
# Independent two-sample t-tests (SciPy default: equal variances assumed),
# comparing the real toeholds against each control group in turn.
print(sp.ttest_ind(on_realtoehold, on_shufftoehold))
print(sp.ttest_ind(on_realtoehold, on_scramtoehold))

ON pred
Ttest_indResult(statistic=89.96775641702082, pvalue=2.6000301567439585e-13)
Ttest_indResult(statistic=141.95557029434417, pvalue=6.7823148617592595e-15)


In [8]:
# mcc ON/OFF 50% thresh
# Row 1 of the table in `full_df` is the 'ON / OFF' row. Column 0 holds the row
# label; columns 1-5, 6-10 and 11-15 hold the five replicates for the real
# toeholds, the shuffled k-mer controls and the scrambled-sequence controls.
# The row slice is object dtype (it is drawn from a frame that mixes a string
# column with numeric ones), so convert each group to float before testing.
onoff_realtoehold = full_df.iloc[1, 1:6].astype(float)
onoff_shufftoehold = full_df.iloc[1, 6:11].astype(float)
onoff_scramtoehold = full_df.iloc[1, 11:16].astype(float)

print('ON/OFF pred')
# Independent two-sample t-tests (SciPy default: equal variances assumed),
# real toeholds vs. each control group.
print(sp.ttest_ind(onoff_realtoehold, onoff_shufftoehold))
print(sp.ttest_ind(onoff_realtoehold, onoff_scramtoehold))

ON/OFF pred
Ttest_indResult(statistic=66.93156359699906, pvalue=2.7629988801037532e-12)
Ttest_indResult(statistic=191.642765353974, pvalue=6.150891533006469e-16)


In [9]:
# mcc OFF 50% thresh
# Row 2 of the table in `full_df` is the 'OFF' row. Column 0 holds the row
# label; columns 1-5, 6-10 and 11-15 hold the five replicates for the real
# toeholds, the shuffled k-mer controls and the scrambled-sequence controls.
# The row slice is object dtype (string label column + numeric columns in the
# same frame), so convert each group to float before testing.
off_realtoehold = full_df.iloc[2, 1:6].astype(float)
off_shufftoehold = full_df.iloc[2, 6:11].astype(float)
off_scramtoehold = full_df.iloc[2, 11:16].astype(float)

print('OFF pred')
# Independent two-sample t-tests (SciPy default: equal variances assumed),
# real toeholds vs. each control group.
print(sp.ttest_ind(off_realtoehold, off_shufftoehold))
print(sp.ttest_ind(off_realtoehold, off_scramtoehold))

OFF pred
Ttest_indResult(statistic=198.990525799049, pvalue=4.552426941465725e-16)
Ttest_indResult(statistic=180.04959895242163, pvalue=1.013199521010825e-15)
