In [9]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [10]:

# Load the CSV that sits next to this notebook into a DataFrame.
# The bare name on the last line makes the notebook render the frame.
data_RAAM_raw = pd.read_csv(filepath_or_buffer="Data_AkrongAndMcGahan.csv")
data_RAAM_raw


Unnamed: 0,library,B1_1,B2_1,B1_2,B2_2,aa,pos,mut
0,36062,28493,30860,93150,58496,,-1,
1,1401,7067,1274,43195,46,T,145,S
2,1304,82,319,2163,1778,T,145,L
3,1256,0,0,16,0,T,145,P
4,1203,46,1063,434,0,G,148,L
...,...,...,...,...,...,...,...,...
424,11,0,0,0,0,L,151,Y
425,10,0,0,0,0,R,153,D
426,10,0,19,86,12,P,146,T
427,0,0,90,0,0,S,158,N


In [11]:
# Filter, pseudocount and normalise each replicate, then stack both replicates.
def _normalize_replicate(raw, b1_col, b2_col, pseudocount=0.1):
    """Build the per-replicate table of library-normalised B1/B2 ratios.

    Parameters
    ----------
    raw : pandas.DataFrame
        Must contain the columns 'pos', 'mut', 'library', `b1_col` and `b2_col`.
    b1_col, b2_col : str
        Names of the two count columns belonging to this replicate.
    pseudocount : float, default 0.1
        Value substituted for zero counts after filtering.

    Returns
    -------
    pandas.DataFrame
        Columns 'pos', 'mut', `b1_col`, `b2_col`, 'library', plus
        '<b1_col>dMut', '<b2_col>dMut' and 'changeB2_B1' (B2 ratio minus B1 ratio).

    Raises
    ------
    ValueError
        If no row survives the filter, because there is then no reference row.
    """
    count_cols = [b1_col, b2_col, 'library']

    # Keep rows present in the library and seen in at least one of the two samples.
    in_library = raw['library'] != 0
    seen = (raw[b1_col] != 0) | (raw[b2_col] != 0)
    rep = raw.loc[in_library & seen, ['pos', 'mut', b1_col, b2_col, 'library']]
    if rep.empty:
        raise ValueError(
            f"no rows left after filtering on 'library', {b1_col!r} and {b2_col!r}"
        )

    # Substitute the pseudocount for zeros in the count columns only.
    # DataFrame.applymap is deprecated (pandas >= 2.1), and applying the
    # substitution to every column would also rewrite a 'pos' value of 0.
    rep = rep.assign(**{col: rep[col].replace(0, pseudocount) for col in count_cols})

    # Every ratio is taken relative to the first surviving row.
    # NOTE(review): in the displayed data that row is the pos == -1 entry with
    # no 'mut' value -- confirm it is always the intended reference.
    library_ratio = rep['library'] / rep['library'].iloc[0]
    b1_ratio_col = f'{b1_col}dMut'
    b2_ratio_col = f'{b2_col}dMut'
    rep[b1_ratio_col] = (rep[b1_col] / rep[b1_col].iloc[0]) / library_ratio
    rep[b2_ratio_col] = (rep[b2_col] / rep[b2_col].iloc[0]) / library_ratio
    rep['changeB2_B1'] = rep[b2_ratio_col] - rep[b1_ratio_col]
    return rep


# Replicate 1 uses B1_1 / B2_1, replicate 2 uses B1_2 / B2_2.
RAAM_1 = _normalize_replicate(data_RAAM_raw, 'B1_1', 'B2_1')
RAAM_2 = _normalize_replicate(data_RAAM_raw, 'B1_2', 'B2_2')

# Concatenate RAAM_1 and RAAM_2 (original row labels are kept, so they repeat)
data_RAAM = pd.concat([RAAM_1[['pos', 'mut', 'changeB2_B1']], RAAM_2[['pos', 'mut', 'changeB2_B1']]])

# Sort by 'pos' while it is still numeric
data_RAAM = data_RAAM.sort_values(by='pos')

# Convert 'pos' to a string
data_RAAM['pos'] = data_RAAM['pos'].astype(str)

data_RAAM

Unnamed: 0,pos,mut,changeB2_B1
0,-1,,0.000000
0,-1,,0.000000
226,136,S,-0.080901
289,136,T,-3.270944
285,136,D,0.001655
...,...,...,...
329,158,Y,-0.154810
53,158,R,-2.060509
302,158,C,-3.817117
72,158,V,-0.549014


In [16]:
# Ordinary least squares on changeB2_B1 with 'pos', 'mut' and their
# interaction as predictors ('pos' was cast to str when data_RAAM was built).
interaction_spec = ols(formula='changeB2_B1 ~ pos * mut', data=data_RAAM)
model = interaction_spec.fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:            changeB2_B1   R-squared:                       0.699
Model:                            OLS   Adj. R-squared:                  0.081
Method:                 Least Squares   F-statistic:                     1.131
Date:                Sat, 23 Sep 2023   Prob (F-statistic):              0.226
Time:                        16:58:29   Log-Likelihood:                -923.62
No. Observations:                 364   AIC:                             2337.
Df Residuals:                     119   BIC:                             3292.
Df Model:                         244                                         
Covariance Type:            nonrobust                                         
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
Intercept              -1.1812    