In [47]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import statsmodels.formula.api as smf

In [2]:
dataFile=r'Z:\CookLab\Liu\20190816_organizedData_MCM_loading\signal_data_total_v1.csv'

In [3]:
myData=pd.read_csv(dataFile)

In [4]:
# add physical time based on DHB length
myData['age_track'] = (myData.DHB_len - 1) *10 / 60

In [5]:
# add day of experiments (with 'd' because otherwise seaborn fails to recognize it as a category)
myData['day']=[f"d{x.split('_')[0]}" for x in myData.file]
myData = myData.astype({"day": 'object'})
set(myData['day'])

{'d20190506',
 'd20190607',
 'd20190609',
 'd20190625',
 'd20190701',
 'd201907012',
 'd201907016'}

In [6]:
myData.head()

Unnamed: 0.1,Unnamed: 0,file,original cell name,Amy_mod,movie,x,y,ab1 (heterochromatin),ab1 ch,ab2 (MCM),...,nucleus_Otsu_inner_ch2,nucleus_Otsu_inner_ch3,nucleus_Otsu_outer_volume,nucleus_Otsu_outer_ch1,nucleus_Otsu_outer_ch2,nucleus_Otsu_outer_ch3,nucleus_otsu_sphere_inner_volume,nucleus_otsu_sphere_outer_volume,age_track,day
0,0,20190625_cell-00.czi,7LE1(B),1130/10,190619-cdc6-dhb-pcna002xy09,172,956,HP1B,1,MCM3,...,158746493.0,1246769000.0,322121.0,2099590000.0,1003047000.0,7687183000.0,0.0,0.0,0.666667,d20190625
1,1,20190625_cell-01.czi,7L-E1(T),0,190619-cdc6-dhb-pcna002xy09,158,906,HP1B,1,MCM3,...,183584081.0,1457227000.0,321844.0,1981743000.0,979031000.0,8090406000.0,0.0,0.0,0.666667,d20190625
2,2,20190625_cell-02.czi,7L-E2(B),0,190619-cdc6-dhb-pcna002xy09,832,252,HP1B,1,MCM3,...,229507070.0,1469553000.0,287538.0,2365673000.0,1121201000.0,8483717000.0,0.0,0.0,2.0,d20190625
3,3,20190625_cell-03.czi,7L-E2(T),0,190619-cdc6-dhb-pcna002xy09,888,160,HP1B,1,MCM3,...,232045174.0,1484302000.0,298383.0,2060637000.0,1039858000.0,7789178000.0,0.0,0.0,2.0,d20190625
4,4,20190625_cell-05.czi,7L-G2,0,190619-cdc6-dhb-pcna002xy09,172,324,HP1B,1,MCM3,...,291875166.0,2993056000.0,602406.0,5265607000.0,1386750000.0,16564570000.0,0.0,0.0,19.5,d20190625


In [7]:
indexNames = myData[ myData['hetChrom_newctl_volume'] == 0 ].index

In [8]:
myData=myData.drop(indexNames)

In [9]:
myTrendData=pd.DataFrame(columns=["trendName", "slope", "intercept", "rvalue", "pvalue", "stderr","rsquared"])

In [10]:
myTrendDatand=pd.DataFrame(columns=["trendName", "slope", "intercept", "rvalue", "pvalue", "stderr","rsquared"])

In [11]:
myDataSel=myData.loc[((myData.decon==True) & (myData['ab1 (heterochromatin)']=='HP1B')),:].copy()

In [12]:
myDataSelnd=myData.loc[((myData.decon==False) & (myData['ab1 (heterochromatin)']=='HP1B')),:].copy()

In [13]:
### doing it right
%matplotlib notebook

myDataSel['temp']= (myDataSel.hetChrom_sphere_inner_20_ch2/myDataSel.nucleus_Otsu_inner_ch2)/(myDataSel.hetChrom_sphere_inner_20_ch3/myDataSel.nucleus_Otsu_inner_ch3)

sn.scatterplot(x='molecularAge_ratio', y = 'temp', data=myDataSel.loc[((myDataSel.day!='d20190625') & (myDataSel.day!='d201907012') &
                                                           ['G1' in x for x in myDataSel.category]),:],hue='category')
plt.ylabel("MCM/DAPI (hetChrom/nucleus)")
plt.xlabel("molecular age [DHB]")
plt.title("MCM density in heterochromatin pixels")

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'MCM density in heterochromatin pixels')

In [15]:
%matplotlib notebook

myDataSel['temp']= (myDataSel.hetChrom_sphere_inner_20_ch2/myDataSel.nucleus_Otsu_inner_ch2)/(myDataSel.hetChrom_sphere_inner_20_ch3/myDataSel.nucleus_Otsu_inner_ch3)
sn.regplot(x='molecularAge_ratio', y = 'temp', data=myDataSel.loc[((myDataSel.day!='d20190625') & (myDataSel.day!='d201907012') &
                                                           ['G1' in x for x in myDataSel.category]),:], order=2)
plt.ylabel("MCM/DAPI (hetChrom/nucleus)")
plt.xlabel("molecular age [DHB]")
plt.title("MCM density in heterochromatin pixels")

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'MCM density in heterochromatin pixels')

In [18]:
%matplotlib notebook

myDataSel['temp']= (myDataSel.hetChrom_sphere_inner_20_ch2/myDataSel.nucleus_Otsu_inner_ch2)/(myDataSel.hetChrom_sphere_inner_20_ch3/myDataSel.nucleus_Otsu_inner_ch3)
sn.residplot(x='molecularAge_ratio', y = 'temp', data=myDataSel.loc[((myDataSel.day!='d20190625') & (myDataSel.day!='d201907012') &
                                                           ['G1' in x for x in myDataSel.category]),:], order=1)
plt.ylabel("MCM/DAPI (hetChrom/nucleus)")
plt.xlabel("molecular age [DHB]")
plt.title("MCM density in heterochromatin pixels")

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'MCM density in heterochromatin pixels')

In [19]:
%matplotlib notebook

myDataSel['temp']= (myDataSel.hetChrom_sphere_inner_20_ch2/myDataSel.nucleus_Otsu_inner_ch2)/(myDataSel.hetChrom_sphere_inner_20_ch3/myDataSel.nucleus_Otsu_inner_ch3)
sn.residplot(x='molecularAge_ratio', y = 'temp', data=myDataSel.loc[((myDataSel.day!='d20190625') & (myDataSel.day!='d201907012') &
                                                           ['G1' in x for x in myDataSel.category]),:], order=2)
plt.ylabel("MCM/DAPI (hetChrom/nucleus)")
plt.xlabel("molecular age [DHB]")
plt.title("MCM density in heterochromatin pixels")

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'MCM density in heterochromatin pixels')

In [65]:
%matplotlib notebook

myDataSel['temp']=(myDataSel.hetChrom_sphere_outer_20_ch2/myDataSel.nucleus_Otsu_outer_ch2)/(myDataSel.hetChrom_sphere_outer_20_ch3/myDataSel.nucleus_Otsu_outer_ch3)
sn.regplot(x='molecularAge_ratio', y = 'temp', data=myDataSel.loc[((myDataSel.day!='d20190625') & (myDataSel.day!='d201907012') &
                                                           ['G1' in x for x in myDataSel.category]),:],order=2)
plt.ylabel("MCM/DAPI (hetChrom/nucleus)")
plt.xlabel("molecular age [DHB]")
plt.title("MCM density in heterochromatin pixels")

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'MCM density in heterochromatin pixels')

In [51]:
myDataSel['temp']= (myDataSel.hetChrom_sphere_inner_20_ch2/myDataSel.nucleus_Otsu_inner_ch2)/(myDataSel.hetChrom_sphere_inner_20_ch3/myDataSel.nucleus_Otsu_inner_ch3)


In [52]:
x = np.array(myDataSel.molecularAge_ratio)
y = np.array(myDataSel.temp)

In [53]:
coeff=np.polyfit(x,y,2)

In [54]:
model=np.poly1d(coeff)

In [55]:
results=smf.ols(formula='y~model(x)', data=myDataSel.loc[((myDataSel.day!='d20190625') & (myDataSel.day!='d201907012') & ['G1' in x for x in myDataSel.category]),:]).fit()

In [56]:
results.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.05
Model:,OLS,Adj. R-squared:,0.033
Method:,Least Squares,F-statistic:,2.912
Date:,"Wed, 22 Jan 2020",Prob (F-statistic):,0.0935
Time:,11:03:48,Log-Likelihood:,-398.17
No. Observations:,57,AIC:,800.3
Df Residuals:,55,BIC:,804.4
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,434.7499,59.098,7.356,0.000,316.314,553.186
model(x),-0.0040,0.002,-1.707,0.094,-0.009,0.001

0,1,2,3
Omnibus:,7.469,Durbin-Watson:,1.925
Prob(Omnibus):,0.024,Jarque-Bera (JB):,2.651
Skew:,0.111,Prob(JB):,0.266
Kurtosis:,1.967,Cond. No.,42100.0


In [31]:
quadratic

0.9624945257016663

In [44]:
stats

[array([0.4692583]),
 3,
 array([1.65686889, 0.49964791, 0.07167603]),
 2.5757174171303632e-14]

In [62]:
%matplotlib notebook

total=(myDataSel.hetChrom_20perc_ch2/myDataSel.nucleus_Otsu_ch2)/(myDataSel.hetChrom_20perc_ch3/myDataSel.nucleus_Otsu_ch3)
center=(myDataSel.hetChrom_sphere_inner_20_ch2/myDataSel.nucleus_Otsu_inner_ch2)/(myDataSel.hetChrom_sphere_inner_20_ch3/myDataSel.nucleus_Otsu_inner_ch3)
myDataSel['temp']=center/total
sn.regplot(x='molecularAge_ratio', y = 'temp', data=myDataSel.loc[((myDataSel.day!='d20190625') & (myDataSel.day!='d201907012') &
                                                           ['G1' in x for x in myDataSel.category]),:], order = 2)
plt.ylabel("MCM/DAPI (hetChrom/nucleus)")
plt.xlabel("molecular age [DHB]")
plt.title("MCM density in heterochromatin pixels")

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'MCM density in heterochromatin pixels')

In [63]:
x = np.array(myDataSel.molecularAge_ratio)
y = np.array(myDataSel.temp)
coeff=np.polyfit(x,y,2)
model=np.poly1d(coeff)
results=smf.ols(formula='y~model(x)', data=myDataSel.loc[((myDataSel.day!='d20190625') & (myDataSel.day!='d201907012') & ['G1' in x for x in myDataSel.category]),:]).fit()

In [64]:
results.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.05
Model:,OLS,Adj. R-squared:,0.033
Method:,Least Squares,F-statistic:,2.921
Date:,"Wed, 22 Jan 2020",Prob (F-statistic):,0.0931
Time:,11:16:41,Log-Likelihood:,-398.17
No. Observations:,57,AIC:,800.3
Df Residuals:,55,BIC:,804.4
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,433.9614,59.409,7.305,0.000,314.903,553.020
model(x),-0.0674,0.039,-1.709,0.093,-0.147,0.012

0,1,2,3
Omnibus:,7.46,Durbin-Watson:,1.925
Prob(Omnibus):,0.024,Jarque-Bera (JB):,2.649
Skew:,0.111,Prob(JB):,0.266
Kurtosis:,1.968,Cond. No.,2540.0
