In [1]:
import scipy as sp
from scipy import stats
import numpy as np 
import pandas as pd
from FileExporter import create_download_link
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cm
from msresist.mining_bioreplicates import MergeDfbyMean, AvsBacrossCond, AvsBvsCacrossCond
from msresist.plsr import FilteringOutPeptides, MeasuredVsPredicted_LOOCVplot
from msresist.comp_estimator import MyOwnKMEANS
from sklearn.cross_decomposition import PLSRegression
from sklearn.pipeline import Pipeline

In [7]:
# Biological replicates. Note that C already contains TR1 and TR2 merged
# (TODO: ask Jacq how she merged them).
A = pd.read_csv('./msresist/data/20180817_JG_AM_TMT10plex_R1_psms.csv', header=0)
B = pd.read_csv('./msresist/data/20190214_JG_AM_PC9_AXL_TMT10_AC28_R2_PSMs.csv', header=0)
C = pd.read_csv('./msresist/data/CombinedBR3_TR1&2.csv', header=0)

# Cell viability: third column of ydata.csv, restricted to its first 10 entries.
Y = np.array(pd.read_csv('./msresist/data/ydata.csv', header=0))
Y_cv = Y[:, 2]
Y_cv = Y_cv[:10]

# Data: columns 2..11 of each replicate; `t` holds their labels (taken from A).
a, b, c = A.iloc[:, 2:12], B.iloc[:, 2:12], C.iloc[:, 2:12]
t = list(a.columns)

# Peptide sequences: first column of each replicate.
Aseqs, Bseqs, Cseqs = A.iloc[:, 0], B.iloc[:, 0], C.iloc[:, 0]

# Protein names: keep only the part of the second column that precedes "OS".
# Built with comprehensions instead of map() + append side effects, so no
# throwaway lists of None are created.
Anames = [description.split("OS")[0] for description in A.iloc[:, 1]]
Bnames = [description.split("OS")[0] for description in B.iloc[:, 1]]
Cnames = [description.split("OS")[0] for description in C.iloc[:, 1]]

# Overwrite the description column with the shortened names.
A['Master Protein Descriptions'] = Anames
B['Master Protein Descriptions'] = Bnames
C['Master Protein Descriptions'] = Cnames

A.shape

(287, 12)

## Merging Biological Replicates

There are overlapping peptides across all 3 BRs, e.g. gSTAENAEyLR-1, qNVPVINITyDSTPEDVk, gSHQISLDNPDyQQDFFPk, ... The same proteins have different descriptions, which is why I'm renaming them to `.split("OS")[0]`, i.e. the part of the description that precedes "OS".

In [3]:
# Show every row of replicate A whose protein description contains "UFO".
A.loc[
    A["Master Protein Descriptions"].str.contains("UFO")
]

Unnamed: 0,peptide-phosphosite,Master Protein Descriptions,PC9,Erl,R428,Erl/R428,Erl+HGF,Erl+FGF,Erl+IGF,KO Erl,KO R428,KO Erl/R428
284,yVLcPSTTPSPAQPADR-1,Tyrosine-protein kinase receptor UFO,1,1.285279,0.566884,0.624717,1.182767,1.46029,1.5703,0.319098,0.373691,0.299368


In [4]:
# Show every row of replicate B whose protein description contains "UFO".
B.loc[
    B["Master Protein Descriptions"].str.contains("UFO")
]

Unnamed: 0,peptide-phosphosite,Master Protein Descriptions,PC9,Erl,R428,Erl/R428,Erl+HGF,Erl+FGF,Erl+IGF,KO Erl,KO R428,KO Erl/R428
145,iYNGDyYR-1,Tyrosine-protein kinase receptor UFO,1,1.305922,0.974189,0.574212,1.353998,1.019262,0.989592,0.270721,0.240121,0.236747
148,kIYNGDyYR-1,Tyrosine-protein kinase receptor UFO,1,0.866954,0.736251,0.302111,1.159684,0.885693,0.77258,0.040421,0.049458,0.032329
358,yGEVFEPTVER-1,Tyrosine-protein kinase receptor UFO,1,1.129717,1.821515,1.306699,1.161714,1.181414,1.247977,0.200229,0.219713,0.180827
386,yVLcPSTTPSPAQPADR-1,Tyrosine-protein kinase receptor UFO,1,1.31488,0.410112,0.352842,1.127708,1.164169,1.352921,0.24873,0.222011,0.159555


In [5]:
# Show every row of replicate C whose protein description contains "UFO".
C.loc[
    C["Master Protein Descriptions"].str.contains("UFO")
]

Unnamed: 0,peptide-phosphosite,Master Protein Descriptions,PC9,Erl,R428,Erl/R428,Erl+HGF,Erl+FGF,Erl+IGF,KO Erl,KO R428,KO Erl/R428
232,gQTPYPGVENSEIyDYLR-1,Tyrosine-protein kinase receptor UFO,1,1.368868,0.559906,0.597293,0.771363,1.000567,0.69522,0.512165,0.480649,0.673265
313,iYNGDYyR-1,Tyrosine-protein kinase receptor UFO,1,0.885824,0.663015,0.260392,0.865051,0.693606,0.602872,0.026783,0.027537,0.02327
325,kIYNGDyYR-1,Tyrosine-protein kinase receptor UFO,1,1.097803,0.854161,0.322835,1.30288,0.904276,0.821917,0.042604,0.043066,0.025683
751,yGEVFEPTVER-1,Tyrosine-protein kinase receptor UFO,1,0.974589,1.414364,0.933852,0.904417,1.024589,0.971516,0.121042,0.121686,0.081393
806,yVLcPSTTPSPAQPADR-1,Tyrosine-protein kinase receptor UFO,1,1.583323,0.446682,0.342265,0.709241,0.722113,0.929405,0.388418,0.207508,0.249281


In [6]:
# Merge the three biological replicates with the project helper MergeDfbyMean.
ABC_avg = MergeDfbyMean(A, B, C, t)

# Selecting "Master Protein Descriptions" directly on ABC_avg raised a KeyError:
# the merged frame has 10 columns, so the label is not a regular column.
# NOTE(review): presumably the identifier columns ended up in the index
# (e.g. through a groupby inside MergeDfbyMean) -- confirm against the helper.
# Filter on a flattened view so ABC_avg itself keeps its original layout.
description_col = "Master Protein Descriptions"
if description_col not in ABC_avg.columns and description_col in ABC_avg.index.names:
    ABC_avg_flat = ABC_avg.reset_index()
else:
    ABC_avg_flat = ABC_avg

ABC_avg_flat[ABC_avg_flat[description_col].str.contains("UFO")]

shape of concatenated matrix: (1496, 12)
total number of recurrences: {2: 246, 1: 620, 3: 128}
shape of averaged matrix: (994, 10)


KeyError: 'Master Protein Descriptions'

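Note that the shape of ABC_avg is (994, *10*) rather than (994, 12), meaning that the first two columns (`peptide-phosphosite` and `Master Protein Descriptions`) are not really considered regular columns when trying to use them with pandas.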

## Variability between overlapping peptides across Biological Replicates

In [None]:
# Disabled cell: the A-vs-B call to AvsBacrossCond and its figure export are commented out.
# NOTE(review): re-enable or delete this cell; what the helper plots is not visible here -- confirm.
# AvsB = AvsBacrossCond(A,B, t)
# plt.savefig("ABrawAcrossCond_NoOL.png")

In [None]:
# Disabled cell: the A-vs-C call to AvsBacrossCond and its figure export are commented out.
# NOTE(review): re-enable or delete this cell; what the helper plots is not visible here -- confirm.
# AvsC = AvsBacrossCond(A,C, t)
# plt.savefig("ACrawAcrossCond_NoOL.png")

In [None]:
# Disabled cell: the B-vs-C call to AvsBacrossCond and its figure export are commented out.
# NOTE(review): re-enable or delete this cell; what the helper plots is not visible here -- confirm.
# BvsC = AvsBacrossCond(B,C, t)
# plt.savefig("BCrawAcrossCond_NoOL.png")

In [None]:
# Disabled cell: the three-way call to AvsBvsCacrossCond and its figure export are commented out.
# NOTE(review): re-enable or delete this cell; what the helper plots is not visible here -- confirm.
# AvsBvsC = AvsBvsCacrossCond(A,B,C,t)
# plt.savefig("ABCrawAcrossCond_NoOL.png")