# FIGURE 1

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from msresist.pre_processing import preprocessing
from msresist.figures.figure1 import plotClustergram, plotVarReplicates, plotReplicatesEndpoint, plotAveragedEndpoint, plotReplicatesFoldChangeEndpoint, plotVarReplicates, plotpca_explained, plotpca_ScoresLoadings_plotly
import warnings
# Silence every warning category for the rest of the session. This also hides
# deprecation/runtime warnings raised by the libraries used below, so comment it
# out when debugging unexpected results.
warnings.simplefilter("ignore")

In [None]:
# Raise pandas' display limits so long cell contents and wide frames are shown
# without truncation when a DataFrame is displayed.
pd.set_option('display.max_colwidth', 1000)
# pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 1000)

# 1. Phosphorylation Measurements

### A) PC9 AXL WT SAMPLES 

In [None]:
# Load the PC9 AXL WT data set through the project's preprocessing pipeline.
# NOTE(review): going by the flag names this requests motifs, a log2 transform and
# row-wise mean-centering with the variance and fold-change filters disabled --
# confirm against msresist.pre_processing.preprocessing.
ABC_wt = preprocessing(AXLwt=True, motifs=True, Vfilter=False, FCfilter=False, log2T=True, mc_row=True)

In [None]:
# Display the rows whose "Abbv" value contains the substring "GSK".
ABC_wt[ABC_wt["Abbv"].str.contains("GSK")]

In [None]:
# Keep columns 3 onwards (the original concatenated the adjacent slices 3:5 and 5:,
# which is the same selection) and index the matrix by ("Abbv", "Position").
data_wt = ABC_wt.iloc[:, 3:].set_index(["Abbv", "Position"])

# Colour limit passed to the clustergram: 80% of the largest absolute value.
# `DataFrame.abs().max().max()` is used instead of `max(np.max(np.abs(df)))` because
# `np.max` on a DataFrame returns a scalar in recent pandas, which makes the outer
# built-in `max()` fail; the chained form works on old and new pandas alike and
# skips NaNs.
plotClustergram(data_wt, "Clustergram AXL WT", data_wt.abs().max().max() * 0.8)

In [None]:
# Single-axes figure for the PCA explained-variance plot. The original line had a
# duplicated assignment target (`fig, ax = fig, ax = ...`), removed here.
fig, ax = plt.subplots(1, 1, figsize=(8, 5))

# The number of columns of the matrix is passed as the third argument
# (NOTE(review): presumably the number of components to show -- confirm).
plotpca_explained(ax, data_wt, data_wt.shape[1])

In [None]:
# PCA scores/loadings plot on the transposed matrix.
# NOTE(review): the roles of the two string arguments (a title-like label and "EGFR")
# are defined in msresist.figures.figure1 -- confirm there.
plotpca_ScoresLoadings_plotly(data_wt.T, "PC9 cells +Erlotinib", "EGFR")

In [None]:
# Reload the AXL WT data with rawdata=True.
# NOTE(review): presumably this keeps the individual replicate measurements instead
# of merging overlapping peptides -- confirm in `preprocessing`.
ABCwt_raw = preprocessing(AXLwt=True, rawdata=True, mc_row=True)

# Two side-by-side axes are handed to plotVarReplicates together with the raw table.
fig, ax = plt.subplots(1, 2, figsize=(15,6))
plotVarReplicates(ax, ABCwt_raw)

Variability of overlapping peptides across MS replicates. For peptides detected in exactly two MS replicates, Pearson's correlation coefficient was computed; for peptides detected in three or more replicates, the standard deviation (Std) was calculated instead.

### B) PC9 AXL MUTANTS TREATED WITH ERLOTINIB 

In [None]:
# Load the PC9 AXL mutants (+Erlotinib) data set through the preprocessing pipeline.
# NOTE(review): this call uses mc_col=True, whereas the AXL WT and +F154 loads in this
# notebook use mc_row=True -- confirm the different centering is intentional.
ABC_mutErl = preprocessing(Axlmuts_Erl=True, motifs=True, Vfilter=False, FCfilter=False, log2T=True, mc_col=True)

In [None]:
# Display the rows whose "Abbv" value contains the substring "RIPK1".
ABC_mutErl[ABC_mutErl["Abbv"].str.contains("RIPK1")]

In [None]:
# Keep columns 3 onwards (equivalent to concatenating the adjacent slices 3:5 and 5:)
# and index the matrix by ("Abbv", "Position").
data_mutErl = ABC_mutErl.iloc[:, 3:].set_index(["Abbv", "Position"])

# Colour limit: 80% of the largest absolute value. The chained `.max().max()` replaces
# `max(np.max(np.abs(df)))`, which fails when `np.max` on a DataFrame returns a scalar
# (recent pandas); it also skips NaNs.
plotClustergram(data_mutErl, "Clustergram AXL mutants +Erl", data_mutErl.abs().max().max() * 0.8)

In [None]:
# Single-axes figure for the PCA explained-variance plot (duplicated assignment
# target `fig, ax = fig, ax = ...` removed).
fig, ax = plt.subplots(1, 1, figsize=(8, 5))

# The number of columns of the matrix is passed as the third argument
# (NOTE(review): presumably the number of components to show -- confirm).
plotpca_explained(ax, data_mutErl, data_mutErl.shape[1])

In [None]:
# PCA scores/loadings plot on the transposed AXL-mutant (+Erlotinib) matrix.
# NOTE(review): the roles of the two string arguments are defined in
# msresist.figures.figure1 -- confirm there.
plotpca_ScoresLoadings_plotly(data_mutErl.T, "PC9 AXL mutants +Erlotinib", "EGFR")

### C) PC9 AXL MUTANTS TREATED WITH ERLOTINIB AND F154

In [None]:
# Load the PC9 AXL mutants (+Erlotinib +F154) data set through the preprocessing
# pipeline, with the same flags as the AXL WT load (mc_row=True).
ABC_mutF154 = preprocessing(Axlmuts_ErlF154=True, motifs=True, Vfilter=False, FCfilter=False, log2T=True, mc_row=True)

In [None]:
# Display the rows whose "Abbv" value contains the substring "RIP"
# (a broader match than the "RIPK1" filter used for the Erlotinib-only data).
ABC_mutF154[ABC_mutF154["Abbv"].str.contains("RIP")]

In [None]:
# Keep columns 3-4 and 6 onwards -- the column at position 5 is deliberately left out
# by the slices (NOTE(review): confirm which sample that is) -- and index the matrix
# by ("Abbv", "Position").
data_mutF154 = pd.concat([ABC_mutF154.iloc[:, 3:5], ABC_mutF154.iloc[:, 6:]], axis=1).set_index(["Abbv", "Position"])

# Colour limit: 70% of the largest absolute value (the other clustergrams use 80%).
# The chained `.max().max()` replaces `max(np.max(np.abs(df)))`, which fails when
# `np.max` on a DataFrame returns a scalar (recent pandas); it also skips NaNs.
plotClustergram(data_mutF154, "Clustergram AXL mutants +Erl +F154", data_mutF154.abs().max().max() * 0.7)

In [None]:
# Single-axes figure for the PCA explained-variance plot (duplicated assignment
# target `fig, ax = fig, ax = ...` removed).
fig, ax = plt.subplots(1, 1, figsize=(8, 5))

# The number of columns of the matrix is passed as the third argument
# (NOTE(review): presumably the number of components to show -- confirm).
plotpca_explained(ax, data_mutF154, data_mutF154.shape[1])

In [None]:
# PCA scores/loadings plot on the transposed AXL-mutant (+Erlotinib +F154) matrix.
# NOTE(review): the roles of the two string arguments are defined in
# msresist.figures.figure1 -- confirm there.
plotpca_ScoresLoadings_plotly(data_mutF154.T, "PC9 +Erl +F154", "EGFR")

### D) CONCATENATING BOTH AXL MUTANT DATA SETS AND SETTING FOLD CHANGE TO PC9 + ERLOTINIB

In [None]:
# Read the two raw mutant tables directly from CSV, bypassing `preprocessing`.
# The paths are relative, so the notebook must be run from the directory that
# contains the `msresist` folder.
ABC_mutErl_raw = pd.read_csv("./msresist/data/Raw/PC9_mutants_unstim_BR1_raw.csv")
ABC_mutF154_raw = pd.read_csv("./msresist/data/Raw/PC9_mutants_ActivatingAb_BR1_raw.csv")

In [None]:
from msresist.sequence_analysis import FormatName

# FormatName returns two sequences per table; the first is stored as "Protein" and the
# second as "Abbv" (NOTE(review): presumably long and abbreviated protein names).
# Both columns are written in a single `.assign` call per table.
ln, sn = FormatName(ABC_mutErl_raw)
ABC_mutErl_raw = ABC_mutErl_raw.assign(Protein=ln, Abbv=sn)

ln, sn = FormatName(ABC_mutF154_raw)
ABC_mutF154_raw = ABC_mutF154_raw.assign(Protein=ln, Abbv=sn)

In [None]:
from msresist.pre_processing import MapOverlappingPeptides

# Relabel the F154 table positionally with the Erlotinib table's column labels so
# the two frames can be stacked (assumes both files list their columns in the same
# order -- TODO confirm).
ABC_mutF154_raw.columns = ABC_mutErl_raw.columns

# Stack both tables and keep only the second of the three values returned by
# MapOverlappingPeptides.
_, overlap, _ = MapOverlappingPeptides(pd.concat([ABC_mutF154_raw, ABC_mutErl_raw]))

In [None]:
# NOTE: this rebinds `ABC_mutErl` and `ABC_mutF154`, which earlier cells defined from
# `preprocessing(...)`; after this cell they hold the raw tables indexed by
# ("Protein", "Sequence"), so the earlier cells must be re-run before reusing them.
ABC_mutErl = ABC_mutErl_raw.set_index(["Protein", "Sequence"])
ABC_mutF154 = ABC_mutF154_raw.set_index(["Protein", "Sequence"])

# First two columns of `overlap` are used as the (protein, sequence) lookup keys.
prots = list(overlap.iloc[:, 0])
seqs = list(overlap.iloc[:, 1])

# Look up each overlapping (protein, sequence) pair in both tables.
# NOTE(review): `.loc` returns a DataFrame rather than a Series if a pair occurs more
# than once in a table -- verify the keys are unique.
ERL, F154 = [], []
for i in range(overlap.shape[0]):
    ERL.append(ABC_mutErl.loc[prots[i], seqs[i]])
    F154.append(ABC_mutF154.loc[prots[i], seqs[i]])

# Rebuild frames from the collected rows; the F154 frame drops its first column
# after the index reset.
ERL = pd.DataFrame(ERL).reset_index()
F154 = pd.DataFrame(F154).reset_index().iloc[:, 1:]

In [None]:
# Append " + F154" to every column label except the first, then reuse the labels for
# the collected F154 rows. Re-running this cell appends the suffix again.
ABC_mutF154.columns = [
    col if pos == 0 else col + " + F154"
    for pos, col in enumerate(ABC_mutF154.columns)
]
F154.columns = ABC_mutF154.columns

In [None]:
# Place the Erlotinib rows next to columns 1-9 of the F154 rows, index by "Abbv"
# and drop the first two remaining columns.
conc = pd.concat([ERL, F154.iloc[:, 1:10]], axis=1)
conc = conc.set_index("Abbv").iloc[:, 2:]

# Natural log of each row divided by its own first column, so the first column
# becomes 0 and acts as the reference.
conc = np.log(conc.div(conc.iloc[:, 0], axis=0))

In [None]:
import seaborn as sns

# Symmetric colour scale around zero, computed once. `.abs().max().max()` replaces
# `max(np.max(np.abs(conc)))`, which fails when `np.max` on a DataFrame returns a
# scalar (recent pandas) and was previously evaluated twice.
conc_lim = conc.abs().max().max()

g = sns.clustermap(conc, method="complete", robust=True, vmax=conc_lim, vmin=-conc_lim, cmap="bwr", col_cluster=True, cbar_kws={"label": "Normalized Signal"})
g.fig.suptitle("Clustergram AXL mutants - Fold change to PC9+Erl", fontsize=17)
ax = g.ax_heatmap
# Blank out the heatmap's y-axis label; the trailing ';' suppresses the cell's text output.
ax.set_ylabel("");

## 2. Cell Viability Measurements:

Average of the two replicates, expressed as the fold change at 72 h relative to 0 h.

In [None]:
# Load the two cell-viability replicates, keeping the first 11 columns of each.
# NOTE(review): the first file is cut to 30 rows and the second to 29 -- confirm the
# different row counts are intentional.
Y_cv1 = pd.read_csv('./msresist/data/Phenotypic_data/CV_raw3.csv').iloc[:30, :11]
Y_cv2 = pd.read_csv('./msresist/data/Phenotypic_data/CV_raw4.csv').iloc[:29, :11]

# Both replicates are passed to plotAveragedEndpoint on a single axes.
fig, ax = plt.subplots(1, 1, figsize=(8,6))
plotAveragedEndpoint(ax, Y_cv1, Y_cv2)

Raw measurements of both replicates. The initial seeding in Exp4 was clearly off.

In [None]:
# Plot both replicates via plotReplicatesEndpoint on a single axes.
fig, ax = plt.subplots(1, 1, figsize=(8,6))
plotReplicatesEndpoint(ax, Y_cv1, Y_cv2)

However, after normalizing each replicate to its own t = 0 h measurement (fold change), the two replicates are notably reproducible:

In [None]:
# Plot both replicates via plotReplicatesFoldChangeEndpoint on a single axes.
fig, ax = plt.subplots(1, 1, figsize=(8,6))
plotReplicatesFoldChangeEndpoint(ax, Y_cv1, Y_cv2)

## Cell Viability levels of RIPK1-Transfected PC9 cells

In [None]:
# Read the tab-separated RIPK1 cell-viability file once (it was previously parsed
# twice) and split it: column 1 is kept as `Time`, columns 2 onwards as `full`.
# Column 0 is not used.
ripk1_cv_raw = pd.read_csv('./msresist/data/Phenotypic_data/RIPK1_CV_PC9_samples_raw_byRow.csv', delimiter='\t')
Time = ripk1_cv_raw.iloc[:, 1]
full = ripk1_cv_raw.iloc[:, 2:]

In [None]:
# Average every run of three consecutive columns of `full`.
# The output size is derived from the data instead of a hard-coded (20, 24) buffer,
# which raised IndexError for larger inputs and left zero-filled cells for smaller
# ones. Trailing columns that do not complete a group of three are ignored, and NaNs
# are skipped, as in the original per-cell `.mean()`.
# This cell must run before `full` is rebound to the labelled table further down.
n_groups = full.shape[1] // 3
triplets = full.iloc[:, :n_groups * 3].to_numpy(dtype=float).reshape(full.shape[0], n_groups, 3)

avg_full = pd.DataFrame(np.nanmean(triplets, axis=2))

In [None]:
# Put the time column next to the averaged measurements and label the result.
# The 24 measurement labels are every combination of six numeric prefixes with four
# condition suffixes, in prefix-major order.
# NOTE(review): prefixes look like doses/amounts and suffixes like construct +/- serum;
# confirm against the plate layout.
full = pd.concat([Time, avg_full], axis=1)
full.columns = ["Elapsed"] + [
    level + "_" + condition
    for level in (".8", "1", "1.3", "1.5", "2", "3")
    for condition in ("wt+fbs", "wt-fbs", "YF+fbs", "YF-fbs")
]

full = full.set_index("Elapsed")

In [None]:
# Keep the columns at even positions 0, 2, ..., 22 (12 of the 24 measurement columns).
# Re-running this cell on the already reduced frame fails, because positions >= 12
# no longer exist.
Xidx = list(range(0, 24, 2))
full = full.iloc[:, Xidx]

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(18,8))

# One line per column, plotted against the frame's index; columns at even positions
# are red, those at odd positions black.
for i in range(12):
    line_color = "red" if i % 2 == 0 else "black"
    ax.plot(full.iloc[:, i], label=full.columns[i], linewidth=3, color=line_color)

ax.legend(loc=2);

In [None]:
# Move "Elapsed" back from the index into a regular column so the next cell can
# filter rows on it.
full = full.reset_index()

In [None]:
import scipy as sp
import scipy.stats  # `import scipy` alone does not guarantee that `sp.stats` is loaded

# Values at Elapsed == 0 and the fold change at Elapsed == 108 relative to them
# (first matching row, all columns after "Elapsed").
Y_cv0 = full[full["Elapsed"] == 0].iloc[0, 1:]
Y_fc108 = full[full["Elapsed"] == 108].iloc[0, 1:] / Y_cv0

# Seeding check: the fold change should not correlate with the starting value.
# The p-value is computed once and reused for the printout and the assertion.
seeding_pval = sp.stats.pearsonr(Y_cv0, Y_fc108)[1]
print(seeding_pval)
assert seeding_pval > 0.05, "cell seeding effect"

# Normalise every measurement column by its value in the first row.
# NOTE(review): this uses row 0 rather than the Elapsed == 0 row selected above;
# the two agree only if the first row is the Elapsed == 0 measurement.
for ii in range(1, full.columns.size):
    full.iloc[:, ii] /= full.iloc[0, ii]

In [None]:
# Use "Elapsed" as the index again so the plot below has it on the x-axis.
full = full.set_index("Elapsed")

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(18,8))

# One line per column of the normalised table; columns at even positions are red,
# those at odd positions black. The parity test replaces a list rebuilt on every
# iteration just to check membership.
for i in range(12):
    line_color = "red" if i % 2 == 0 else "black"
    ax.plot(full.iloc[:, i], label=full.columns[i], linewidth=3, color=line_color)

# Trailing ';' suppresses the Legend repr, matching the earlier raw-data plot cell.
ax.legend(loc=2);

In [None]:
# Move "Elapsed" back into a regular column so the bar-chart cell can filter on it.
full = full.reset_index()

In [None]:
fig, ax = plt.subplots(nrows=1, figsize=(8, 4))

# Bar positions 1.0 .. 12.0, one per measurement column.
range_ = np.arange(1, 13, dtype=float)

# NOTE(review): the title says 72h while the plotted row is selected with
# Elapsed == 108 -- confirm which one is right.
ax.set_title("Cell Viability - 72h")
ax.set_xticks(np.arange(1, 13, 1))
ax.set_xticklabels(full.columns[1:], rotation=45)
endpoint_values = full[full["Elapsed"] == 108].iloc[0, 1:]
ax.bar(range_, endpoint_values, width=0.75, align='center', color="black")
ax.set_ylabel("fold-change confluency")