In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
from tqdm.notebook import tqdm
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.model_selection import GridSearchCV, cross_val_score, LeaveOneGroupOut, train_test_split
from sklearn.svm import SVC
from scipy.stats import zscore, pearsonr, uniform
from time import time


#sb.set(font_scale = 2.5)


# Plotting settings: matplotlib defaults for font size and line width.
plt.rcParams['font.size'] = 15
plt.rcParams['lines.linewidth'] = 2.5

# Keyword arguments intended for profile plots (thick lines, circle markers).
kwargs = dict(linewidth=4, marker='o')

In [2]:
# Load the protein-group matrix from the HDF file in the working directory.
hdf_path = 'OS_0006_LFQ_Proteotypic_PGs_Clean_TidyPgMatrix.hdf'
file = pd.read_hdf(hdf_path)

In [8]:
# Move the Treatment/Time/Replicate column levels into the row index, then
# drop every row that contains a missing value (dropna defaults to how='any').
# `axis` is passed by keyword: the positional form `dropna(0)` is deprecated
# in pandas 1.x and is keyword-only from pandas 2.0 onwards.
# NOTE: `file` is rebound here, so this cell cannot be re-run without first
# re-running the cell that loads the HDF file.
file = file.stack(['Treatment', 'Time', 'Replicate']).dropna(axis=0)
file

  file = file.stack(['Treatment', 'Time', 'Replicate']).dropna(0)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Fraction,03k,05k,12k,24k,79k
Pg,Treatment,Time,Replicate,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A0AV96,DMSO,30min,9,4.448592e+05,7.047565e+05,3.425534e+06,4.282439e+06,2.955941e+06
A0AV96,DMSO,60min,9,2.116094e+06,1.568795e+06,1.488393e+06,4.809955e+06,2.859876e+06
A0AV96,DMSO,90min,9,1.601095e+06,1.549022e+06,2.293508e+06,3.554049e+06,2.346646e+06
A0AV96,diABZI,30min,9,1.184941e+06,2.524886e+06,1.509693e+06,2.335732e+06,4.389568e+06
A0AV96,diABZI,60min,9,2.164302e+06,2.551109e+06,1.798593e+06,2.516727e+06,3.025456e+06
...,...,...,...,...,...,...,...,...
Q96EV8,diABZI,30min,9,1.520081e+06,1.177277e+06,8.305932e+05,8.058098e+05,2.018071e+06
Q96LA8,DMSO,90min,9,1.734624e+06,1.064101e+06,9.923594e+05,1.229581e+06,1.366450e+06
Q9HA47,diABZI,90min,9,7.417266e+05,2.629410e+05,3.636802e+05,3.839115e+05,4.082164e+05
Q9UBT6,DMSO,90min,9,6.154664e+05,5.067947e+05,5.540175e+05,7.686722e+05,6.525012e+05


In [11]:
# Row-wise total across all fraction columns, one value per
# (Pg, Treatment, Time, Replicate) row.
sums = file.sum(axis='columns')
sums

Pg      Treatment  Time   Replicate
A0AV96  DMSO       30min  9            1.181353e+07
                   60min  9            1.284311e+07
                   90min  9            1.134432e+07
        diABZI     30min  9            1.194482e+07
                   60min  9            1.205619e+07
                                           ...     
Q96EV8  diABZI     30min  9            6.351832e+06
Q96LA8  DMSO       90min  9            6.387116e+06
Q9HA47  diABZI     90min  9            2.160476e+06
Q9UBT6  DMSO       90min  9            3.097452e+06
        diABZI     90min  9            2.603018e+06
Length: 30965, dtype: float64

In [17]:
# Normalise every profile so that its fractions sum to 1.
#
# The frame is unstacked once (instead of twice) and the per-condition totals
# are computed by grouping the transposed frame on its row levels; this avoids
# `groupby(..., axis=1)`, which is deprecated in recent pandas releases.
condition_levels = ['Treatment', 'Time', 'Replicate']

# One row per protein group; columns are (Fraction, Treatment, Time, Replicate).
wide_intensities = file.unstack(condition_levels)

# For each (Treatment, Time, Replicate) group of columns, the sum over the
# fractions, broadcast back to the shape of `wide_intensities`.
condition_totals = (
    wide_intensities.T
    .groupby(level=condition_levels)
    .transform('sum')
    .T
)

# Divide by the totals and restore the long (stacked) layout.
org_profiles = (wide_intensities / condition_totals).stack(condition_levels)

In [18]:
# Display the normalised profiles (pandas truncates the rendered table).
org_profiles

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Fraction,03k,05k,12k,24k,79k
Pg,Treatment,Time,Replicate,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A0AV96,DMSO,30min,9,0.037657,0.059657,0.289967,0.362503,0.250217
A0AV96,DMSO,60min,9,0.164765,0.122151,0.115890,0.374516,0.222678
A0AV96,DMSO,90min,9,0.141136,0.136546,0.202172,0.313289,0.206856
A0AV96,diABZI,30min,9,0.099201,0.211379,0.126389,0.195544,0.367487
A0AV96,diABZI,60min,9,0.179518,0.211602,0.149184,0.208750,0.250946
...,...,...,...,...,...,...,...,...
Q96EV8,diABZI,30min,9,0.239314,0.185344,0.130764,0.126863,0.317715
Q96LA8,DMSO,90min,9,0.271582,0.166601,0.155369,0.192510,0.213938
Q9HA47,diABZI,90min,9,0.343316,0.121705,0.168333,0.177698,0.188947
Q9UBT6,DMSO,90min,9,0.198701,0.163617,0.178862,0.248163,0.210657


# Calculation of distances

In [21]:
# Per-fraction difference between the diABZI and DMSO profiles.
#
# Each treatment is selected by level name with `xs`, which also drops the
# 'Treatment' level. This replaces a flat five-slot `.loc` key on a
# four-level row index, where it is unclear which slots address row levels
# and which address columns.
# The subtraction aligns on the remaining (Pg, Time, Replicate) index, so a
# row present for only one treatment yields NaN.
diabzi_profiles = org_profiles.xs('diABZI', level='Treatment')
dmso_profiles = org_profiles.xs('DMSO', level='Treatment')
delta_profiles = diabzi_profiles - dmso_profiles
delta_profiles

Unnamed: 0_level_0,Unnamed: 1_level_0,Fraction,03k,05k,12k,24k,79k
Pg,Time,Replicate,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
A0AV96,30min,9,0.061544,0.151722,-0.163578,-0.166959,0.117271
A0AV96,60min,9,0.014753,0.089451,0.033294,-0.165766,0.028269
A0AV96,90min,9,-0.036605,-0.068003,0.061515,-0.088851,0.131945
A0AVT1,30min,9,0.037562,0.080994,-0.039264,-0.092894,0.013601
A0AVT1,60min,9,-0.098221,0.077537,0.038591,-0.010779,-0.007129
...,...,...,...,...,...,...,...
Q9Y6X9,60min,9,0.002652,0.032091,-0.009012,-0.021996,-0.003735
Q9Y6X9,90min,9,-0.011378,-0.052534,-0.014785,-0.045782,0.124479
Q9Y6Y8,30min,9,0.053481,0.098634,-0.010576,-0.234118,0.092579
Q9Y6Y8,60min,9,-0.020859,0.109952,0.057011,-0.167183,0.021080


In [26]:
# Euclidean (L2) norm of each delta profile across the fraction columns.
#
# The norm is computed over every column except 'L2' itself. Summing over the
# whole frame makes the cell non-idempotent: on a second run the previously
# stored 'L2' value is squared and added to the sum, which inflates the
# result by a factor of sqrt(2).
fraction_columns = delta_profiles.columns.drop('L2', errors='ignore')
delta_profiles['L2'] = np.sqrt(
    np.square(delta_profiles[fraction_columns]).sum(axis=1)
)
delta_profiles

Unnamed: 0_level_0,Unnamed: 1_level_0,Fraction,03k,05k,12k,24k,79k,L2
Pg,Time,Replicate,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A0AV96,30min,9,0.061544,0.151722,-0.163578,-0.166959,0.117271,0.436333
A0AV96,60min,9,0.014753,0.089451,0.033294,-0.165766,0.028269,0.274245
A0AV96,90min,9,-0.036605,-0.068003,0.061515,-0.088851,0.131945,0.264773
A0AVT1,30min,9,0.037562,0.080994,-0.039264,-0.092894,0.013601,0.191451
A0AVT1,60min,9,-0.098221,0.077537,0.038591,-0.010779,-0.007129,0.186095
...,...,...,...,...,...,...,...,...
Q9Y6X9,60min,9,0.002652,0.032091,-0.009012,-0.021996,-0.003735,0.056849
Q9Y6X9,90min,9,-0.011378,-0.052534,-0.014785,-0.045782,0.124479,0.203464
Q9Y6Y8,30min,9,0.053481,0.098634,-0.010576,-0.234118,0.092579,0.390084
Q9Y6Y8,60min,9,-0.020859,0.109952,0.057011,-0.167183,0.021080,0.297219


In [30]:
# Normalised profiles of protein group Q86WV6 in every condition.
# NOTE(review): presumably STING, since this accession is the reference used
# to build `l2_distances_to_sting` - confirm.
org_profiles.loc['Q86WV6']

Unnamed: 0_level_0,Unnamed: 1_level_0,Fraction,03k,05k,12k,24k,79k
Treatment,Time,Replicate,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
DMSO,30min,9,0.155008,0.213866,0.36772,0.211771,0.051635
DMSO,60min,9,0.141403,0.268327,0.330746,0.22117,0.038355
DMSO,90min,9,0.145457,0.286267,0.355995,0.181008,0.031273
diABZI,30min,9,0.250186,0.373595,0.217039,0.097526,0.061654
diABZI,60min,9,0.159458,0.334389,0.255644,0.153844,0.096665
diABZI,90min,9,0.135066,0.153571,0.329306,0.198263,0.183794


In [47]:
# L2 distance of every profile to the Q86WV6 reference profile.
# The reference frame is indexed by (Treatment, Time, Replicate); the
# subtraction relies on pandas aligning it with `org_profiles` on those
# shared index levels.
sting_profile = org_profiles.loc['Q86WV6']
squared_differences = np.square(org_profiles - sting_profile)
l2_distances_to_sting = (
    np.sqrt(squared_differences.sum(axis=1))
    .sort_values()
    .to_frame('L2_dist')
)
l2_distances_to_sting

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,L2_dist
Treatment,Time,Replicate,Pg,Unnamed: 4_level_1
DMSO,90min,9,Q86WV6,0.000000
DMSO,60min,9,Q86WV6,0.000000
diABZI,90min,9,Q86WV6,0.000000
diABZI,60min,9,Q86WV6,0.000000
diABZI,30min,9,Q86WV6,0.000000
diABZI,30min,9,...,...
diABZI,30min,9,Q76L83,0.900324
DMSO,90min,9,Q1ED39,0.904399
DMSO,30min,9,O43683,0.905823
DMSO,30min,9,Q9H0W8,0.970317


In [48]:
# Distances of protein group Q76L83 to the reference, one row per condition.
# The protein is selected by level name with `xs` instead of a flat
# positional `.loc` key, so the lookup does not depend on where the 'Pg'
# level sits in the index.
l2_distances_to_sting.xs('Q76L83', level='Pg')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,L2_dist
Treatment,Time,Replicate,Unnamed: 3_level_1
DMSO,90min,9,0.303246
diABZI,60min,9,0.409689
diABZI,90min,9,0.6126
DMSO,60min,9,0.775856
DMSO,30min,9,0.805815
diABZI,30min,9,0.900324
