## First, load the data and make the plots so that they match the Jensen and Janes figures

In [10]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import scipy.io
from matplotlib.colors import ListedColormap

# Load the custom colormap from a MATLAB .mat file.
# The 'arraymap' entry of the loaded file is wrapped in a ListedColormap so the
# styled tables below can use this exact color scheme
# (presumably one RGB row per color -- TODO confirm against the .mat file).
array_map_file = 'arraymap.mat'
array_map = scipy.io.loadmat(array_map_file)
custom_cmap = ListedColormap(array_map['arraymap'])

In [11]:
# Load the two CSV tables; index_col=0 makes the first CSV column the row index.
# `data` is the measurement table displayed in the next cell; `vpr` is the
# table whose z-scored 'VPR' column is displayed further down.
data = pd.read_csv('Jensen_Janes_data_integral.csv', index_col=0)
vpr = pd.read_csv('Jensen_Janes_VPR.csv', index_col=0)

In [12]:
# Display the loaded measurement table (rows are indexed by the first CSV column).
data

Unnamed: 0,AKT,ATF-2,CREB,ERK1/2,GSK3B,HSP27,IkBa,JNK,p38
DMSO,93.785493,292.2493,53.590789,153.767035,54.346361,125.895253,133.871113,109.090488,671.953301
API-2 (Akt),193.833056,316.724573,137.990806,74.482494,89.504915,134.768089,203.733509,95.28501,627.610264
BAY11-7085 (NF-kb),223.273403,385.029048,137.410578,141.255027,93.85318,146.612164,153.01634,97.657847,1055.572416
PP2 (Src),156.86662,308.374795,158.633846,106.30386,114.319547,128.855753,129.967561,107.640407,1071.483277
SB203580 (p38 MAPK),170.5697,259.48599,94.806097,172.91069,88.555254,42.698096,159.707154,84.03348,437.532073
SB216763 (GSK3B),214.647075,273.583114,93.389362,65.607369,63.697069,154.712477,171.347716,77.247852,799.771339
SP600125,178.555419,278.596625,213.396368,92.876112,75.142544,135.522981,188.617823,63.163854,1149.875904
U0126 (ERK1/2),238.880397,344.263644,91.29194,21.805747,114.316515,119.658786,214.930828,80.515353,642.487819
API-2+ BAY11-7085,147.766811,399.423231,106.128873,106.446268,77.326988,133.405933,146.05681,107.499558,775.355564
API-2+ SB203580,146.33135,330.644064,68.529038,113.682541,75.857098,27.411399,178.608387,70.860733,374.153616


In [13]:
# Z-score every column of both tables: DataFrame.apply(axis=0) passes each
# column to stats.zscore, and nan_policy='omit' is forwarded so NaNs are
# ignored when the column mean and standard deviation are computed.
# Only the z-scored VPR table is displayed in this cell, shaded with the
# custom colormap.
df_VPR_zscore = vpr.apply(stats.zscore, axis=0, nan_policy='omit')
df_data_zscore = data.apply(stats.zscore, axis=0, nan_policy='omit')
df_VPR_zscore.style.background_gradient(cmap=custom_cmap)

Unnamed: 0,VPR
DMSO,3.0934
API-2 (Akt),1.014259
BAY11-7085 (NF-kb),0.681597
PP2 (Src),0.515265
SB203580 (p38 MAPK),-0.482722
SB216763 (GSK3B),0.473683
SP600125,-0.15006
U0126 (ERK1/2),0.473683
API-2+ BAY11-7085,0.598431
API-2+ SB203580,-0.399557


In [15]:
# Display the z-scored data as a heatmap (rendered inside the data frame),
# using a color scheme that approximates the one used by Jensen and Janes.
# NOTE(review): df_data_zscore is recomputed here with the same expression as
# in the previous code cell, so this cell does not depend on that assignment.
df_data_zscore = data.apply(stats.zscore, axis=0, nan_policy='omit')
df_data_zscore.style.background_gradient(cmap=custom_cmap)

Unnamed: 0,AKT,ATF-2,CREB,ERK1/2,GSK3B,HSP27,IkBa,JNK,p38
DMSO,-2.228994,-0.454046,-1.700243,0.508224,-1.703232,0.149996,-1.566977,0.068813,-0.763067
API-2 (Akt),0.545335,-0.160478,0.346111,-0.781313,0.035824,0.323657,1.313874,-0.33418,-0.854134
BAY11-7085 (NF-kb),1.361719,0.658799,0.332043,0.304721,0.250904,0.555473,-0.777503,-0.264915,0.02477
PP2 (Src),-0.479748,-0.260629,0.84662,-0.263749,1.263237,0.20794,-1.727945,0.026484,0.057446
SB203580 (p38 MAPK),-0.09976,-0.847025,-0.700941,0.81959,-0.011149,-1.47836,-0.5016,-0.662621,-1.244497
SB216763 (GSK3B),1.12251,-0.677938,-0.735291,-0.925664,-1.240716,0.714014,-0.021589,-0.860699,-0.500568
SP600125,0.121685,-0.617803,2.174386,-0.482147,-0.674585,0.338432,0.690562,-1.271822,0.21844
U0126 (ERK1/2),1.794503,0.169839,-0.786145,-1.638083,1.263087,0.027934,1.775608,-0.765318,-0.82358
API-2+ BAY11-7085,-0.732086,0.83145,-0.42641,-0.261432,-0.566536,0.296997,-1.064487,0.022373,-0.55071
API-2+ SB203580,-0.771892,0.006479,-1.338052,-0.143737,-0.639241,-1.777555,0.277812,-1.047144,-1.374657


## Set up the covariance matrix and find its eigenvectors and eigenvalues

In [None]:
# Covariance matrix of the z-scored measurements (one row/column per data column).
# Uses df_data_zscore, the z-scored frame built in the cells above; the name
# `zscore_df` used here previously is not defined in any earlier cell and
# raised NameError on a fresh kernel.
# NOTE: stats.zscore defaults to ddof=0 while DataFrame.cov() defaults to
# ddof=1, so the diagonal comes out slightly above 1 rather than exactly 1.
# This only rescales the eigenvalues; the eigenvectors are unaffected.
S = df_data_zscore.cov()

In [None]:
# S is a symmetric covariance matrix, so use np.linalg.eigh: it is the routine
# intended for symmetric/Hermitian input and always returns real eigenvalues.
# np.linalg.eig makes no guarantee about eigenvalue order, but the cells below
# treat column 0 of eigVec as PC1, so the ordering must be made explicit.
# eigh returns eigenvalues in ascending order; reverse both outputs so that
# eigVals is descending and eigVec[:, 0] is the first principal component.
# Eigenvector signs are arbitrary in PCA and may differ from np.linalg.eig's.
eigVals, eigVec = np.linalg.eigh(np.asarray(S))
eigVals, eigVec = eigVals[::-1], eigVec[:, ::-1]

In [None]:
# Inspect the eigenvalues to confirm their ordering before treating column 0
# of eigVec as PC1. The original author observed them in descending order for
# this data, which makes the 0th eigenvector PC1.
eigVals

In [None]:
# Calculate the PCA scores by matrix-multiplying the z-scored data
# (rows x variables) with the eigenvector matrix (variables x components);
# column k of T holds the scores on the k-th eigenvector.
# Uses df_data_zscore, the z-scored frame built in the cells above; the name
# `zscore_df` used here previously is not defined in any earlier cell and
# raised NameError on a fresh kernel.
T = np.matmul(df_data_zscore.values, eigVec)