In [19]:
import os
import ipywidgets as widgets
from IPython.display import display

# PanCan TRIBE2 analysis

## Preliminary work
A baseline for pathway mutation averages needed to be established. In order to do this, a parser for files shipped with [PathwayMapper](http://www.pathwaymapper.org/) was hand-rolled along with a custom data structure describing the pathway contents and hierarchy. Here we show an example of a parsed pathway.

In [20]:
import pathways as lpw

@widgets.interact(pathway=[filename for filename in os.listdir('./pathways')])
def show_pathway(pathway):
    pw = lpw.parse_pathway('./pathways/' + pathway)
    print(f"Name: {pw[0]}, Contents:\n{pw[1]}")

interactive(children=(Dropdown(description='pathway', options=('TGF-Beta.txt', 'HIPPO.txt', 'WNT.txt', 'NRF2.t…

The average mutation is calculated for any given pathway and patient by only considering pathogenic mutations and the maximum mutation percentage detected. Complexes or families count as a single gene towards the overall average mutation (no weights applied).

The average mutation on all pathways for a random patient follows:

In [21]:
import pandas
from analysis import calculate_patient_mutations

pathways = []
for pw in os.listdir('./pathways'):
    pathway = lpw.parse_pathway('./pathways/' + pw)
    pathways.append(pathway)

patients_log = pandas.read_csv('TRIBE2_db.csv')
mutations_data = pandas.read_csv('TRIBE2_seq_res.csv')

result = calculate_patient_mutations('CB224', mutations_data, pathways)
print(result)

{'TGF-Beta': 0.0, 'HIPPO': 0.0, 'WNT': 2.3076923076923075, 'NRF2': 0.0, 'MYC': 0.0, 'RTK-RAS': 3.4166666666666665, 'TP53': 4.166666666666667, 'NOTCH': 0.0, 'PI3K': 0.0, 'Cell Cycle': 2.5}


Patients have been split into two groups, according to the treatment they had (arm0, arm1). This will hold for all future analysis.
Here we show statistics about mutations for the two groups of patients and how the mutations for each pathway correlate with **dpfs**

In [None]:
from analysis import process_patients

arm0_df = process_patients(patients_log[patients_log['arm'] == 0]['PatientFirstName'])
arm0_df.describe()

In [None]:
pandas.set_option("display.precision", 11)
arm0_db_df = arm0_df.join(patients_log.set_index('PatientFirstName'), on='PatientFirstName')
print(f"{arm0_db_df[['dpfs'] + [pw[0] for pw in pathways]].corr().iloc[0]}")

In [None]:
arm1_df = process_patients(patients_log[patients_log['arm'] == 1]['PatientFirstName'])
arm1_df.describe()

In [None]:
pandas.set_option("display.precision", 11)
arm1_db_df = arm1_df.join(patients_log.set_index('PatientFirstName'), on='PatientFirstName')
print(f"{arm1_db_df[['dpfs'] + [pw[0] for pw in pathways]].corr().iloc[0]}")

## Conversion of pathway data
Pathways are parsed from pathway files shipped with [PathwayMapper](http://www.pathwaymapper.org/). At this stage, the obtained data is transformed into a NetworkX graph giving each gene its own vertex: complexes and families are not represented explicitly. The resulting graph is directed.

In [None]:
import networkx as nx
import pathways_nx as pnx
import matplotlib.pyplot as plt
import pylab
import logging as log

plt.rcParams['figure.dpi'] = 90

@widgets.interact(pathway=[filename for filename in os.listdir('./pathways')])
def show_pathway(pathway):
    pw = pnx.pathway_to_nx('pathways/' + pathway)

    edge_labels=dict([((u,v,),d['label'])
                 for u,v,d in pw[1].edges(data=True)])
    labels = nx.get_node_attributes(pw[1], 'label')
    pos=nx.spring_layout(pw[1], 8)

    plt.figure(1,figsize=(12,12)) 
    nx.draw_networkx_edge_labels(pw[1], pos, edge_labels=edge_labels)
    nx.draw(pw[1], pos, node_size=1700, labels=labels, with_labels=True, node_shape="o",  node_color="none", bbox=dict(facecolor="skyblue", edgecolor='black', boxstyle='round,pad=0.4'))

## Computing weighted averages
In order to improve the correlation between **dpfs** and pathway mutations, we can employ weights on each gene. These are derived from various centrality measures

In [1]:
import pathways_nx as pnx
import networkx as nx
import os
import pandas

nx_pathways = []
for filename in os.listdir('./pathways'):
    nx_pathways.append(pnx.pathway_to_nx('pathways/' + filename))

patients_log = pandas.read_csv('TRIBE2_db.csv')
mutations_data = pandas.read_csv('TRIBE2_seq_res.csv')

### In-degree

In [3]:
from analysis_nx import process_patients_with_f

arm0_df_indeg = process_patients_with_f(patients_log[patients_log['arm'] == 0]['PatientFirstName'], nx.in_degree_centrality, nx_pathways, mutations_data)
arm0_df_indeg.describe()

Unnamed: 0,TGF-Beta,HIPPO,WNT,NRF2,MYC,RTK-RAS,TP53,NOTCH,PI3K,Cell Cycle
count,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0
mean,2.626929,0.0,0.655062,0.91358,0.0,1.49255,18.763889,1.163522,2.35571,9.703704
std,4.953057,0.0,1.420377,5.598797,0.0,1.04268,12.268532,2.299567,3.614993,6.563132
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.827586,10.125,0.0,0.0,5.75
50%,0.0,0.0,0.0,0.0,0.0,1.362069,21.25,0.0,0.0,10.75
75%,2.265625,0.0,0.0,0.0,0.0,2.103448,27.5,0.613208,4.53125,14.0
max,20.75,0.0,7.34,44.0,0.0,4.781609,43.0,11.09434,12.708333,30.0


In [10]:
pandas.set_option("display.precision", 11)
arm0_db_d_indegf = arm0_df_indeg.join(patients_log.set_index('PatientFirstName'), on='PatientFirstName')
print(f"{arm0_db_d_indegf[['dpfs'] + [pw[0] for pw in nx_pathways]].corr().iloc[0]}")

dpfs          1.00000000000
TGF-Beta     -0.05180275226
HIPPO                   NaN
WNT          -0.02174170470
NRF2          0.26055653829
MYC                     NaN
RTK-RAS      -0.11310601928
TP53         -0.05435628743
NOTCH         0.07403685237
PI3K          0.03503757492
Cell Cycle   -0.09226936015
Name: dpfs, dtype: float64


In [7]:
from analysis_nx import process_patients_with_f

arm1_df_indeg = process_patients_with_f(patients_log[patients_log['arm'] == 1]['PatientFirstName'], nx.in_degree_centrality, nx_pathways, mutations_data)
arm1_df_indeg.describe()

Unnamed: 0,TGF-Beta,HIPPO,WNT,NRF2,MYC,RTK-RAS,TP53,NOTCH,PI3K,Cell Cycle
count,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0
mean,1.6338619403,0.0,0.66671641791,0.05970149254,0.03616532721,1.6881111683,17.61473880597,1.1117994931,2.22388059701,8.92723880597
std,4.22081387824,0.0,1.39379493649,0.69109474047,0.25563721307,1.17443857065,13.64440010607,2.13433296243,3.71690587683,6.72526270743
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.93103448276,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,1.51724137931,20.75,0.0,0.0,10.625
75%,0.0,0.0,0.0,0.0,0.0,2.27586206897,27.5,0.81132075472,4.125,13.75
max,18.25,0.0,7.54,8.0,2.19230769231,5.83908045977,44.125,10.15094339623,17.91666666667,21.0


In [13]:
pandas.set_option("display.precision", 11)
arm1_db_df_indeg = arm1_df_indeg.join(patients_log.set_index('PatientFirstName'), on='PatientFirstName')
print(f"{arm1_db_df_indeg[['dpfs'] + [pw[0] for pw in nx_pathways]].corr().iloc[0]}")

dpfs          1.00000000000
TGF-Beta     -0.06616595990
HIPPO                   NaN
WNT          -0.05256678378
NRF2          0.08352453560
MYC          -0.05265988914
RTK-RAS       0.14794631496
TP53         -0.11281684289
NOTCH        -0.02592021718
PI3K          0.13209202648
Cell Cycle   -0.12639137193
Name: dpfs, dtype: float64


### Out-degree

In [12]:
from analysis_nx import process_patients_with_f

arm0_df_outdeg = process_patients_with_f(patients_log[patients_log['arm'] == 0]['PatientFirstName'], nx.out_degree_centrality, nx_pathways, mutations_data)
arm0_df_outdeg.describe()

Unnamed: 0,TGF-Beta,HIPPO,WNT,NRF2,MYC,RTK-RAS,TP53,NOTCH,PI3K,Cell Cycle
count,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0
mean,0.19097222222,0.01037037037,0.9837037037,0.16666666667,0.11490978158,1.51035901802,0.82330246914,0.70288842301,1.47325102881,10.43904320988
std,1.15780437321,0.07741867213,0.9006504819,1.49948231232,0.46489043298,1.86145914401,2.40214727723,1.52381859048,2.16942021878,6.4232242763
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.485,0.0,0.0,0.8275862069,0.0,0.0,0.0,5.75
50%,0.0,0.0,0.78,0.0,0.0,1.25287356322,0.0,0.0,0.0,11.5
75%,0.0,0.0,1.28,0.0,0.0,1.75,0.0,0.0,2.75,14.84375
max,9.9375,0.68,5.28,14.5,2.69230769231,16.58620689655,16.5,6.81132075472,11.08333333333,25.0


In [14]:
pandas.set_option("display.precision", 11)
arm0_db_df_outdeg = arm0_df_outdeg.join(patients_log.set_index('PatientFirstName'), on='PatientFirstName')
print(f"{arm0_db_df_outdeg[['dpfs'] + [pw[0] for pw in nx_pathways]].corr().iloc[0]}")

dpfs          1.00000000000
TGF-Beta      0.02890916087
HIPPO         0.04245896105
WNT           0.09316713412
NRF2         -0.04613419523
MYC           0.26819358259
RTK-RAS      -0.09855698721
TP53          0.00559572918
NOTCH         0.06066446533
PI3K          0.09017649935
Cell Cycle   -0.04413363227
Name: dpfs, dtype: float64


In [15]:
from analysis_nx import process_patients_with_f

arm1_df_outdeg = process_patients_with_f(patients_log[patients_log['arm'] == 1]['PatientFirstName'], nx.out_degree_centrality, nx_pathways, mutations_data)
arm1_df_outdeg.describe()

Unnamed: 0,TGF-Beta,HIPPO,WNT,NRF2,MYC,RTK-RAS,TP53,NOTCH,PI3K,Cell Cycle
count,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0
mean,0.32322761194,0.01253731343,0.97701492537,0.69402985075,0.20034443169,1.83136043918,0.62313432836,0.69050971557,1.42133084577,9.61660447761
std,1.56543007449,0.10784761295,0.94011519839,3.79993591545,0.68159037286,2.38327426401,2.32614396399,1.50015255307,2.27909035171,6.81259610129
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.52,0.0,0.0,0.81034482759,0.0,0.0,0.0,2.9375
50%,0.0,0.0,0.77,0.0,0.0,1.33908045977,0.0,0.0,0.0,11.125
75%,0.0,0.0,1.19,0.0,0.0,2.0,0.0,0.0,2.3125,14.75
max,10.3125,1.12,5.32,26.0,4.38461538462,14.68965517241,20.5,7.94339622642,10.75,24.375


In [16]:
pandas.set_option("display.precision", 11)
arm1_db_df_outdeg = arm1_df_outdeg.join(patients_log.set_index('PatientFirstName'), on='PatientFirstName')
print(f"{arm1_db_df_outdeg[['dpfs'] + [pw[0] for pw in nx_pathways]].corr().iloc[0]}")

dpfs          1.00000000000
TGF-Beta     -0.06184457498
HIPPO         0.07618156944
WNT           0.01182285336
NRF2          0.10812386832
MYC          -0.04663580626
RTK-RAS      -0.02274674199
TP53          0.06770914810
NOTCH         0.08720395888
PI3K          0.07505846557
Cell Cycle   -0.13821505997
Name: dpfs, dtype: float64


### Betweenness

In [22]:
from analysis_nx import process_patients_with_f

arm0_df_bet = process_patients_with_f(patients_log[patients_log['arm'] == 0]['PatientFirstName'], nx.betweenness_centrality, nx_pathways, mutations_data)
arm0_df_bet.describe()

  perc_mutation = weights.mul(patient_mutations, fill_value=np.float64(0.0)).sum() / weights.sum()


Unnamed: 0,TGF-Beta,HIPPO,WNT,NRF2,MYC,RTK-RAS,TP53,NOTCH,PI3K,Cell Cycle
count,162.0,162.0,162.0,155.0,162.0,162.0,162.0,162.0,162.0,162.0
mean,0.25462962963,0.0,0.17548500882,0.0,0.0,2.22161352986,0.33641975309,1.125,2.45004572474,18.60141093474
std,1.54373916427,0.0,0.58189353177,0.0,0.0,1.47314564193,2.53168139873,3.50700828953,4.10633802364,12.28116269145
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,1.37561924982,0.0,0.0,0.0,10.17857142857
50%,0.0,0.0,0.0,0.0,0.0,2.34394904459,0.0,0.0,0.0,20.75
75%,0.0,0.0,0.0,0.0,0.0,3.14968152866,0.0,0.0,5.0,27.5
max,13.25,0.0,3.09523809524,0.0,0.0,6.56121726822,22.5,18.75,16.94444444444,43.0


In [23]:
pandas.set_option("display.precision", 11)
arm0_db_df_bet = arm0_df_bet.join(patients_log.set_index('PatientFirstName'), on='PatientFirstName')
print(f"{arm0_db_df_bet[['dpfs'] + [pw[0] for pw in nx_pathways]].corr().iloc[0]}")

dpfs          1.00000000000
TGF-Beta      0.02890916087
HIPPO                   NaN
WNT           0.01694620962
NRF2                    NaN
MYC                     NaN
RTK-RAS      -0.10656893393
TP53         -0.04865032728
NOTCH         0.06796449064
PI3K         -0.00252672381
Cell Cycle   -0.05919601305
Name: dpfs, dtype: float64


In [29]:
from analysis_nx import process_patients_with_f

arm1_df_bet = process_patients_with_f(patients_log[patients_log['arm'] == 1]['PatientFirstName'], nx.betweenness_centrality, nx_pathways, mutations_data)
arm1_df_bet.describe()

  perc_mutation = weights.mul(patient_mutations, fill_value=np.float64(0.0)).sum() / weights.sum()


Unnamed: 0,TGF-Beta,HIPPO,WNT,NRF2,MYC,RTK-RAS,TP53,NOTCH,PI3K,Cell Cycle
count,134.0,134.0,134.0,128.0,134.0,134.0,134.0,134.0,134.0,134.0
mean,0.43097014925,0.0,0.08742004264,0.0,0.21699196326,2.3366007894,0.63805970149,1.75373134328,2.2878489497,17.4552238806
std,2.08724009931,0.0,0.41345394441,0.0,1.53382327841,1.5468721006,3.58641536277,4.72445496266,4.160560705,13.60139625276
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,1.40207006369,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,2.36883698986,0.0,0.0,0.0,20.75
75%,0.0,0.0,0.0,0.0,0.0,3.25123849965,0.0,0.0,4.09722222222,27.5
max,13.75,0.0,2.52380952381,0.0,13.15384615385,6.46956829441,27.5,22.75,23.88888888889,42.0


In [25]:
pandas.set_option("display.precision", 11)
arm1_db_df_bet = arm1_df_bet.join(patients_log.set_index('PatientFirstName'), on='PatientFirstName')
print(f"{arm1_db_df_bet[['dpfs'] + [pw[0] for pw in nx_pathways]].corr().iloc[0]}")

dpfs          1.00000000000
TGF-Beta      0.02890916087
HIPPO                   NaN
WNT           0.01694620962
NRF2                    NaN
MYC                     NaN
RTK-RAS      -0.10656893393
TP53         -0.04865032728
NOTCH         0.06796449064
PI3K         -0.00252672381
Cell Cycle   -0.05919601305
Name: dpfs, dtype: float64


### Closeness

In [27]:
from analysis_nx import process_patients_with_f

arm0_df_clos = process_patients_with_f(patients_log[patients_log['arm'] == 0]['PatientFirstName'], nx.closeness_centrality, nx_pathways, mutations_data)
arm0_df_clos.describe()

Unnamed: 0,TGF-Beta,HIPPO,WNT,NRF2,MYC,RTK-RAS,TP53,NOTCH,PI3K,Cell Cycle
count,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0
mean,2.69470899471,0.0,0.73695257663,0.91358024691,0.0,2.45586968214,18.01333333333,1.08187134503,1.84509887854,8.70921985816
std,5.09432734482,0.0,1.65284382062,5.598796523,0.0,1.55701006177,11.77779108037,2.13819365133,2.8267867145,5.9429913448
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,1.50727423468,9.72,0.0,0.0,5.13829787234
50%,0.0,0.0,0.0,0.0,0.0,2.52261371738,20.4,0.0,0.0,9.60638297872
75%,2.28571428571,0.0,0.0,0.0,0.0,3.44371986304,26.4,0.5701754386,3.62285452697,12.51063829787
max,21.34285714286,0.0,8.09784126829,44.0,0.0,7.35848279182,41.28,10.31578947368,13.49997324661,27.86170212766


In [28]:
pandas.set_option("display.precision", 11)
arm0_db_df_clos = arm0_df_clos.join(patients_log.set_index('PatientFirstName'), on='PatientFirstName')
print(f"{arm0_db_df_clos[['dpfs'] + [pw[0] for pw in nx_pathways]].corr().iloc[0]}")

dpfs          1.00000000000
TGF-Beta     -0.05205554694
HIPPO                   NaN
WNT          -0.01677642636
NRF2          0.26055653829
MYC                     NaN
RTK-RAS      -0.11431322190
TP53         -0.05435628743
NOTCH         0.07403685237
PI3K          0.09336284236
Cell Cycle   -0.09395603602
Name: dpfs, dtype: float64


In [30]:
from analysis_nx import process_patients_with_f

arm1_df_clos = process_patients_with_f(patients_log[patients_log['arm'] == 1]['PatientFirstName'], nx.closeness_centrality, nx_pathways, mutations_data)
arm1_df_clos.describe()

Unnamed: 0,TGF-Beta,HIPPO,WNT,NRF2,MYC,RTK-RAS,TP53,NOTCH,PI3K,Cell Cycle
count,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0
mean,1.66823027719,0.0,0.63744556724,0.05970149254,0.02801717102,2.63326260969,16.91014925373,1.03377847604,1.76261093893,8.00301683074
std,4.33893048562,0.0,1.38394166607,0.69109474047,0.19804138575,1.66904695495,13.09862410183,1.98455521068,2.85228658469,6.027113343
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,1.59733983042,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,2.72852839314,19.92,0.0,0.0,9.49468085106
75%,0.0,0.0,0.0,0.0,0.0,3.60695492537,26.4,0.75438596491,3.35717852833,12.28723404255
max,18.77142857143,0.0,6.87745950847,8.0,1.69837422406,7.73485102251,42.36,9.43859649123,12.78351753536,18.76595744681


In [31]:
pandas.set_option("display.precision", 11)
arm1_db_df_clos = arm1_df_clos.join(patients_log.set_index('PatientFirstName'), on='PatientFirstName')
print(f"{arm1_db_df_clos[['dpfs'] + [pw[0] for pw in nx_pathways]].corr().iloc[0]}")

dpfs          1.00000000000
TGF-Beta     -0.06535374045
HIPPO                   NaN
WNT          -0.00141656123
NRF2          0.08352453560
MYC          -0.05265988914
RTK-RAS       0.14198272352
TP53         -0.11281684289
NOTCH        -0.02592021718
PI3K          0.07809505960
Cell Cycle   -0.12614421901
Name: dpfs, dtype: float64


### Eigenvector

In [2]:
from analysis_nx import process_patients_with_f

arm0_df_eigen = process_patients_with_f(patients_log[patients_log['arm'] == 0]['PatientFirstName'], nx.eigenvector_centrality_numpy, nx_pathways, mutations_data)
arm0_df_eigen.describe()

Unnamed: 0,TGF-Beta,HIPPO,WNT,NRF2,MYC,RTK-RAS,TP53,NOTCH,PI3K,Cell Cycle
count,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0
mean,3.417692,-1.186637e-14,1.869881,0.91358,-8.377088e-14,6.45094e-05,36.88083,1e-06,1.867987,9.703704
std,6.619006,1.568644e-13,8.730605,5.598797,9.329448e-13,0.000185607,24.73271,1.2e-05,6.757887,6.563132
min,0.0,-1.994794e-12,-2.807228e-09,0.0,-9.207767e-12,-1.812048e-09,-2.061634e-11,-5.4e-05,-0.002562747,-1.582068e-15
25%,0.0,0.0,-3.229367e-13,0.0,0.0,1.991737e-08,20.24888,0.0,0.0,5.75
50%,0.0,0.0,0.0,0.0,0.0,3.921957e-07,40.99842,0.0,0.0,10.75
75%,1.5e-05,0.0,9.272366e-13,0.0,0.0,2.02566e-06,54.9967,0.0,2.431824e-11,14.0
max,27.666653,7.242464e-14,57.95548,44.0,2.17874e-12,0.001128642,85.99534,5.5e-05,36.47053,30.0


In [3]:
pandas.set_option("display.precision", 11)
arm0_db_df_eigen = arm0_df_eigen.join(patients_log.set_index('PatientFirstName'), on='PatientFirstName')
print(f"{arm0_db_df_eigen[['dpfs'] + [pw[0] for pw in nx_pathways]].corr().iloc[0]}")

dpfs          1.00000000000
TGF-Beta     -0.05393331941
HIPPO         0.02100397687
WNT          -0.05335094688
NRF2          0.26055653809
MYC          -0.35960456972
RTK-RAS      -0.11562146901
TP53         -0.04579071213
NOTCH         0.10383057058
PI3K          0.13875199146
Cell Cycle   -0.09226936015
Name: dpfs, dtype: float64


In [4]:
from analysis_nx import process_patients_with_f

arm1_df_eigen = process_patients_with_f(patients_log[patients_log['arm'] == 1]['PatientFirstName'], nx.eigenvector_centrality_numpy, nx_pathways, mutations_data)
arm1_df_eigen.describe()

Unnamed: 0,TGF-Beta,HIPPO,WNT,NRF2,MYC,RTK-RAS,TP53,NOTCH,PI3K,Cell Cycle
count,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0
mean,2.03482429601,-6.32851054145e-14,1.67848509994,-0.59419157971,-8.82434520805e-13,0.0001001854,34.90863791554,2.24144e-06,1.72215794256,8.92723880597
std,5.63706398897,6.49690875713e-13,8.10296391807,8.11552853681,7.344578529760001e-12,0.00025142319,27.201442072,1.18359e-05,6.06955644373,6.72526270743
min,0.0,-7.45870461098e-12,-2.329425584e-09,-93.32927540791,-8.36431414936e-11,-1.14e-09,-2e-11,-5.591121e-05,-3.44185517072e-10,-3.0531133177199997e-15
25%,0.0,0.0,-4.49598180138e-13,0.0,0.0,2.65e-09,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,3.7359e-07,41.49724377313,0.0,0.0,10.625
75%,0.0,0.0,1.0199035115500002e-12,0.0,0.0,2.79863e-06,54.99761898659,0.0,2.32035327485e-13,13.75
max,24.33332211697,0.0,57.9545669731,7.99999992943,1.10776343248e-12,0.00160694742,83.99456718041,7.0906e-05,37.9534285236,21.0


In [5]:
pandas.set_option("display.precision", 11)
arm1_db_df_eigen = arm1_df_eigen.join(patients_log.set_index('PatientFirstName'), on='PatientFirstName')
print(f"{arm1_db_df_eigen[['dpfs'] + [pw[0] for pw in nx_pathways]].corr().iloc[0]}")

dpfs          1.00000000000
TGF-Beta     -0.05842351847
HIPPO        -0.10707652713
WNT          -0.00277576384
NRF2         -0.14951114897
MYC           0.04435580662
RTK-RAS       0.08210276745
TP53         -0.12418210024
NOTCH         0.02475570199
PI3K         -0.00631692592
Cell Cycle   -0.12639137193
Name: dpfs, dtype: float64
