In [1]:
import os
import ipywidgets as widgets
from IPython.display import display

# PanCan TRIBE2 analysis

## Preliminary work
A baseline for pathway mutation averages needed to be established. In order to do this, a parser for files shipped with [PathwayMapper](http://www.pathwaymapper.org/) was hand-rolled along with a custom data structure describing the pathway contents and hierarchy. Here we show an example of a parsed pathway.

In [2]:
import pathways as lpw

# Sort the file names so the dropdown order is stable across machines/runs
# (os.listdir returns entries in arbitrary order).
@widgets.interact(pathway=sorted(os.listdir('./pathways')))
def show_pathway(pathway):
    """Parse the selected pathway file and print its name and contents.

    Parameters
    ----------
    pathway : str
        Name of a file inside ``./pathways``, chosen from the dropdown.
    """
    pw = lpw.parse_pathway(os.path.join('./pathways', pathway))
    # The parsed result is indexable: element 0 is printed as the name,
    # element 1 as the contents.
    print(f"Name: {pw[0]}, Contents:\n{pw[1]}")

interactive(children=(Dropdown(description='pathway', options=('TGF-Beta.txt', 'HIPPO.txt', 'WNT.txt', 'NRF2.t…

The average mutation is calculated for any given pathway and patient by only considering pathogenic mutations and the maximum mutation percentage detected. Complexes or families count as a single gene towards the overall average mutation (no weights applied).

The average mutation on each pathway for a sample patient (`CB224`) follows:

In [3]:
import pandas
from analysis import calculate_patient_mutations

# Parse every file found in ./pathways and keep the parsed pathways ordered
# by their first element (the element printed as the pathway name earlier).
pathways = sorted(
    (lpw.parse_pathway('./pathways/' + entry) for entry in os.listdir('./pathways')),
    key=lambda parsed: parsed[0],
)

# Patient log and sequencing results, both read from the working directory.
patients_log = pandas.read_csv('TRIBE2_db.csv')
mutations_data = pandas.read_csv('TRIBE2_seq_res.csv')

# Per-pathway averages for one hard-coded example patient.
result = calculate_patient_mutations('CB224', mutations_data, pathways)
print(result)

{'Cell Cycle': 2.5, 'HIPPO': 0.0, 'MYC': 0.0, 'NOTCH': 0.0, 'NRF2': 0.0, 'PI3K': 0.0, 'RTK-RAS': 3.4166666666666665, 'TGF-Beta': 0.0, 'TP53': 4.166666666666667, 'WNT': 2.3076923076923075}


Patients have been split into two groups according to the treatment they received (arm0, arm1). This split holds for all subsequent analyses.
Here we show summary statistics of the mutations for the two groups of patients, and how the mutations for each pathway correlate with **dpfs**.

In [4]:
from analysis import process_patients

# Pathway averages for the patients whose 'arm' value is 0.
arm0_df = process_patients(
    patients_log.loc[patients_log['arm'] == 0, 'PatientFirstName']
)
# Keep describe() as the final expression so the summary table is rendered.
arm0_df.describe()

Unnamed: 0,Cell Cycle,HIPPO,MYC,NOTCH,NRF2,PI3K,RTK-RAS,TGF-Beta,TP53,WNT
count,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0
mean,4.721605,0.025926,0.746914,1.49177,0.415638,2.170595,4.190329,2.254321,7.045267,3.664292
std,3.045467,0.193547,3.021788,2.307105,2.100991,3.117726,3.30484,4.078182,4.383254,2.437574
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,3.0,0.0,0.0,0.0,0.0,0.0,2.25,0.0,4.5,2.153846
50%,4.8,0.0,0.0,0.0,0.0,0.0,3.583333,0.0,7.583333,3.346154
75%,6.675,0.0,0.0,2.833333,0.0,4.136364,5.541667,3.95,10.333333,5.076923
max,18.1,1.7,17.5,11.6,14.666667,20.727273,15.833333,16.6,19.666667,11.923077


In [5]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-0 pathway averages, matching on
# the patient name (the 'dpfs' column selected below is expected to come from the log).
arm0_db_df = arm0_df.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(arm0_db_df[['dpfs', *(entry[0] for entry in pathways)]].corr().iloc[0])

dpfs          1.00000000000
Cell Cycle   -0.04440247702
HIPPO         0.04245896105
MYC           0.26819358259
NOTCH         0.05562896088
NRF2          0.20949607706
PI3K          0.16945628397
RTK-RAS      -0.08269877738
TGF-Beta     -0.04376672946
TP53         -0.01443085081
WNT           0.12680323858
Name: dpfs, dtype: float64


In [6]:
# Pathway averages for the patients whose 'arm' value is 1.
arm1_df = process_patients(
    patients_log.loc[patients_log['arm'] == 1, 'PatientFirstName']
)
# Keep describe() as the final expression so the summary table is rendered.
arm1_df.describe()

Unnamed: 0,Cell Cycle,HIPPO,MYC,NOTCH,NRF2,PI3K,RTK-RAS,TGF-Beta,TP53,WNT
count,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0
mean,4.24925373134,0.03134328358,0.91044776119,1.27711442786,0.48258706468,1.99728629579,4.86940298507,1.56567164179,6.53980099502,3.51894374282
std,2.93671972567,0.26961903238,3.08988207325,2.04632816182,2.54009359321,2.94420748388,3.69884275634,3.6578087133,4.77239604779,2.19208570035
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.3,0.0,0.0,0.0,0.0,0.0,2.52083333333,0.0,0.5,2.30769230769
50%,4.85,0.0,0.0,0.0,0.0,0.0,3.75,0.0,7.41666666667,3.26923076923
75%,6.2,0.0,0.0,2.53333333333,0.0,3.22727272727,6.66666666667,0.0,9.83333333333,4.75
max,13.2,2.8,18.75,9.66666666667,17.33333333333,15.72727272727,19.91666666667,14.6,22.0,9.92307692308


In [7]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-1 pathway averages, matching on
# the patient name (the 'dpfs' column selected below is expected to come from the log).
arm1_db_df = arm1_df.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(arm1_db_df[['dpfs', *(entry[0] for entry in pathways)]].corr().iloc[0])

dpfs          1.00000000000
Cell Cycle   -0.12647532371
HIPPO         0.07618156944
MYC          -0.01966932520
NOTCH        -0.00766043272
NRF2          0.11540925246
PI3K          0.00471179680
RTK-RAS       0.13577007518
TGF-Beta     -0.08225417760
TP53         -0.04168762758
WNT          -0.00075981504
Name: dpfs, dtype: float64


## Conversion of pathway data
Pathways are parsed from pathway files shipped with [PathwayMapper](http://www.pathwaymapper.org/). At this stage, the obtained data is transformed into a NetworkX graph giving each gene its own vertex: complexes and families are not represented explicitly. The resulting graph is directed.

In [8]:
import networkx as nx
import pathways_nx as pnx
import matplotlib.pyplot as plt
import pylab
import logging as log

# Render figures at 90 dpi.
plt.rcParams['figure.dpi'] = 90

# Sort the file names so the dropdown order is stable (os.listdir order is arbitrary).
@widgets.interact(pathway=sorted(os.listdir('./pathways')))
def show_pathway(pathway):
    """Draw the selected pathway file as a labelled NetworkX graph.

    NOTE: this intentionally reuses the name ``show_pathway`` and therefore
    replaces the text-based viewer defined in an earlier cell.

    Parameters
    ----------
    pathway : str
        Name of a file inside ``./pathways``, chosen from the dropdown.
    """
    # pathway_to_nx returns an indexable result; element 1 is the graph.
    graph = pnx.pathway_to_nx(os.path.join('pathways', pathway))[1]

    # Every edge is expected to carry a 'label' attribute (KeyError otherwise).
    edge_labels = {(u, v): d['label'] for u, v, d in graph.edges(data=True)}
    labels = nx.get_node_attributes(graph, 'label')
    # k is the spring layout's optimal node distance; a fixed seed makes the
    # drawing reproducible across runs instead of changing on every redraw.
    pos = nx.spring_layout(graph, k=8, seed=0)

    plt.figure(1, figsize=(12, 12))
    nx.draw_networkx_edge_labels(graph, pos, edge_labels=edge_labels)
    # Nodes themselves are invisible (node_color="none"); each gene is shown
    # as its label inside a rounded sky-blue box.
    nx.draw(
        graph,
        pos,
        node_size=1700,
        labels=labels,
        with_labels=True,
        node_shape="o",
        node_color="none",
        bbox=dict(facecolor="skyblue", edgecolor='black', boxstyle='round,pad=0.4'),
    )

interactive(children=(Dropdown(description='pathway', options=('TGF-Beta.txt', 'HIPPO.txt', 'WNT.txt', 'NRF2.t…

## Computing weighted averages with no complexes
In order to improve the correlation between **dpfs** and pathway mutations, we can employ weights on each gene.<br>
These are derived from various centrality measures and don't take into account the gene hierarchy.

In [9]:
import pathways_nx as pnx
import networkx as nx
import os
import pandas

# Convert every pathway file into its NetworkX form, then order the results
# by their first element (used as the pathway name in later cells).
nx_pathways = [
    pnx.pathway_to_nx('pathways/' + entry) for entry in os.listdir('./pathways')
]
nx_pathways.sort(key=lambda converted: converted[0])

# Patient log and sequencing results, both read from the working directory.
patients_log = pandas.read_csv('TRIBE2_db.csv')
mutations_data = pandas.read_csv('TRIBE2_seq_res.csv')

### In-degree

In [10]:
from analysis_nx import process_patients_with_f

# Score the arm-0 patients, handing nx.in_degree_centrality to
# process_patients_with_f as the centrality measure used for gene weights.
arm0_df_indeg = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 0, 'PatientFirstName'],
    nx.in_degree_centrality,
    nx_pathways,
    mutations_data,
)
# Keep describe() as the final expression so the summary table is rendered.
arm0_df_indeg.describe()

Unnamed: 0,Cell Cycle,HIPPO,MYC,NOTCH,NRF2,PI3K,RTK-RAS,TGF-Beta,TP53,WNT
count,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0
mean,9.7037037037,0.0,0.0,1.16352201258,0.91358024691,2.35570987654,1.49255002129,2.62692901235,18.76388888889,0.6550617284
std,6.56313238433,0.0,0.0,2.29956675709,5.598796523,3.61499308582,1.0426796787,4.9530571154,12.26853237539,1.42037710281
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,5.75,0.0,0.0,0.0,0.0,0.0,0.8275862069,0.0,10.125,0.0
50%,10.75,0.0,0.0,0.0,0.0,0.0,1.36206896552,0.0,21.25,0.0
75%,14.0,0.0,0.0,0.61320754717,0.0,4.53125,2.10344827586,2.265625,27.5,0.0
max,30.0,0.0,0.0,11.09433962264,44.0,12.70833333333,4.7816091954,20.75,43.0,7.34


In [11]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-0 in-degree scores by patient name.
arm0_db_d_indegf = arm0_df_indeg.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(arm0_db_d_indegf[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])

dpfs          1.00000000000
Cell Cycle   -0.09226936015
HIPPO                   NaN
MYC                     NaN
NOTCH         0.07403685237
NRF2          0.26055653829
PI3K          0.03503757492
RTK-RAS      -0.11310601928
TGF-Beta     -0.05180275226
TP53         -0.05435628743
WNT          -0.02174170470
Name: dpfs, dtype: float64


In [12]:
from analysis_nx import process_patients_with_f

# Score the arm-1 patients, handing nx.in_degree_centrality to
# process_patients_with_f as the centrality measure used for gene weights.
arm1_df_indeg = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 1, 'PatientFirstName'],
    nx.in_degree_centrality,
    nx_pathways,
    mutations_data,
)
# Keep describe() as the final expression so the summary table is rendered.
arm1_df_indeg.describe()

Unnamed: 0,Cell Cycle,HIPPO,MYC,NOTCH,NRF2,PI3K,RTK-RAS,TGF-Beta,TP53,WNT
count,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0,134.0
mean,8.92723880597,0.0,0.03616532721,1.1117994931,0.05970149254,2.22388059701,1.6881111683,1.6338619403,17.61473880597,0.66671641791
std,6.72526270743,0.0,0.25563721307,2.13433296243,0.69109474047,3.71690587683,1.17443857065,4.22081387824,13.64440010607,1.39379493649
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.93103448276,0.0,0.0,0.0
50%,10.625,0.0,0.0,0.0,0.0,0.0,1.51724137931,0.0,20.75,0.0
75%,13.75,0.0,0.0,0.81132075472,0.0,4.125,2.27586206897,0.0,27.5,0.0
max,21.0,0.0,2.19230769231,10.15094339623,8.0,17.91666666667,5.83908045977,18.25,44.125,7.54


In [13]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-1 in-degree scores by patient name.
arm1_db_df_indeg = arm1_df_indeg.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(arm1_db_df_indeg[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])

dpfs          1.00000000000
Cell Cycle   -0.12639137193
HIPPO                   NaN
MYC          -0.05265988914
NOTCH        -0.02592021718
NRF2          0.08352453560
PI3K          0.13209202648
RTK-RAS       0.14794631496
TGF-Beta     -0.06616595990
TP53         -0.11281684289
WNT          -0.05256678378
Name: dpfs, dtype: float64


### Out-degree

In [14]:
from analysis_nx import process_patients_with_f

# Score the arm-0 patients, handing nx.out_degree_centrality to
# process_patients_with_f as the centrality measure used for gene weights.
arm0_df_outdeg = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 0, 'PatientFirstName'],
    nx.out_degree_centrality,
    nx_pathways,
    mutations_data,
)
# Keep describe() as the final expression so the summary table is rendered.
arm0_df_outdeg.describe()

Unnamed: 0,Cell Cycle,HIPPO,MYC,NOTCH,NRF2,PI3K,RTK-RAS,TGF-Beta,TP53,WNT
count,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0,162.0
mean,10.43904320988,0.01037037037,0.11490978158,0.70288842301,0.16666666667,1.47325102881,1.51035901802,0.19097222222,0.82330246914,0.9837037037
std,6.4232242763,0.07741867213,0.46489043298,1.52381859048,1.49948231232,2.16942021878,1.86145914401,1.15780437321,2.40214727723,0.9006504819
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,5.75,0.0,0.0,0.0,0.0,0.0,0.8275862069,0.0,0.0,0.485
50%,11.5,0.0,0.0,0.0,0.0,0.0,1.25287356322,0.0,0.0,0.78
75%,14.84375,0.0,0.0,0.0,0.0,2.75,1.75,0.0,0.0,1.28
max,25.0,0.68,2.69230769231,6.81132075472,14.5,11.08333333333,16.58620689655,9.9375,16.5,5.28


In [15]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-0 out-degree scores by patient name.
arm0_db_df_outdeg = arm0_df_outdeg.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(arm0_db_df_outdeg[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])

dpfs          1.00000000000
Cell Cycle   -0.04413363227
HIPPO         0.04245896105
MYC           0.26819358259
NOTCH         0.06066446533
NRF2         -0.04613419523
PI3K          0.09017649935
RTK-RAS      -0.09855698721
TGF-Beta      0.02890916087
TP53          0.00559572918
WNT           0.09316713412
Name: dpfs, dtype: float64


In [None]:
from analysis_nx import process_patients_with_f

# Score the arm-1 patients, handing nx.out_degree_centrality to
# process_patients_with_f as the centrality measure used for gene weights.
arm1_df_outdeg = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 1, 'PatientFirstName'],
    nx.out_degree_centrality,
    nx_pathways,
    mutations_data,
)
# Keep describe() as the final expression so the summary table is rendered.
arm1_df_outdeg.describe()

In [None]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-1 out-degree scores by patient name.
arm1_db_df_outdeg = arm1_df_outdeg.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(arm1_db_df_outdeg[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])

### Betweenness

In [None]:
from analysis_nx import process_patients_with_f

# Score the arm-0 patients, handing nx.betweenness_centrality to
# process_patients_with_f as the centrality measure used for gene weights.
arm0_df_bet = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 0, 'PatientFirstName'],
    nx.betweenness_centrality,
    nx_pathways,
    mutations_data,
)
# Keep describe() as the final expression so the summary table is rendered.
arm0_df_bet.describe()

In [None]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-0 betweenness scores by patient name.
arm0_db_df_bet = arm0_df_bet.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(arm0_db_df_bet[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])

In [None]:
from analysis_nx import process_patients_with_f

# Score the arm-1 patients, handing nx.betweenness_centrality to
# process_patients_with_f as the centrality measure used for gene weights.
arm1_df_bet = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 1, 'PatientFirstName'],
    nx.betweenness_centrality,
    nx_pathways,
    mutations_data,
)
# Keep describe() as the final expression so the summary table is rendered.
arm1_df_bet.describe()

In [None]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-1 betweenness scores by patient name.
arm1_db_df_bet = arm1_df_bet.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(arm1_db_df_bet[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])

### Closeness

In [None]:
from analysis_nx import process_patients_with_f

# Score the arm-0 patients, handing nx.closeness_centrality to
# process_patients_with_f as the centrality measure used for gene weights.
arm0_df_clos = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 0, 'PatientFirstName'],
    nx.closeness_centrality,
    nx_pathways,
    mutations_data,
)
# Keep describe() as the final expression so the summary table is rendered.
arm0_df_clos.describe()

In [None]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-0 closeness scores by patient name.
arm0_db_df_clos = arm0_df_clos.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(arm0_db_df_clos[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])

In [None]:
from analysis_nx import process_patients_with_f

# Score the arm-1 patients, handing nx.closeness_centrality to
# process_patients_with_f as the centrality measure used for gene weights.
arm1_df_clos = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 1, 'PatientFirstName'],
    nx.closeness_centrality,
    nx_pathways,
    mutations_data,
)
# Keep describe() as the final expression so the summary table is rendered.
arm1_df_clos.describe()

In [None]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-1 closeness scores by patient name.
arm1_db_df_clos = arm1_df_clos.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(arm1_db_df_clos[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])

### Eigenvector

In [None]:
from analysis_nx import process_patients_with_f

# Score the arm-0 patients, handing nx.eigenvector_centrality_numpy to
# process_patients_with_f as the centrality measure used for gene weights.
arm0_df_eigen = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 0, 'PatientFirstName'],
    nx.eigenvector_centrality_numpy,
    nx_pathways,
    mutations_data,
)
# Keep describe() as the final expression so the summary table is rendered.
arm0_df_eigen.describe()

In [None]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-0 eigenvector scores by patient name.
arm0_db_df_eigen = arm0_df_eigen.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(arm0_db_df_eigen[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])

In [None]:
from analysis_nx import process_patients_with_f

# Score the arm-1 patients, handing nx.eigenvector_centrality_numpy to
# process_patients_with_f as the centrality measure used for gene weights.
arm1_df_eigen = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 1, 'PatientFirstName'],
    nx.eigenvector_centrality_numpy,
    nx_pathways,
    mutations_data,
)
# Keep describe() as the final expression so the summary table is rendered.
arm1_df_eigen.describe()

In [None]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-1 eigenvector scores by patient name.
arm1_db_df_eigen = arm1_df_eigen.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(arm1_db_df_eigen[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])

## Hierarchy-aware evaluation
This time the hierarchy of a gene inside a complex or family is taken into account when computing the average mutations. This is represented with a weight, which is computed as the reciprocal of the product of the gene containers' cardinalities. For example, if a gene is contained in a family of 4, which is contained in a family of 6, it would have a weight of 1/4\*1/6 = 1/24.

In [None]:
import pathways_nx as pnx
import networkx as nx
import os
import pandas

# Rebuild the NetworkX pathways for the hierarchy-aware section, ordered by
# their first element (used as the pathway name in later cells).
nx_pathways = sorted(
    (pnx.pathway_to_nx('pathways/' + entry) for entry in os.listdir('./pathways')),
    key=lambda converted: converted[0],
)

# Reload the patient log and sequencing results from the working directory.
patients_log = pandas.read_csv('TRIBE2_db.csv')
mutations_data = pandas.read_csv('TRIBE2_seq_res.csv')

### In-degree

In [None]:
from analysis_nx import process_patients_with_f

# Arm-0 scores using nx.in_degree_centrality as the centrality measure.
h_arm0_df_indeg = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 0, 'PatientFirstName'],
    nx.in_degree_centrality,
    nx_pathways,
    mutations_data,
    True,  # presumably switches on hierarchy-aware weighting — confirm in analysis_nx
)
# Keep describe() as the final expression so the summary table is rendered.
h_arm0_df_indeg.describe()

In [None]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-0 in-degree scores by patient name.
h_arm0_db_d_indegf = h_arm0_df_indeg.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(h_arm0_db_d_indegf[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])

In [None]:
from analysis_nx import process_patients_with_f

# Arm-1 scores using nx.in_degree_centrality as the centrality measure.
h_arm1_df_indeg = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 1, 'PatientFirstName'],
    nx.in_degree_centrality,
    nx_pathways,
    mutations_data,
    True,  # presumably switches on hierarchy-aware weighting — confirm in analysis_nx
)
# Keep describe() as the final expression so the summary table is rendered.
h_arm1_df_indeg.describe()

In [None]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-1 in-degree scores by patient name.
h_arm1_db_df_indeg = h_arm1_df_indeg.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(h_arm1_db_df_indeg[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])

### Out-degree

In [None]:
from analysis_nx import process_patients_with_f

# Arm-0 scores using nx.out_degree_centrality as the centrality measure.
h_arm0_df_outdeg = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 0, 'PatientFirstName'],
    nx.out_degree_centrality,
    nx_pathways,
    mutations_data,
    True,  # presumably switches on hierarchy-aware weighting — confirm in analysis_nx
)
# Keep describe() as the final expression so the summary table is rendered.
h_arm0_df_outdeg.describe()

In [None]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-0 out-degree scores by patient name.
h_arm0_db_df_outdeg = h_arm0_df_outdeg.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(h_arm0_db_df_outdeg[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])

In [None]:
from analysis_nx import process_patients_with_f

# Arm-1 scores using nx.out_degree_centrality as the centrality measure.
h_arm1_df_outdeg = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 1, 'PatientFirstName'],
    nx.out_degree_centrality,
    nx_pathways,
    mutations_data,
    True,  # presumably switches on hierarchy-aware weighting — confirm in analysis_nx
)
# Keep describe() as the final expression so the summary table is rendered.
h_arm1_df_outdeg.describe()

In [None]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-1 out-degree scores by patient name.
h_arm1_db_df_outdeg = h_arm1_df_outdeg.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(h_arm1_db_df_outdeg[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])

### Betweenness

In [None]:
from analysis_nx import process_patients_with_f

# Arm-0 scores using nx.betweenness_centrality as the centrality measure.
h_arm0_df_bet = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 0, 'PatientFirstName'],
    nx.betweenness_centrality,
    nx_pathways,
    mutations_data,
    True,  # presumably switches on hierarchy-aware weighting — confirm in analysis_nx
)
# Keep describe() as the final expression so the summary table is rendered.
h_arm0_df_bet.describe()

In [None]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-0 betweenness scores by patient name.
h_arm0_db_df_bet = h_arm0_df_bet.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(h_arm0_db_df_bet[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])

In [None]:
from analysis_nx import process_patients_with_f

# Arm-1 scores using nx.betweenness_centrality as the centrality measure.
h_arm1_df_bet = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 1, 'PatientFirstName'],
    nx.betweenness_centrality,
    nx_pathways,
    mutations_data,
    True,  # presumably switches on hierarchy-aware weighting — confirm in analysis_nx
)
# Keep describe() as the final expression so the summary table is rendered.
h_arm1_df_bet.describe()

In [None]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-1 betweenness scores by patient name.
h_arm1_db_df_bet = h_arm1_df_bet.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(h_arm1_db_df_bet[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])

### Closeness

In [None]:
from analysis_nx import process_patients_with_f

# Arm-0 scores using nx.closeness_centrality as the centrality measure.
h_arm0_df_clos = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 0, 'PatientFirstName'],
    nx.closeness_centrality,
    nx_pathways,
    mutations_data,
    True,  # presumably switches on hierarchy-aware weighting — confirm in analysis_nx
)
# Keep describe() as the final expression so the summary table is rendered.
h_arm0_df_clos.describe()

In [None]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-0 closeness scores by patient name.
h_arm0_db_df_clos = h_arm0_df_clos.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(h_arm0_db_df_clos[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])

In [None]:
from analysis_nx import process_patients_with_f

# Arm-1 scores using nx.closeness_centrality as the centrality measure.
h_arm1_df_clos = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 1, 'PatientFirstName'],
    nx.closeness_centrality,
    nx_pathways,
    mutations_data,
    True,  # presumably switches on hierarchy-aware weighting — confirm in analysis_nx
)
# Keep describe() as the final expression so the summary table is rendered.
h_arm1_df_clos.describe()

In [None]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-1 closeness scores by patient name.
h_arm1_db_df_clos = h_arm1_df_clos.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(h_arm1_db_df_clos[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])

### Eigenvector

In [None]:
from analysis_nx import process_patients_with_f

# Arm-0 scores using nx.eigenvector_centrality_numpy as the centrality measure.
h_arm0_df_eigen = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 0, 'PatientFirstName'],
    nx.eigenvector_centrality_numpy,
    nx_pathways,
    mutations_data,
    True,  # presumably switches on hierarchy-aware weighting — confirm in analysis_nx
)
# Keep describe() as the final expression so the summary table is rendered.
h_arm0_df_eigen.describe()

In [None]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-0 eigenvector scores by patient name.
h_arm0_db_df_eigen = h_arm0_df_eigen.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(h_arm0_db_df_eigen[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])

In [None]:
from analysis_nx import process_patients_with_f

# Arm-1 scores using nx.eigenvector_centrality_numpy as the centrality measure.
h_arm1_df_eigen = process_patients_with_f(
    patients_log.loc[patients_log['arm'] == 1, 'PatientFirstName'],
    nx.eigenvector_centrality_numpy,
    nx_pathways,
    mutations_data,
    True,  # presumably switches on hierarchy-aware weighting — confirm in analysis_nx
)
# Keep describe() as the final expression so the summary table is rendered.
h_arm1_df_eigen.describe()

In [None]:
# Show floats with 11 decimals so small correlations remain readable.
pandas.set_option("display.precision", 11)
# Attach the patient-log columns to the arm-1 eigenvector scores by patient name.
h_arm1_db_df_eigen = h_arm1_df_eigen.join(
    patients_log.set_index('PatientFirstName'),
    on='PatientFirstName',
)
# 'dpfs' is listed first, so row 0 of the correlation matrix is dpfs vs. each pathway.
print(h_arm1_db_df_eigen[['dpfs', *(entry[0] for entry in nx_pathways)]].corr().iloc[0])