In [2]:
import pandas as pd
from ast import literal_eval
import numpy as np
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
from statistics import mean, median
import scipy
from sklearn.decomposition import PCA
from sklearn import preprocessing
from gprofiler import GProfiler
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
figure(num=None, figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
import operator
import qvalue as qv


#Reactome file containing information on pathways, the genes they contain and pathway name, also including the illumina identifier for the genes.

reactome = pd.read_csv('../data/reactome.csv', sep=',', index_col = 0)

def read_reactome(file_name, gene_name_start = "ENSG0"):
    df = pd.read_csv(file_name, sep='\t', header=None)
    
    if gene_name_start == None:
        sub_df = df
    else:
        subset_vec = df[0].str.startswith(gene_name_start)
        sub_df = df.loc[subset_vec]
    
    genes_df = sub_df.groupby(1)[0].apply(list)
    names_df = sub_df.groupby(1)[3].max()
    
    out_df = pd.concat([genes_df,names_df], axis=1)
    out_df.columns = ['genes', 'pathway_name']
    
    return out_df

low_level = read_reactome('../data/Ensembl2Reactome_All_Levels.txt.gz')

def my_pca(df, n_pc=1, normalize=True):
    df = df.dropna(axis = 0, how = 'all')#redundant, but keeping it just in case
    X = df.values.T
    if normalize:
        X2 = preprocessing.scale(X)
    else:
        X2 = X
    pca = PCA(n_components = n_pc)
    pca.fit(X2)
    my_pca.pca = pca  #needed for components
    Xnew = pca.fit_transform(X2)
    out_df = pd.DataFrame(Xnew.transpose(), index=list(range(1,n_pc+1)), columns=df.columns)
    out_df = out_df.transpose()
    
    return out_df, my_pca.pca.components_, my_pca.pca.explained_variance_ratio_

#Importing metabric dataset, dividing up what is clinical/expression data and changing the type of the expression columns to float
metabric_data = pd.read_csv('../data/metabric.csv.gz')
#clinical_data = metabric_data.iloc[:27, :]
expression_data = metabric_data.iloc[27:,:]

#print(expression_data.columns)
dtypedict = {}
for i in expression_data.columns[1:]:
    dtypedict[i] = 'float32'
expression_data = expression_data.astype(dtypedict)



new_clinical_patient = pd.read_csv('../data/data_clinical_patient.txt', sep='\t', index_col=0).iloc[4:]
new_clinical_sample = pd.read_csv('../data/data_clinical_sample.txt', sep='\t', index_col=0).iloc[4:]
new_clinical = pd.concat([new_clinical_patient, new_clinical_sample.reindex(new_clinical_patient.index)], axis=1)
new_clinical['Triple Neg'] = new_clinical.apply(lambda row: True if ((row['ER Status'] == 'Negative') 
                                                                     and (row['PR Status'] == 'Negative') 
                                                                     and (row['HER2 Status'] == 'Negative')) else False, axis = 1)

new_clinical['ER-/PR-/HER2+'] = new_clinical.apply(lambda row: True if ((row['ER Status'] == 'Negative') 
                                                                     and (row['PR Status'] == 'Negative') 
                                                                     and (row['HER2 Status'] == 'Positive')) else False, axis = 1)





genes = expression_data['Unnamed: 0'].values.tolist()

gp = GProfiler(return_dataframe = True)
gp = gp.convert(organism='hsapiens',
          query=genes)

gp = gp.loc[gp['n_converted'] == 1]
gp = gp.loc[gp['name'] != 'None']
gp = gp.set_index('incoming')
gprofiler_names = gp
gprofiler_names

dataset = expression_data.set_index('Unnamed: 0') #gene_patient
pca_per_pathway = pd.DataFrame(index=expression_data.columns)

real_gene_names = pd.read_csv('../data/illumina2symbol.txt', sep="\t", index_col = 0)



genes_components_per_pathway = {} #nested dictionary where the 'outer dictionary' is the pathway names as keys and values are 
                                  #another dictionary with genes as keys and components as values

for pathway in reactome.index:
    genes = reactome.loc[pathway, "illumina"]
    genes = literal_eval(genes)
    genes = list(filter(lambda a: a != 'NaN', genes))
    pathwaydata = dataset.loc[genes]
    if pathwaydata.index.empty == True:
        pass
    else:
        pathwaydata = pathwaydata.dropna(axis = 0, how = 'any') #has to be done so the lists match, this makes the dropna in my_pca function obsolete
        presentgenes = pathwaydata.index.values.tolist()
        if len(presentgenes) <= 1:
            pass
        else:
            res, components, explained_variance = my_pca(pathwaydata)
            pathwayname = reactome.loc[pathway, 'pathway_name']
            pca_per_pathway[pathwayname] = res

            components = components.tolist()[0]
            innerdict = {}
            for i in range(0, len(presentgenes)):
                component = components[i]
                gene = genes[i]
                if gene in real_gene_names.index:
                    real_name = real_gene_names.loc[gene, "symbol"]
                    innerdict[real_name] = component
                elif gene in gprofiler_names.index:
                    real_name = gprofiler_names.loc[gene, 'name']
                    innerdict[real_name] = component
                else:
                    innerdict[gene] = component
            sorted_innerdict = sorted(innerdict.items(), key = operator.itemgetter(1), reverse = True)
            genes_components_per_pathway[pathwayname] = [sorted_innerdict, explained_variance.flat[0]]

pca_per_pathway = pca_per_pathway.iloc[1:]

<Figure size 640x480 with 0 Axes>

In [3]:
full_df = pd.concat([pca_per_pathway, new_clinical.reindex(pca_per_pathway.index)], axis=1)


In [4]:
size = full_df['Tumor Size'].astype(float)
size[size >= 100]

MB-0112    150.0
MB-0406    180.0
MB-0453    100.0
MB-0656    100.0
MB-0657    100.0
MB-0660    160.0
MB-3436    130.0
MB-6063    120.0
MB-7148    130.0
MB-7165    182.0
Name: Tumor Size, dtype: float64

In [5]:
from scipy.stats import ttest_ind, mannwhitneyu
import qvalue as qv


grouped_ttest = pd.DataFrame(index=full_df.iloc[:,:-33].columns)
grouped_by_cluster = full_df.groupby('Integrative Cluster')

for group in grouped_by_cluster:
    df = group[1].iloc[:,:-33]
    groupname = group[0]
    group2_df = full_df[full_df['Integrative Cluster'] != groupname].iloc[:,:-33]
    pvaluelist = []
    for pathway in df:
        group = df[pathway]
        group2 = group2_df[pathway]
        test = mannwhitneyu(group, group2)
        pvaluelist.append(test[1])
        
    empty_df = pd.DataFrame(index=full_df.iloc[:,:-33].columns)
    
    empty_df[f'Cluster {groupname} p-values'] = pvaluelist
    print(empty_df)
    qv.qvalues(empty_df, f'Cluster {groupname} p-values', f'Cluster {groupname} q-values')
    empty_df[f'Cluster {groupname} p-values'] = -np.log10(empty_df[f'Cluster {groupname} p-values'])
    empty_df[f'Cluster {groupname} q-values'] = -np.log10(empty_df[f'Cluster {groupname} q-values'])
    print(empty_df)
    grouped_ttest = pd.concat([grouped_ttest, empty_df], axis = 1)    
        
grouped_ttest

                                                    Cluster 1 p-values
Interleukin-6 signaling                                   5.186702e-04
Apoptosis                                                 3.172921e-05
Hemostasis                                                1.654836e-03
Intrinsic Pathway for Apoptosis                           9.744648e-02
PKB-mediated events                                       2.635249e-03
PI3K Cascade                                              1.102894e-01
MAPK3 (ERK1) activation                                   3.697860e-02
Translesion synthesis by REV1                             1.084238e-05
Translesion synthesis by Y family DNA polymeras...        5.224905e-16
Recognition of DNA damage by PCNA-containing re...        4.624878e-12
Translesion Synthesis by POLH                             6.687313e-08
Recognition and association of DNA glycosylase ...        2.122693e-26
Cleavage of the damaged pyrimidine                        2.122693e-26
Recogn

                                                    Cluster 10 p-values
Interleukin-6 signaling                                    8.113491e-08
Apoptosis                                                  9.733071e-60
Hemostasis                                                 4.866084e-31
Intrinsic Pathway for Apoptosis                            6.717934e-89
PKB-mediated events                                        4.035898e-05
PI3K Cascade                                               3.588494e-01
MAPK3 (ERK1) activation                                    1.685368e-27
Translesion synthesis by REV1                              4.862523e-62
Translesion synthesis by Y family DNA polymeras...         2.988992e-80
Recognition of DNA damage by PCNA-containing re...         6.726642e-79
Translesion Synthesis by POLH                              1.052467e-56
Recognition and association of DNA glycosylase ...         1.540760e-02
Cleavage of the damaged pyrimidine                         1.540

                                                    Cluster 2 p-values
Interleukin-6 signaling                                   2.061650e-02
Apoptosis                                                 4.259553e-01
Hemostasis                                                4.387007e-03
Intrinsic Pathway for Apoptosis                           1.230562e-01
PKB-mediated events                                       3.317693e-01
PI3K Cascade                                              5.224115e-03
MAPK3 (ERK1) activation                                   2.308090e-01
Translesion synthesis by REV1                             5.744746e-03
Translesion synthesis by Y family DNA polymeras...        2.491368e-03
Recognition of DNA damage by PCNA-containing re...        1.077563e-03
Translesion Synthesis by POLH                             5.886253e-03
Recognition and association of DNA glycosylase ...        5.473405e-04
Cleavage of the damaged pyrimidine                        5.473405e-04
Recogn

                                                    Cluster 3 p-values
Interleukin-6 signaling                                   6.732247e-05
Apoptosis                                                 4.219411e-07
Hemostasis                                                1.455990e-01
Intrinsic Pathway for Apoptosis                           4.484992e-06
PKB-mediated events                                       4.619212e-02
PI3K Cascade                                              4.321703e-05
MAPK3 (ERK1) activation                                   3.573999e-01
Translesion synthesis by REV1                             2.857977e-10
Translesion synthesis by Y family DNA polymeras...        6.404465e-25
Recognition of DNA damage by PCNA-containing re...        3.891546e-18
Translesion Synthesis by POLH                             6.496640e-11
Recognition and association of DNA glycosylase ...        2.020645e-01
Cleavage of the damaged pyrimidine                        2.020645e-01
Recogn

                                                    Cluster 4ER+ p-values
Interleukin-6 signaling                                      2.382400e-11
Apoptosis                                                    2.805844e-01
Hemostasis                                                   7.433489e-22
Intrinsic Pathway for Apoptosis                              3.523330e-07
PKB-mediated events                                          4.237196e-09
PI3K Cascade                                                 7.161279e-08
MAPK3 (ERK1) activation                                      3.307940e-05
Translesion synthesis by REV1                                3.739303e-18
Translesion synthesis by Y family DNA polymeras...           1.123579e-37
Recognition of DNA damage by PCNA-containing re...           4.537208e-39
Translesion Synthesis by POLH                                1.275302e-28
Recognition and association of DNA glycosylase ...           7.132936e-09
Cleavage of the damaged pyrimidine    

                                                    Cluster 4ER- p-values
Interleukin-6 signaling                                      1.216226e-05
Apoptosis                                                    5.517435e-03
Hemostasis                                                   5.832565e-20
Intrinsic Pathway for Apoptosis                              2.421401e-01
PKB-mediated events                                          3.710898e-06
PI3K Cascade                                                 2.686865e-06
MAPK3 (ERK1) activation                                      4.068825e-06
Translesion synthesis by REV1                                3.318629e-06
Translesion synthesis by Y family DNA polymeras...           1.109458e-05
Recognition of DNA damage by PCNA-containing re...           1.435355e-06
Translesion Synthesis by POLH                                2.081855e-09
Recognition and association of DNA glycosylase ...           3.665520e-07
Cleavage of the damaged pyrimidine    

                                                    Cluster 5 p-values
Interleukin-6 signaling                                   1.425766e-03
Apoptosis                                                 1.427767e-04
Hemostasis                                                2.010551e-05
Intrinsic Pathway for Apoptosis                           1.145333e-07
PKB-mediated events                                       2.561220e-02
PI3K Cascade                                              1.278530e-28
MAPK3 (ERK1) activation                                   3.487466e-02
Translesion synthesis by REV1                             2.439997e-01
Translesion synthesis by Y family DNA polymeras...        9.708217e-10
Recognition of DNA damage by PCNA-containing re...        3.216622e-05
Translesion Synthesis by POLH                             2.787454e-03
Recognition and association of DNA glycosylase ...        2.771763e-01
Cleavage of the damaged pyrimidine                        2.771763e-01
Recogn

                                                    Cluster 6 p-values
Interleukin-6 signaling                                       0.001069
Apoptosis                                                     0.410647
Hemostasis                                                    0.000010
Intrinsic Pathway for Apoptosis                               0.405259
PKB-mediated events                                           0.049219
PI3K Cascade                                                  0.000027
MAPK3 (ERK1) activation                                       0.110215
Translesion synthesis by REV1                                 0.009114
Translesion synthesis by Y family DNA polymeras...            0.001082
Recognition of DNA damage by PCNA-containing re...            0.006773
Translesion Synthesis by POLH                                 0.005669
Recognition and association of DNA glycosylase ...            0.003256
Cleavage of the damaged pyrimidine                            0.003256
Recogn

                                                    Cluster 7 p-values
Interleukin-6 signaling                                   7.374500e-02
Apoptosis                                                 1.092507e-12
Hemostasis                                                1.755731e-13
Intrinsic Pathway for Apoptosis                           4.892944e-21
PKB-mediated events                                       1.953309e-06
PI3K Cascade                                              8.072916e-02
MAPK3 (ERK1) activation                                   6.037839e-11
Translesion synthesis by REV1                             1.166875e-02
Translesion synthesis by Y family DNA polymeras...        1.186329e-08
Recognition of DNA damage by PCNA-containing re...        4.011272e-06
Translesion Synthesis by POLH                             2.867555e-06
Recognition and association of DNA glycosylase ...        2.079250e-02
Cleavage of the damaged pyrimidine                        2.079250e-02
Recogn

                                                    Cluster 8 p-values
Interleukin-6 signaling                                   3.903899e-08
Apoptosis                                                 3.324388e-25
Hemostasis                                                1.811018e-47
Intrinsic Pathway for Apoptosis                           3.263356e-10
PKB-mediated events                                       7.049243e-10
PI3K Cascade                                              1.362356e-04
MAPK3 (ERK1) activation                                   5.297822e-16
Translesion synthesis by REV1                             4.464854e-04
Translesion synthesis by Y family DNA polymeras...        7.456967e-08
Recognition of DNA damage by PCNA-containing re...        8.682188e-05
Translesion Synthesis by POLH                             1.606987e-01
Recognition and association of DNA glycosylase ...        9.871047e-04
Cleavage of the damaged pyrimidine                        9.871047e-04
Recogn

                                                    Cluster 9 p-values
Interleukin-6 signaling                                   1.903171e-05
Apoptosis                                                 3.249663e-01
Hemostasis                                                4.092132e-04
Intrinsic Pathway for Apoptosis                           4.138321e-01
PKB-mediated events                                       6.177639e-02
PI3K Cascade                                              9.455896e-06
MAPK3 (ERK1) activation                                   1.334850e-02
Translesion synthesis by REV1                             9.190198e-02
Translesion synthesis by Y family DNA polymeras...        2.830620e-07
Recognition of DNA damage by PCNA-containing re...        1.188100e-05
Translesion Synthesis by POLH                             8.378610e-03
Recognition and association of DNA glycosylase ...        2.898588e-02
Cleavage of the damaged pyrimidine                        2.897599e-02
Recogn

Unnamed: 0,Cluster 1 p-values,Cluster 1 q-values,Cluster 10 p-values,Cluster 10 q-values,Cluster 2 p-values,Cluster 2 q-values,Cluster 3 p-values,Cluster 3 q-values,Cluster 4ER+ p-values,Cluster 4ER+ q-values,...,Cluster 5 p-values,Cluster 5 q-values,Cluster 6 p-values,Cluster 6 q-values,Cluster 7 p-values,Cluster 7 q-values,Cluster 8 p-values,Cluster 8 q-values,Cluster 9 p-values,Cluster 9 q-values
2-LTR circle formation,3.760961,3.335132,18.711431,18.385849,0.325594,0.312018,1.881159,1.696610,8.625987,8.211725,...,5.889667,5.319493,0.336657,0.323548,1.601092,1.461820,8.159628,7.821457,2.886596,2.479343
5-Phosphoribose 1-diphosphate biosynthesis,0.345861,0.333758,15.974278,15.677000,1.045455,0.770827,0.902001,0.812664,6.189245,5.861441,...,0.685590,0.609081,1.176011,0.938421,3.107078,2.852851,2.044017,1.911900,0.935861,0.791947
A tetrasaccharide linker sequence is required for GAG synthesis,8.375755,7.582692,7.269704,7.085795,2.155733,1.521358,13.725188,12.984721,8.374111,7.970056,...,2.658408,2.334594,0.350185,0.332666,2.683999,2.454971,1.929601,1.804243,5.047063,4.435262
ABC transporter disorders,7.503048,6.786122,33.002068,32.520950,0.387921,0.349866,4.239394,3.874409,1.241745,1.148691,...,2.193869,1.918848,0.344877,0.329532,6.063705,5.638434,17.625445,17.020443,0.476183,0.425300
ABC transporters in lipid homeostasis,2.180941,1.890257,70.425206,69.536532,4.768345,3.243928,9.805411,9.191894,3.275117,3.069571,...,24.876615,23.161867,4.569565,3.576682,17.352672,16.035863,36.164804,35.018257,1.424468,1.211121
ABC-family proteins mediated transport,7.218621,6.531222,35.018455,34.507147,0.340716,0.322760,4.383301,4.004512,1.664934,1.544301,...,2.240816,1.962628,0.375231,0.348679,6.529664,6.075583,15.774023,15.239687,0.453226,0.408027
ABO blood group biosynthesis,0.821769,0.701136,2.607131,2.508858,1.903703,1.358481,0.393219,0.376135,5.278316,4.988857,...,0.720298,0.638765,4.837029,3.773074,2.530621,2.310722,3.941170,3.726109,0.410581,0.375487
ADP signalling through P2Y purinoceptor 1,1.665815,1.423457,6.976290,6.796835,0.916661,0.681919,13.591149,12.852980,11.845372,11.291991,...,1.295572,1.127437,4.455846,3.483429,0.618850,0.566623,1.117781,1.040433,5.692188,5.035296
ADP signalling through P2Y purinoceptor 12,3.115592,2.737803,4.658852,4.515230,1.022234,0.755483,15.881230,15.061351,16.676885,15.928278,...,1.954865,1.707354,5.777599,4.481980,0.919203,0.832588,2.683503,2.523849,6.344734,5.614512
AKT phosphorylates targets in the cytosol,3.216786,2.832958,11.108934,10.879551,1.160667,0.850752,1.609689,1.449732,3.927190,3.694600,...,3.167744,2.790954,0.824871,0.669130,2.161445,1.969467,4.162660,3.941370,0.924645,0.783348


In [7]:
grouped_ttest.loc['Cell Cycle']

Cluster 1 p-values        17.792930
Cluster 1 q-values        16.449249
Cluster 10 p-values      105.193740
Cluster 10 q-values      103.132204
Cluster 2 p-values         1.675756
Cluster 2 q-values         1.201556
Cluster 3 p-values        45.929201
Cluster 3 q-values        43.691573
Cluster 4ER+ p-values     44.653553
Cluster 4ER+ q-values     42.783902
Cluster 4ER- p-values      1.033751
Cluster 4ER- q-values      0.918066
Cluster 5 p-values        19.062704
Cluster 5 q-values        17.741685
Cluster 6 p-values         3.170968
Cluster 6 q-values         2.472208
Cluster 7 p-values         8.998319
Cluster 7 q-values         8.356786
Cluster 8 p-values        15.567307
Cluster 8 q-values        15.037958
Cluster 9 p-values        13.357865
Cluster 9 q-values        11.853969
Name: Cell Cycle, dtype: float64

In [10]:
for i in clusterframes:
    print(clusterframes[i].loc['Cell Cycle'].iloc[4:])

cluster 1 qvalues    16.449249
Name: Cell Cycle, dtype: float64
cluster 10 qvalues    103.132204
Name: Cell Cycle, dtype: float64
cluster 2 qvalues    1.201556
Name: Cell Cycle, dtype: float64
cluster 3 qvalues    43.691573
Name: Cell Cycle, dtype: float64
cluster 4ER+ qvalues    42.783902
Name: Cell Cycle, dtype: float64
cluster 4ER- qvalues    0.918066
Name: Cell Cycle, dtype: float64
cluster 5 qvalues    17.741685
Name: Cell Cycle, dtype: float64
cluster 6 qvalues    2.472208
Name: Cell Cycle, dtype: float64
cluster 7 qvalues    8.356786
Name: Cell Cycle, dtype: float64
cluster 8 qvalues    15.037958
Name: Cell Cycle, dtype: float64
cluster 9 qvalues    11.853969
Name: Cell Cycle, dtype: float64


In [None]:
grouped_ttest.to_csv('../exp/pathway_qvalues.csv')

In [None]:
grouped_ttest.sort_values(by='Cluster 7 q-values', ascending=False).iloc[:30,:]

In [9]:
from scipy.stats import ttest_ind, mannwhitneyu
import qvalue as qv

clusterframes = {}

grouped_by_cluster = full_df.groupby('Integrative Cluster')

for group in grouped_by_cluster:
    df_cluster = pd.DataFrame(index=full_df.iloc[:,:-33].columns)
    groupname = group[0]
    df = group[1].iloc[:,:-33]
    group2_df = full_df[full_df['Integrative Cluster'] != groupname].iloc[:,:-33]
    pvaluelist = []
    group1_mean_list = []
    group2_mean_list = []
    for pathway in df:
        group = df[pathway]
        group2 = group2_df[pathway]
        test = mannwhitneyu(group, group2)
        pvaluelist.append(test[1])
        group_mean = group.mean()
        group1_mean_list.append(group_mean)
        group2_mean = group2.mean()
        group2_mean_list.append(group2_mean) 
        
    
    df_cluster[f'Cluster {groupname}'] = group1_mean_list
    df_cluster['Other clusters'] = group2_mean_list
    df_cluster['Fold Change'] = np.log2(abs(df_cluster[f'Cluster {groupname}'])) - np.log2(abs(df_cluster['Other clusters']))
    
    
    df_cluster['p-values'] = pvaluelist
    qv.qvalues(df_cluster, 'p-values', f'cluster {groupname} qvalues')
    df_cluster['p-values'] = -np.log10(df_cluster['p-values'])
    df_cluster[f'cluster {groupname} qvalues'] = -np.log10(df_cluster[f'cluster {groupname} qvalues'])
    print(groupname)
    clusterframes[groupname] = df_cluster
        
        
#clusterframes

1
10
2
3
4ER+
4ER-
5
6
7
8
9


In [None]:
clusterframes['5'][clusterframes['5']['cluster 5 qvalues'] > 3].shape

In [None]:
for i in new_clinical.columns:
    print(i)

In [1]:
clinical_cat = 'ER-/PR-/HER2+'
def cluster_clin(clinical_cat, clustergroups):
    ER_pos_neg = full_df.groupby(clustergroups)
    print('total')
    full_grp = full_df[clinical_cat].value_counts()
    full_series_len = full_df[clinical_cat].count()
    full_grp_df = pd.DataFrame()
    full_grp_df['counts'] = full_grp
    full_grp_df['%'] = round(100*(full_grp/full_series_len),2)
    print(full_grp_df)
    print('\n\n\n')
    for group in ER_pos_neg:
        grp = group[1]
        grp2 = grp[clinical_cat].value_counts()
        series_len = grp[clinical_cat].count()
        df = pd.DataFrame()
        df['counts'] = grp2
        df['%'] = round(100*(grp2/series_len),2)
        print(f'cluster {group[0]}')
        print(df)
        print()
cluster_clin('Pam50 + Claudin-low subtype', 'Integrative Cluster')

NameError: name 'full_df' is not defined

In [None]:
cluster_clin("ER Status", 'Integrative Cluster')

In [None]:
clus5 = new_clinical[new_clinical['Integrative Cluster'] == '']
clus5['Age at Diagnosis'].astype(float).median()

In [None]:
clusterframes['6'].iloc[:45,:]

In [None]:
full_clusterframe = pd.DataFrame(index=clusterframes['1'].index)
for i in clusterframes:
    series = clusterframes[i][f'cluster {i} qvalues']
    full_clusterframe = pd.concat([full_clusterframe, series.reindex(full_clusterframe.index)], axis=1)


In [None]:
full_clusterframe.to_csv("../data/pathway_qvalues.csv")

In [None]:
full_clusterframe.sort_values(by='cluster 6 qvalues', ascending = False)

In [None]:
clus9 = clusterframes['5']
clus9[clus9['cluster 5 qvalues'] > 2]

In [None]:
genes_components_per_pathway['Cell-Cell communication'][0]


In [None]:
pca_per_pathway['Calmodulin induced events'].hist(bins=100)

In [None]:
grouped_ttest_vs_intclust3.sort_values(by='9 qvalues', ascending = False)

In [None]:
from statsmodels.formula.api import ols
results = ols("Q('M Phase') ~C(Q('Integrative Cluster'))", data=full_df).fit()
results.summary()

In [None]:
import qvalue as qv

df = qv.qvalues(grouped_ttest_vs_intclust3, '1', '1 qv')
df