In [6]:
### This is a gene-specific analysis notebook
import pickle, os
import pandas as pd
import numpy as np
from scipy.stats import mannwhitneyu

In [7]:
# Output directory that receives the pickled result dictionaries written by this notebook.
dir_out_figure = '../manuscript/figures_data/'
# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create race
# of testing os.path.exists() before calling os.makedirs().
os.makedirs(dir_out_figure, exist_ok=True)

***NACA***

In [8]:
### Gene analysed in this section, and the folder holding its DepMap export
### (the depmap data lives in the figure5 folder)
gene_name = 'NACA'
dir_depmap = './figure5/depmap'

In [9]:
def get_lineage(gene, histology):
    """Print the distinct 'Lineage Subtype' values recorded for one lineage.

    Loads ``<dir_depmap>/<gene>_21Q2.csv`` (``dir_depmap`` is a module-level
    variable), keeps the rows whose 'Lineage' column equals ``histology`` and
    prints the unique 'Lineage Subtype' values of those rows.

    Parameters
    ----------
    gene : str
        Gene symbol used as the CSV file-name prefix.
    histology : str
        Value to match in the 'Lineage' column.

    Returns
    -------
    None
        The subtypes are printed, not returned.
    """
    csv_path = os.path.join(dir_depmap, gene+'_21Q2.csv')
    table = pd.read_csv(csv_path)
    # Boolean row mask selecting the requested lineage.
    in_histology = table['Lineage'] == histology
    subtypes = table.loc[in_histology, 'Lineage Subtype'].unique()
    print(subtypes)

In [10]:
### NACA is a melanoma candidate, so list the subtypes recorded under the
### 'Skin' lineage to find the label to use for it
get_lineage(gene=gene_name, histology='Skin')

['Melanoma' 'Skin Squamous' 'Merkel Cell Carcinoma']


In [11]:
def get_CERES(gene, lineage):
    """Split a gene's DepMap table by lineage subtype and build a long-form CERES table.

    Loads ``<dir_depmap>/<gene>_21Q2.csv`` (``dir_depmap`` is a module-level
    variable), renames its eight columns positionally, and separates the rows
    whose 'Lineage Subtype' equals ``lineage`` from all remaining rows. The
    size of each group is printed.

    Parameters
    ----------
    gene : str
        Gene symbol used as the CSV file-name prefix.
    lineage : str
        Value to match in the 'Lineage Subtype' column.

    Returns
    -------
    tuple of (DataFrame, DataFrame, DataFrame)
        ``(df_all, df_gene, df_other)`` where ``df_gene`` / ``df_other`` are
        the matching / non-matching rows, and ``df_all`` is the melted
        (``variable``, ``value``) table of their 'CERES' columns with
        ``variable`` taking the values 'inlineage' and 'other'.

    Notes
    -----
    The two groups keep their original, non-overlapping row labels, so the
    intermediate wide frame is aligned on the union of both and padded with
    NaN. Those NaN entries are carried into ``df_all`` by ``melt``.
    """
    table = pd.read_csv(os.path.join(dir_depmap, gene+'_21Q2.csv'))
    # Positional rename: the file must contain exactly these eight columns, in this order.
    table.columns = ['ID', 'CERES', 'Name', 'Primary Disease', 'Lineage', 'Lineage Subtype', 'Expression', 'Mutation']

    # One mask, used directly for the in-lineage rows and negated for the rest.
    is_target = table['Lineage Subtype'] == lineage
    df_gene = table[is_target]
    df_other = table[~is_target]

    print(f'{len(df_gene)} in histology subtype')
    print(f'{len(df_other)} not in histology subtype')

    # Wide (one column per group) -> long (variable/value) layout.
    ceres_wide = pd.DataFrame({'inlineage':df_gene['CERES'],
                               'other':df_other['CERES']})
    df_all = pd.melt(ceres_wide)

    return df_all, df_gene, df_other

In [12]:
# Separate the NACA rows whose 'Lineage Subtype' is 'Melanoma' from every other row.
df_naca_ceres, inlineage, other = get_CERES(gene=gene_name, lineage='Melanoma')

56 in histology subtype
921 not in histology subtype


In [13]:
# Mann-Whitney U test comparing the CERES values of the in-lineage rows with all other rows.
# `alternative` is pinned explicitly: older SciPy releases defaulted to a deprecated
# mode that reports half the two-sided p-value, so relying on the default makes the
# stored p-value depend on the installed SciPy version.
testStat, pval = mannwhitneyu(inlineage["CERES"], other["CERES"], alternative='two-sided')

In [15]:
# Bundle the NACA results: long-form CERES table plus the Mann-Whitney U statistic and p-value.
dict_naca = {'df_ceres':df_naca_ceres,'testStat':testStat,'pval':pval}
# Write through a context manager so the file handle is flushed and closed
# deterministically instead of being left open until garbage collection.
with open(os.path.join(dir_out_figure, 'dict_naca.pkl'), 'wb') as f_out:
    pickle.dump(dict_naca, f_out)

***CALR***

In [16]:
# Switch the analysis to CALR; the DepMap folder is the same one used above.
gene_name = 'CALR'
dir_depmap = './figure5/depmap'

In [17]:
# List the subtypes recorded under the 'Uterus' lineage for CALR.
get_lineage(gene=gene_name, histology='Uterus')

['Endometrial Adenocarcinoma' 'Endometrial Adenosquamous'
 'Endometrial Stromal Sarcoma' 'MMMT' 'Choriocarcinoma'
 'Uterine Carcinosarcoma' 'Endometrial Squamous' 'Mullerian Carcinoma'
 'Clear Cell Carcinoma']


In [18]:
# Separate the CALR rows whose 'Lineage Subtype' is 'Endometrial Adenocarcinoma' from every other row.
df_calr_ceres, inlineage, other = get_CERES(gene=gene_name, lineage='Endometrial Adenocarcinoma')

20 in histology subtype
969 not in histology subtype


In [19]:
# Mann-Whitney U test comparing the CERES values of the in-lineage rows with all other rows.
# `alternative` is pinned explicitly: older SciPy releases defaulted to a deprecated
# mode that reports half the two-sided p-value, so relying on the default makes the
# stored p-value depend on the installed SciPy version.
testStat, pval = mannwhitneyu(inlineage["CERES"], other["CERES"], alternative='two-sided')

In [20]:
# Bundle the CALR results: long-form CERES table plus the Mann-Whitney U statistic and p-value.
dict_calr = {'df_ceres':df_calr_ceres,'testStat':testStat,'pval':pval}
# Write through a context manager so the file handle is flushed and closed
# deterministically instead of being left open until garbage collection.
with open(os.path.join(dir_out_figure, 'dict_calr.pkl'), 'wb') as f_out:
    pickle.dump(dict_calr, f_out)