**Environment Set-Up**

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd
import numpy as np

**Data Loading (BRCA, COAD, KIRC, LUAD)**

In [3]:
brca = pd.read_csv('/content/drive/My Drive/Colab Notebooks/BINF 3350: Genomics & Bioinformatics/brca_sig.csv', sep=',')
brca = brca[["cancer_type", "ncbi_gene_id", "gene_symbol", "fdr_adjusted_p_value", "log2_fold_change"]]
brca

Unnamed: 0,cancer_type,ncbi_gene_id,gene_symbol,fdr_adjusted_p_value,log2_fold_change
0,BRCA,9636,ISG15,1.000000e-20,2.392468
1,BRCA,8784,TNFRSF18,1.000000e-20,2.275401
2,BRCA,441869,ANKRD65,1.000000e-20,-1.642720
3,BRCA,2563,GABRD,1.000000e-20,2.082703
4,BRCA,9651,PLCH2,2.770700e-19,-2.263603
...,...,...,...,...,...
1376,BRCA,57595,PDZD4,5.677900e-14,-1.963182
1377,BRCA,554,AVPR2,1.000000e-20,-3.378095
1378,BRCA,8270,LAGE3,1.000000e-20,1.586527
1379,BRCA,2157,F8,1.000000e-20,-1.803112


In [4]:
coad = pd.read_csv('/content/drive/My Drive/Colab Notebooks/BINF 3350: Genomics & Bioinformatics/coad_sig.csv', sep=',')
coad = coad[["cancer_type", "ncbi_gene_id", "gene_symbol", "fdr_adjusted_p_value", "log2_fold_change"]]
coad

Unnamed: 0,cancer_type,ncbi_gene_id,gene_symbol,fdr_adjusted_p_value,log2_fold_change
0,COAD,84069,PLEKHN1,1.000000e-20,1.593593
1,COAD,1953,MEGF6,1.000000e-20,1.755102
2,COAD,106614088,MIR34AHG,8.475300e-19,1.554834
3,COAD,10630,PDPN,1.000000e-20,1.753934
4,COAD,284723,SLC25A34,1.784500e-14,-3.617534
...,...,...,...,...,...
939,COAD,10813,UTP14A,1.000000e-20,1.520153
940,COAD,90161,HS6ST2,1.000000e-20,3.067983
941,COAD,2564,GABRE,1.000000e-20,2.128198
942,COAD,633,BGN,1.000000e-20,2.698140


In [5]:
kirc = pd.read_csv('/content/drive/My Drive/Colab Notebooks/BINF 3350: Genomics & Bioinformatics/kirc_sig.csv', sep=',')
kirc = kirc[["cancer_type", "ncbi_gene_id", "gene_symbol", "fdr_adjusted_p_value", "log2_fold_change"]]
kirc

Unnamed: 0,cancer_type,ncbi_gene_id,gene_symbol,fdr_adjusted_p_value,log2_fold_change
0,KIRC,102466751,MIR6859-1,1.000000e-20,1.981901
1,KIRC,57801,HES4,1.000000e-20,2.096344
2,KIRC,8784,TNFRSF18,1.000000e-20,1.569977
3,KIRC,7293,TNFRSF4,1.000000e-20,2.997710
4,KIRC,2563,GABRD,1.000000e-20,4.275529
...,...,...,...,...,...
1701,KIRC,139728,PNCK,1.000000e-20,6.500308
1702,KIRC,3897,L1CAM,3.391000e-18,-6.080103
1703,KIRC,554,AVPR2,1.280900e-12,-4.031331
1704,KIRC,139716,GAB3,1.000000e-20,1.554767


In [6]:
luad = pd.read_csv('/content/drive/My Drive/Colab Notebooks/BINF 3350: Genomics & Bioinformatics/luad_sig.csv', sep=',')
luad = luad[["cancer_type", "ncbi_gene_id", "gene_symbol", "fdr_adjusted_p_value", "log2_fold_change"]]
luad

Unnamed: 0,cancer_type,ncbi_gene_id,gene_symbol,fdr_adjusted_p_value,log2_fold_change
0,LUAD,84069,PLEKHN1,1.000000e-20,2.422771
1,LUAD,8784,TNFRSF18,1.000000e-20,2.086583
2,LUAD,339456,TMEM52,1.000000e-20,1.574812
3,LUAD,27237,ARHGEF16,1.000000e-20,1.712965
4,LUAD,388591,RNF207,1.000000e-20,1.551146
...,...,...,...,...,...
1256,LUAD,3149,HMGB3,1.000000e-20,3.156008
1257,LUAD,5365,PLXNB3,1.000000e-20,2.434562
1258,LUAD,26576,SRPK3,1.000000e-20,2.203514
1259,LUAD,554,AVPR2,2.804300e-13,-1.791606


**Identify Shared DEGs Across Cancer Types**

In [7]:
shared = pd.merge(pd.merge(pd.merge(brca, coad, on = ['ncbi_gene_id', 'gene_symbol']), kirc, on = ['ncbi_gene_id', 'gene_symbol']), luad, on = ['ncbi_gene_id', 'gene_symbol'])
shared.head()

  """Entry point for launching an IPython kernel.


Unnamed: 0,cancer_type_x,ncbi_gene_id,gene_symbol,fdr_adjusted_p_value_x,log2_fold_change_x,cancer_type_y,fdr_adjusted_p_value_y,log2_fold_change_y,cancer_type_x.1,fdr_adjusted_p_value_x.1,log2_fold_change_x.1,cancer_type_y.1,fdr_adjusted_p_value_y.1,log2_fold_change_y.1
0,BRCA,128239,IQGAP3,1e-20,3.725825,COAD,1e-20,2.070796,KIRC,1e-20,1.663117,LUAD,1e-20,3.521805
1,BRCA,6241,RRM2,1e-20,3.391862,COAD,1e-20,1.696422,KIRC,4.7956e-15,2.33384,LUAD,1e-20,3.249844
2,BRCA,699,BUB1,1e-20,3.28836,COAD,1e-20,1.637324,KIRC,5.5291e-12,1.753177,LUAD,1e-20,2.573548
3,BRCA,1290,COL5A2,1e-20,1.723367,COAD,1e-20,1.845367,KIRC,1e-20,1.985023,LUAD,1e-20,1.861748
4,BRCA,11170,FAM107A,1e-20,-4.267759,COAD,4.0682e-11,-3.86341,KIRC,1e-20,-1.817739,LUAD,7.5631e-13,-4.803873


In [8]:
list(shared.columns)

['cancer_type_x',
 'ncbi_gene_id',
 'gene_symbol',
 'fdr_adjusted_p_value_x',
 'log2_fold_change_x',
 'cancer_type_y',
 'fdr_adjusted_p_value_y',
 'log2_fold_change_y',
 'cancer_type_x',
 'fdr_adjusted_p_value_x',
 'log2_fold_change_x',
 'cancer_type_y',
 'fdr_adjusted_p_value_y',
 'log2_fold_change_y']

**Identify Shared DEGs Across Cancer Types**

In [9]:
sharedDF = shared.set_axis(['BRCA', 'NCBI_Gene_ID', 'Gene_Symbol', 'BRCA_FDR', 'BRCA_LFC', 'COAD', 'COAD_FDR', 'COAD_LFC', 'KIRC', 'KIRC_FDR', 'KIRC_LFC', 'LUAD', 'LUAD_FDR', 'LUAD_LFC'], axis = 1, inplace = False)
subsetDF = sharedDF[['NCBI_Gene_ID', 'Gene_Symbol', 'BRCA_FDR', 'BRCA_LFC', 'COAD_FDR', 'COAD_LFC', 'KIRC_FDR', 'KIRC_LFC', 'LUAD_FDR', 'LUAD_LFC']]
subsetDF.index = np.arange(1, len(subsetDF) + 1)
subsetDF

Unnamed: 0,NCBI_Gene_ID,Gene_Symbol,BRCA_FDR,BRCA_LFC,COAD_FDR,COAD_LFC,KIRC_FDR,KIRC_LFC,LUAD_FDR,LUAD_LFC
1,128239,IQGAP3,1e-20,3.725825,1e-20,2.070796,1e-20,1.663117,1e-20,3.521805
2,6241,RRM2,1e-20,3.391862,1e-20,1.696422,4.7956e-15,2.33384,1e-20,3.249844
3,699,BUB1,1e-20,3.28836,1e-20,1.637324,5.5291e-12,1.753177,1e-20,2.573548
4,1290,COL5A2,1e-20,1.723367,1e-20,1.845367,1e-20,1.985023,1e-20,1.861748
5,11170,FAM107A,1e-20,-4.267759,4.0682e-11,-3.86341,1e-20,-1.817739,7.5631e-13,-4.803873
6,125,ADH1B,9.6867e-13,-6.394212,1.0888e-12,-6.02607,1.4535e-12,-4.336386,1e-20,-4.337517
7,9319,TRIP13,1e-20,2.111323,1e-20,2.421709,1e-20,1.584231,1e-20,2.902119
8,57556,SEMA6A,1e-20,-2.058443,4.6403e-19,-2.736862,1e-20,1.776406,5.0458e-15,-2.864175
9,10112,KIF20A,1e-20,3.665427,1e-20,1.658623,6.5598e-20,2.134878,1e-20,3.311439
10,25928,SOSTDC1,1.2997e-13,-4.252037,4.2753e-13,-1.80463,2.7181e-20,-5.650733,3.0853e-13,-5.239818


**Export Shared DEGs**

In [10]:
subsetDF.to_csv('/content/drive/My Drive/Colab Notebooks/BINF 3350: Genomics & Bioinformatics/sharedDEGs.csv', index = False)