### Example notebook for WorkStation - Differential Gene Expression

This notebook demonstrates reading data from the API and performing a differential gene expression analysis.
- title: "Differential Gene Expression (DGE)"
- author: "Margaret Paiva"
- author: "Jon Katz"
- date: "19/10/2021"
- output: R notebook

In [17]:
# # Check your R version if packages are not compatible
# R.version

In [4]:
# # Install BiocManager
# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")

In [3]:
# # Install packages using BiocManager
# for (pkg in c("limma", "fgsea")) {
#     if (!requireNamespace(pkg, quietly = TRUE)) {
#         BiocManager::install(pkg, update = FALSE, ask = FALSE)
#     }
# }

In [31]:
suppressPackageStartupMessages(library(jsonlite))
suppressPackageStartupMessages(library(rjson))
suppressPackageStartupMessages(library(httr))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(parallel))
suppressPackageStartupMessages(library(limma))
suppressPackageStartupMessages(library(fgsea))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(viridis))

Request data from the API if you have not already done so.

In [8]:
# # In "gene_list.csv", define a list of genes of interest
# gene_list  <- as.list(read.csv("gene_list.csv")$x)
# gene_list[1:2]

In [9]:
# # Define the data to query from API
# # NOTE(review): "request_key" below is a hard-coded credential; if it is not a
# # public demo key, load it from an environment variable (e.g. Sys.getenv())
# # instead of committing it to the notebook.
# data  <- list("request_data_type" = "expression",
#   "request_cancer_type" = c("Renal cell carcinoma", "Prostate", "Adenoid cystic carcinoma", "Breast", 
#                             "Thyroid", "Testicular", "Hepatocellular carcinoma", "Melanoma"),
# #   "request_genes" = c("ATM", "BRCA1", "BRCA2", "BRIP1", "CDK12", "FANCA", "HDAC2", "KRAS", 
# #                       "PALB2", "SRY", "TP53", "NOTCH1", "CCND1", "BARD1", "FBLN2", "CDKN1B", 
# #                       "RB1", "CHEK2", "APOBEC3B", "PALB2"),  # to define a list of genes here
#   "request_genes" = gene_list,  # if a list of genes is defined in a file
#   "request_dataset" = "PDX",
#   "request_key" = "d787ddc1e45f4ac2b68ac43419500515",
#   "request_client" = 99,
#   "request_user" = 99,
#   "request_mode" = 'true') 

In [14]:
# # Request data from API - this may take some time
# request  <-  POST(url = 'https://lumin-fast-api.championsoncology.com/workstation', 
#                   body = data, encode = 'json')
# request
# # A successful request will give "Status: 200"

In [16]:
# # This will save the data file as a .json file in your root directory
# response <- content(request)
# response

In [27]:
# Path to the .json file saved from the API request; edit the file name below
# to match your own file.
json_path <- "./data/requested_data---19-10-2021---20_44_20.json"
# Both jsonlite and rjson are attached and both export fromJSON(). Qualify the
# call so it always uses rjson's version (the one whose `file` argument is used
# here) regardless of the order in which the packages were loaded.
lt <- rjson::fromJSON(file = json_path)

In [28]:
# Stack the parsed records row-wise and convert the result to a data frame
df <- as.data.frame(do.call(rbind, lapply(lt, rbind)))
# Coerce the columns used downstream to plain character / numeric vectors
df[c("gene", "model")] <- lapply(df[c("gene", "model")], as.character)
df[["log.tpm"]] <- as.numeric(df[["log.tpm"]])
# Show the first two records and the overall dimensions
head(df, n = 2)
print(dim(df))

v1,gene_id,log.rsem.rpkm,log.tpm,z,fold,trans,model,sort_key,gene,model_name,tumor_type
19463231,ENSG00000005187,2.03154538924124,2.741149,-0.169317194943646,0.876569685689051,ENST00000440284:49.26% ENST00000567387:17.78% ENST00000501740:11.52% ENST00000289416:10.09%,CTG-3501,CTG-3501_Expression_1,ACSM3,CTG-3501,Breast
19463234,ENSG00000005243,0.43494348437293,0.7995996,-0.994615041230675,0.185174375597962,ENST00000006101:78.76% ENST00000579263:20.8%,CTG-3501,CTG-3501_Expression_1,COPZ2,CTG-3501,Breast


[1] 82800    12


In [32]:
# Pivot the data to wide form: 'gene' as rows, 'model' as columns, and
# 'log.tpm' as the cell values
log_tpm <- df %>%
    select(all_of(c("gene", "model", "log.tpm"))) %>%
    pivot_wider(names_from = model, values_from = log.tpm) %>%
    column_to_rownames("gene")
# Keep only genes that have a value for every model (rows with missing values
# are removed); drop = FALSE keeps a data frame even if only one model column
# is present
log_tpm <- log_tpm[complete.cases(log_tpm), , drop = FALSE]
# Preview at most the first 2 genes and 10 models, without indexing past the
# table's actual dimensions
head(log_tpm[, seq_len(min(10, ncol(log_tpm))), drop = FALSE], 2)
print(dim(log_tpm))

Unnamed: 0,CTG-3501,CTG-0718,CTG-1181,CTG-0437,CTG-1843,CTG-0456,CTG-2768,CTG-1379,CTG-1471,CTG-0500
ACSM3,2.741149,3.21443,2.694151,2.08215,5.471633,2.683937,0.7423767,3.333013,0.0253033,0.3042442
COPZ2,0.7995996,4.130462,7.072492,1.603433,5.820096,6.319975,6.3414629,3.313474,0.0,5.1539376


[1] 400 207


In [20]:
# Load the saved R objects from the data file; verbose = TRUE prints the name
# of each object as it is restored
load(file = "../data/RData_uncomp_20210714", verbose = TRUE)

Loading objects:
  cnv.db
  lcpm.list
  lrpkm.lst
  mut.db
  pathways.gmt
  pathways_namming
  rna.seq
  rna.seq.heme
  symbols.dt


In [23]:
# Preview the leading entries of the loaded `cnv.db` object
head(cnv.db, n = 6L)

Model,Gene,Alteration
CTG-0009,OR4F5,Gain
CTG-0009,IFI6,Loss
CTG-0009,FAM76A,Loss
CTG-0009,STX12,Loss
CTG-0009,PPP1R8,Loss
CTG-0009,THEMIS2,Loss


In [24]:
# Preview the leading entries of the loaded `lcpm.list` object
head(lcpm.list, n = 6L)

Unnamed: 0,CTG-0009,CTG-0011,CTG-0012,CTG-0017,CTG-0018,CTG-0019,CTG-0033,CTG-0052,CTG-0058,CTG-0059,...,CTG-3502,CTG-3503,CTG-3522,CTG-3533,CTG-3536,CTG-3537,CTG-3570,CTG-3579,CTG-3581,CTG-3628
ENSG00000000003,5.7196100,4.9227981,5.20164929,6.07866203,6.3530767,4.75802010,4.61243445,5.76388231,7.23251448,6.94097812,...,5.3960748,5.63113145,5.96538789,4.26519007,7.5843994,5.044880854,5.6818174,8.2087663,5.801617817,6.4789243
ENSG00000000005,1.1624794,0.0000000,2.52720294,3.98701657,0.0000000,0.00000000,0.02065102,0.00000000,0.17332090,0.06937113,...,0.0000000,0.00000000,0.72257972,0.00000000,1.0396853,0.000000000,0.2540629,0.7991451,0.148305951,0.3441766
ENSG00000000419,4.9989062,5.2656265,5.85794331,5.74512227,6.1245969,5.63550590,5.86916656,6.93940319,6.55426660,5.07315958,...,6.2496273,7.83638726,6.25226091,5.59443857,6.7947385,5.319974496,4.9030663,6.4936597,5.257328308,5.1843120
ENSG00000000457,4.7053049,4.5745569,4.94422172,5.64051143,4.6230550,4.89843730,5.33383593,4.57389104,4.41873852,4.18076039,...,4.9998223,5.30576992,4.04144696,5.64161242,4.5300926,4.431580594,4.9974632,4.3262496,4.672780993,4.7003374
ENSG00000000460,3.8976318,3.6717167,4.56784082,5.62479876,4.3749585,4.68004765,4.99756978,4.58272147,3.93535681,5.12673093,...,5.7060290,5.61032342,3.97140901,5.24530547,5.5067395,3.886819868,3.2655213,4.6641652,4.756887081,4.0220626
ENSG00000000938,2.2529493,0.2781741,0.49276601,0.01857694,0.0000000,0.11570380,0.22643844,0.43044732,0.47129690,0.08807980,...,0.1036955,0.00000000,0.02129137,0.06669830,0.0000000,0.001348553,2.8551923,0.0000000,0.000000000,0.0000000
ENSG00000000971,6.4874946,7.8340956,0.25944292,0.00000000,0.8904356,4.44398034,0.33071421,0.30270528,0.22153748,0.10554855,...,0.6266389,0.00000000,2.12336143,0.11945138,0.1088710,0.132290875,7.3866834,0.1487842,0.490260645,0.1573791
ENSG00000001036,5.2555455,7.1519277,5.63864418,4.78061283,6.3315445,6.44654587,5.74736964,6.83685670,7.58385090,7.34453562,...,5.5525729,6.43167467,5.27266172,5.48156981,7.5350081,7.759150624,5.7186095,7.2176572,5.267196664,6.5866052
ENSG00000001084,6.4771639,4.9530508,5.77168009,6.42354730,5.5006222,6.62418390,5.26097615,5.95647781,7.22055037,6.21506735,...,6.8378115,5.72529255,6.26893333,5.84076208,6.2957178,5.513483397,5.8157439,6.6761778,5.581366872,6.6946988
ENSG00000001167,5.9165800,5.7257860,5.39266922,6.29216918,6.0226720,6.35160250,5.46596136,5.99993983,5.22050243,5.89535174,...,7.2915929,7.69071115,6.38010998,6.35373613,5.5906802,6.281468255,5.1684622,5.7237775,6.557354855,4.7831605

V1,Model,Type,Project,Date,Vendor,Batch
1,CTG-0009,Adenoid cystic carcinoma,Project_SLO_12525_RNA,7/1/2017,Envigo,12
2,CTG-0011,Cholangiocarcinoma,Project_PAZ_10123_RNA,1/1/2015,NYGC,3
3,CTG-0012,Breast,Project_PAZ_01264_RNA,6/1/2014,NYGC,2
4,CTG-0017,Breast,Project_PAZ_01264_RNA,6/1/2014,NYGC,2
5,CTG-0018,Breast,Project_PAZ_01264_RNA,6/1/2014,NYGC,2
6,CTG-0019,Breast,Project_SLO_13604_RNA,10/1/2018,Envigo,16
7,CTG-0033,Breast,Project_PAZ_11161_RNA,1/1/2016,NYGC,7
8,CTG-0052,Breast,Project_PAZ_01264_RNA,6/1/2014,NYGC,2
9,CTG-0058,Colorectal,Project_PAZ_10123_RNA,1/1/2015,NYGC,3
10,CTG-0059,Cervical,Project_PENNY_17073_RNA,11/1/2016,Covance,10

Unnamed: 0,CTG-2227,CTG-2228,CTG-2229,CTG-2232,CTG-2233,CTG-2234,CTG-2235,CTG-2236,CTG-2238,CTG-2239,...,CTG-2704,CTG-2774,CTG-2775,CTG-3438,CTG-3439,CTG-3440,CTG-3441,CTG-3670,CTG-3674,CTG-3679
ENSG00000000419,4.2181885,4.9501736,4.285800099,4.7414998,4.2087611,4.1547131,3.7889320,4.6381594,4.36295047,4.2511442,...,4.7142237,4.8255547,4.3283353,3.7934218,4.2481360,4.3198335,4.12667021,3.9943739,4.4385018,4.6769647
ENSG00000000457,4.0420629,3.5051627,4.178512765,3.8174799,3.5391617,4.2654647,4.1576533,3.4514084,3.82419683,4.1764241,...,3.8437000,3.3826978,3.3726466,4.3273520,3.8946867,4.2778972,3.96487976,3.5987891,3.8866678,3.7578125
ENSG00000000460,2.9216343,2.5085447,2.487229492,2.2912750,3.3097996,2.3411913,2.2591113,3.2806129,2.84682339,2.8664176,...,2.4500012,2.7401164,2.3559427,2.9302502,2.5257975,3.4222738,2.59184693,2.6914819,3.3660973,2.9346967
ENSG00000000938,5.1243070,9.4171019,4.993713945,8.8118464,8.4589456,4.9384460,3.6313345,7.9593719,8.41575246,5.2999915,...,7.7152700,8.6953903,9.2690019,5.7180812,9.5988741,7.3829706,7.14586482,7.2298456,5.2639807,7.5473547
ENSG00000000971,4.8468221,1.8357499,4.504933663,1.7718391,4.7561830,3.4926176,1.6642795,3.4594050,1.86192720,2.1931002,...,3.0758254,2.0482160,2.6923235,1.3220858,6.5176126,3.1549222,5.33307524,5.8117644,4.6033302,4.2442824
ENSG00000001036,4.8104563,5.8674773,3.793660433,5.3585069,4.3994978,4.3654160,3.7810354,4.3704128,5.26511397,3.8309117,...,4.5082478,4.9054274,5.2995484,4.5976826,5.2671917,4.5886961,5.06755762,5.0960204,5.2930306,5.1447201
ENSG00000001084,5.0278234,5.2763329,4.472561695,5.0775262,4.9277950,5.0916081,4.5878744,4.4062656,4.92891443,3.0787210,...,5.8445846,4.4725635,4.8398793,4.5762900,5.0084809,4.3066051,4.00752465,5.0247845,4.8391054,4.5942318
ENSG00000001167,5.2304218,5.1838556,5.279195750,4.3375240,4.5986779,5.3246147,4.8553424,4.6095760,4.41676294,5.0892046,...,4.9659564,4.8164905,4.8616883,5.0195244,4.6240909,4.6218174,4.55264963,4.9087356,5.0742551,5.2098775
ENSG00000001460,2.9884159,3.3642481,3.425462270,2.7858208,2.2020676,3.2530583,3.2499548,2.4020989,2.38145458,3.2229877,...,2.5487707,3.2297890,2.1098236,3.5948910,2.6723181,2.8863058,2.12028349,3.0215560,2.8324767,2.7771438
ENSG00000001461,5.0903391,5.1472468,4.398954101,4.0203390,3.8212944,5.3431527,4.1848001,4.0931089,3.94081713,3.8455604,...,3.2635784,3.6971121,2.9732456,4.9123417,4.3877528,4.6254761,4.29892566,4.1806978,4.6754045,4.5870137

V1,Model,Type,Batch,Vendor,Date,Project
1,CTG-2227,AML,1,Piccard,10/1/2020,RNA-AML
2,CTG-2228,AML,2,Piccard,10/1/2020,RNA-AML
3,CTG-2229,AML,1,Piccard,10/1/2020,RNA-AML
4,CTG-2232,AML,1,Piccard,10/1/2020,RNA-AML
5,CTG-2233,AML,1,Piccard,10/1/2020,RNA-AML
6,CTG-2234,AML,1,Piccard,10/1/2020,RNA-AML
7,CTG-2235,AML,2,Piccard,10/1/2020,RNA-AML
8,CTG-2236,AML,1,Piccard,10/1/2020,RNA-AML
9,CTG-2238,AML,1,Piccard,10/1/2020,RNA-AML
10,CTG-2239,AML,1,Piccard,10/1/2020,RNA-AML


In [25]:
# Preview the leading entries of the loaded `lrpkm.lst` object
head(lrpkm.lst, n = 6L)

Unnamed: 0,CTG-0009,CTG-0011,CTG-0012,CTG-0017,CTG-0018,CTG-0019,CTG-0033,CTG-0052,CTG-0058,CTG-0059,...,CTG-3502,CTG-3503,CTG-3522,CTG-3533,CTG-3536,CTG-3537,CTG-3570,CTG-3579,CTG-3581,CTG-3628
,0.59514866,0.8929991,0.00000000,0.13541493,0.00000000,0.2195225671,0.05964169,0.00000000,1.79557886,0.060113537,...,0.24054112,0.00000000,0.731046928,0.9810237,0.008126247,0.30861506,0.27805784,0.008126247,0.423829227,0.329386585
,0.40121222,0.1841346,0.44715375,0.29861269,0.27935420,0.2563832582,0.27317116,0.24342648,0.20014829,0.275015194,...,0.22283960,0.26930750,0.344044237,0.2239892,0.283721492,0.32928391,0.36726839,0.238689756,0.289077045,0.281594490
A1BG,1.63103221,0.3242087,3.02178627,1.13631364,2.78230787,1.8170266321,3.12175105,4.89344412,0.33023043,1.312709189,...,2.24737122,1.30394965,1.592315447,1.4521720,0.823002526,2.09302461,1.56489429,0.724554512,2.788857113,0.249408243
A1CF,0.18271694,1.5717080,0.21133269,0.15865436,0.15107211,0.0993673849,0.08350347,0.12880481,2.11747264,0.000000000,...,0.01600222,0.00000000,0.000000000,0.0000000,3.284617681,0.33916996,0.00000000,2.866682955,0.000000000,1.380178782
A2M,7.28996718,0.6376022,9.27679152,1.65660399,0.00000000,0.7211822946,0.40166650,7.15744067,0.46432172,1.423082229,...,0.32328714,0.36078982,1.863472824,0.7273975,0.000000000,0.06251033,6.52008198,0.044393928,1.801263434,0.419607656
A2ML1,1.20896297,0.7288837,3.22946074,1.84195070,4.65508044,3.0645015837,0.18477681,3.75986938,0.28485847,1.567335099,...,0.82614993,1.95704910,0.460986412,0.6980417,0.000000000,0.27327339,2.15507581,0.000000000,4.138463981,0.000000000
A4GALT,2.18322239,1.8199494,0.99933422,0.52205486,2.43775690,0.5966488643,0.00000000,1.40587227,1.63310426,2.986454425,...,0.17578030,0.42844258,1.156817882,0.1588060,0.000000000,0.39351757,3.11523627,0.000000000,1.428996714,0.943098905
A4GNT,0.00000000,0.7638416,0.40487977,0.02023315,0.02023315,0.0653575529,0.00000000,0.19158551,0.00000000,0.000000000,...,0.10552534,0.08004024,0.003089946,0.0000000,0.000000000,0.00000000,0.00000000,0.000000000,0.015026779,0.003089946
AAAS,4.93203662,4.1182804,4.08302898,3.37235581,4.89956090,4.7746158978,4.58865381,4.58546236,4.67925744,4.841770613,...,4.91537522,5.29347397,4.147117865,5.2900304,4.412584395,4.66690519,4.47489251,4.921937021,5.241799874,4.951147898
AACS,3.79163022,4.7144054,4.60992717,3.00104197,4.33387734,3.9490406701,3.28702242,3.78827950,4.67530715,4.018108342,...,4.18535874,3.80325937,3.989916232,3.3436013,4.006183077,4.65501058,3.18033918,3.592038573,3.771323849,4.976983013

Unnamed: 0,CTG-2227,CTG-2228,CTG-2229,CTG-2232,CTG-2233,CTG-2234,CTG-2235,CTG-2236,CTG-2238,CTG-2239,...,CTG-2704,CTG-2774,CTG-2775,CTG-3438,CTG-3439,CTG-3440,CTG-3441,CTG-3670,CTG-3674,CTG-3679
A1BG,4.32963624,3.7370753,3.3890168,5.27711973,3.51728440,4.97906439,5.8128510,5.05955428,4.6515952,4.7071141,...,4.57063454,3.8219785,5.13776383,5.55081994,3.79185186,4.2702690,4.3214724,4.4318895,4.61935262,3.20528023
A2M,0.07849563,0.4043966,0.1985864,0.20195639,0.19724131,0.07849563,0.1309505,0.17870174,0.4043070,0.4205063,...,0.03307919,0.0000000,0.68977862,0.07849563,0.81657529,0.0000000,1.5952246,0.3020029,0.13824383,0.21341150
A4GALT,1.53035921,0.4771292,0.4270012,0.33068548,0.63006296,1.11957630,0.6763093,0.04072279,0.2153282,0.3694539,...,0.04485867,0.4914891,0.07229201,2.47573198,0.09385712,1.3554201,0.5013456,0.1678316,0.27095677,1.91659544
AAAS,4.25328642,3.9900252,4.3809646,4.14067423,4.17170725,4.17630687,4.4798449,4.49521775,4.7586104,4.5189116,...,3.77471859,4.4325174,4.70019714,4.84393027,4.08637499,4.5728288,4.3348900,4.6007840,4.53913349,4.24671792
AACS,2.32007287,2.5867378,2.5906941,2.63063188,2.04298732,3.01325940,2.5589271,1.88003089,2.1256486,2.1088921,...,2.10607274,2.1042195,2.47954867,2.76746585,2.56878508,2.5037114,3.1111791,3.1903526,2.59114355,2.53779277
AAED1,3.09886107,2.9094237,3.1936379,2.93891960,2.41155127,2.98904578,3.2739702,2.84212674,2.5532312,2.0450275,...,3.62697649,2.4905039,2.94000951,1.13418385,3.00013870,2.1242286,2.3167963,2.3676789,1.18396970,2.75612957
AAGAB,4.10827977,3.7199623,3.4675042,3.48814909,3.86270624,4.13932711,4.2215155,3.62094793,3.2927964,3.6047055,...,3.17236413,3.2201938,2.98271224,3.10933263,3.83137568,3.5366478,3.9574389,4.0792133,3.57937560,4.13905605
AAK1,1.55335976,2.3077627,2.3011483,1.82616513,1.66017150,1.44560150,1.5016727,1.29148487,2.2601992,3.8940079,...,2.44068879,2.2916298,2.69307046,3.01754177,1.65684150,1.5860924,2.9095386,2.0813118,1.82727896,0.88111491
AAMDC,3.74473318,3.3832746,3.5531552,3.67220456,3.03315860,3.71949618,4.2511748,3.84369641,4.0631106,4.9541496,...,2.66402566,3.3681788,3.34394885,4.00049205,3.41222236,3.9025023,3.9049464,4.3347910,3.33083150,3.71793359
AAMP,4.97689178,5.5509072,5.1152528,5.48826918,5.05321327,5.30300308,5.7894626,5.42204700,5.5256129,5.1049279,...,5.02419694,5.0824238,5.33646289,5.50889104,4.91125924,5.1835925,5.2286093,5.1571012,5.29773083,5.22661317
