Skip to content

the-ahuja-lab/CancerSmell

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

8 Commits
 
 
 
 
 
 

Repository files navigation

CancerSmell

Workflow

Load essential Libraries
The initial step is to load all the necessary libraries

#Loading Essential Libraries
library("reticulate") #To use one Python Library "Scrublet"
library("AnnotationDbi") #To convert one Id to Another
library("org.Hs.eg.db") #To use Genome wide annotation for Human
library(dplyr) #For Datasets Manipulation
library(tidyr) #For Datasets Manipulation
library(data.table) #For Datasets Manipulation
library(zFPKM) #To calulate zFPKM scores
library(Seurat) #For single cell analysis
library(ggplot2) #To plot graphs
library(RColorBrewer) #To define color-palette
library("GSVA") #To compute Gene Set Variation Analysis
library(GSEABase) #To compute Gene Set Variation Analysis
library(GSVAdata) #To compute Gene Set Variation Analysis
library(msigdb) #To load gmt file

Once all the libraries are properly loaded, we need to provide input files. This pipeline takes into account 6 sub famlies of GPCRs. The format of all these 6 Input Files is a sinlge column csv file that contains gene names as entries and column heading as "Symbol"

Input Receptor files

f_or<-read.csv("Main_pipeline/Receptor_files/functional_or.csv",header=TRUE)
p_or<-read.csv("Main_pipeline/Receptor_files/pseudo_or.csv",header=TRUE)
t1r<-read.csv("Main_pipeline/Receptor_files/t1r.csv",header=TRUE)
t2r<-read.csv("Main_pipeline/Receptor_files/t2r.csv",header=TRUE)
taar<-read.csv("Main_pipeline/Receptor_files/taar.csv",header=TRUE)
v1r<-read.csv("Main_pipeline/Receptor_files/v1r.csv",header=TRUE)

Loading Expression file
In this documntation, the sample file used is under GSE75688 folder. The structure of the file is shown below.

CellName SRR2973279 SRR2973280 SRR2973281 SRR2973282 SRR2973283
Sample BC01 BC01 BC01 BC01 BC01
ENSG00000000003 0.76869 0.625649 0.546056 0.743321 0.163526
ENSG00000000005 0 0 0 0 0
ENSG00000000457 2.333028 0.593446 68.046238 8.740911 61.360909
#Reading Raw tpm matrix
exp<-read.csv("Main_pipeline/GSE75688/GSE75688_PCG_beforeQC.txt",header=TRUE,
               row.names=1,sep="\t",stringsAsFactors=FALSE)

File Manipulation
Since the first row contains non-numneric values, the first line of the file was removed.

exp<-exp[-1,]

Now our file appears like this

CellName SRR2973279 SRR2973280 SRR2973281 SRR2973282 SRR2973283
ENSG00000000003 0.76869 0.625649 0.546056 0.743321 0.163526
ENSG00000000005 0 0 0 0 0
ENSG00000000457 2.333028 0.593446 68.046238 8.740911 61.360909

The Gene Id's present in this input file is in ENSEMBL format whereas our receptor file contains gene names. So we need to convert these ENSEMBL ID's to Gene Names

exp$Gene <- mapIds(org.Hs.eg.db,keys=row.names(exp),column="SYMBOL", keytype="ENSEMBL", multiVals="first")
exp<-distinct(exp,Gene, .keep_all= TRUE)
exp<-exp %>% drop_na(Gene)
row.names(exp)<-exp$Gene
exp$Gene<-NULL
t_exp<-t(exp)
t_exp<-as.data.frame(t_exp)

After ID conversion file will apper like this.

CellName SRR2973279 SRR2973280 SRR2973281 SRR2973282 SRR2973283
TSPAN6 0.76869 0.625649 0.546056 0.743321 0.163526
TNMD 0 0 0 0 0
DPM1 2.333028 0.593446 68.046238 8.740911 61.360909

Saving Files

exp<-setDT(exp, keep.rownames = "Gene_names")[]
write.table(exp,file="Main_pipeline/GSE75688/raw_input.csv",sep=",",quote=FALSE,row.names = FALSE,col.names = TRUE)
t_exp<-setDT(t_exp, keep.rownames = "Cell_names")[]
write.table(t_exp,file="Main_pipeline/GSE75688/raw_input_for_doublet.csv",sep=",",quote=FALSE,
            row.names = FALSE,col.names = TRUE)

Doublet Identification
For this purpose python package "Scrublet" (0.2.1) was used.

import pandas as pd
import numpy as np
import scrublet as scr
data = pd.io.parsers.read_csv("Main_pipeline/GSE75688/raw_input_for_doublet.csv",sep=",",index_col=0)
indexNamesArr = data.index.values
listOfRowIndexLabels = list(indexNamesArr)
listOfRowIndexLabels
row_names=pd.DataFrame(listOfRowIndexLabels)
scrub = scr.Scrublet(data)
doublet_scores, predicted_doublets = scrub.scrub_doublets()
score=pd.DataFrame(doublet_scores)
value=pd.DataFrame(predicted_doublets)
a=np.concatenate((row_names,score),axis=1)
b=np.concatenate((a,value),axis=1)
final=pd.DataFrame(b)
final.columns = ['Cell','Doublet_Score','Doublet']
print(final)
final.to_csv(r'Main_pipeline/GSE75688/doublet_analysis.csv',index = None, header=True)
exit()

Alternatively the above code has been saved in the file doublet.py and can be called in R only using reticulate() library. For this you need to pass the location where your python is installed in the function use_python. For eg

use_python("/home/siddhant18241/miniconda3/bin/python3", required = T)
py_config()
source_python("Main_pipeline/GSE75688/doublet.py")

Doublet Analysis will output the result in "doublet_analysis.csv". This is the three column file that contains Cell_Id, Doublet_Score, and Doublet_Status

Cell Doublet_Score Doublet
SRR2973279 0.02072538860103627 False
SRR2973280 0.017369727047146403 False
SRR2973281 0.017369727047146403 False

Doublet Status will be in the form of TRUE/FALSE, only FALSE cells are further used for analysis.

doublet<-read.csv("Main_pipeline/GSE75688/doublet_analysis.csv")
doublet<-subset(doublet, Doublet=="False" )
doublet$Doublet_Score<-NULL
doublet$Doublet<-NULL
doublets_removed<-merge(doublet,t_exp,by.x="Cell",by.y="Cell_names")
row.names(doublets_removed)<-doublets_removed[,1]
doublets_removed$Cell<-NULL
t_doublets_removed<-t(doublets_removed)
t_doublets_removed<-as.data.frame(t_doublets_removed)
t_doublets_removed<-setDT(t_doublets_removed, keep.rownames = "Gene_names")[]
write.table(t_doublets_removed,file="Main_pipeline/GSE75688/Doublets_removed.csv",sep=",",row.names=FALSE,col.names = TRUE,quote=FALSE)

To save space the variables that don't have any role now are assigned a NULL value.

exp<-NULL
t_exp<-NULL
doublet<-NULL
doublets_removed<-NULL
t_doublets_removed<-NULL

The next step in the analysis is to identify the active and inactive genes based on their zFPKM values.

num_data<-read.csv("Main_pipeline/GSE75688/Doublets_removed.csv",row.names = 1)
zfpkm<-zFPKM(num_data, assayName = "tpm")
zfpkm<-as.data.frame(zfpkm)
zfpkm$Median_Value <- apply(zfpkm, 1, FUN=median, na.rm=TRUE)

Active genes are only those genes that have Median of zFPKM value >-3

filtered_genes<-subset(zfpkm, Median_Value>=-3 )
filtered_genes$Median_Value<-NULL
list_of_filtered_genes<-row.names(filtered_genes)
list_of_filtered_genes<-as.data.frame(list_of_filtered_genes)
colnames(list_of_filtered_genes)<-c("Gene_names")

Since num_data contains all genes and only non doublet cells, we need to select only those genes which has median value >-3.

num_data<-as.data.frame(num_data)
num_data<-setDT(num_data, keep.rownames = "Gene_names")[]
seurat_input<-merge(list_of_filtered_genes,num_data,by.x="Gene_names",by.y="Gene_names")
write.table(seurat_input,file="Main_pipeline/GSE75688/seurat_input.csv",row.names=FALSE,
            col.names = TRUE,sep=",",quote = FALSE)
row.names(seurat_input)<-seurat_input$Gene_names
seurat_input$Gene_names<-NULL

filtered_genes was also intersected with zfpkm

zfpkm<-as.data.frame(zfpkm)
zfpkm<-setDT(zfpkm, keep.rownames = "Gene_names")[]
active_zfpkm_genes<-merge(list_of_filtered_genes,zfpkm,by.x="Gene_names",by.y="Gene_names")
active_zfpkm_genes$Median_Value<-NULL
write.table(active_zfpkm_genes,file="Main_pipeline/GSE75688/Active_genes_zfpkm_values.csv",row.names=FALSE,col.names = TRUE,sep=",",quote = FALSE)

Only seurat_input and active_zfpk_genes will be further used in the analysis. To reduce space again we removed the outdated variables.

num_data<-NULL
zfpkm<-NULL
filtered_genes<-NULL
list_of_filtered_genes<-NULL

SEURAT_implementation
Creating seurat object

pb <- CreateSeuratObject(counts = seurat_input, min.cells = 3, min.features = 200)
pb

Generating several plots

VlnPlot(pb, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)
 plot2 <- FeatureScatter(pb, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
 plot(plot2)

Since we used raw matrix we need to perform normalization

pb <- NormalizeData(pb)
pb <- FindVariableFeatures(pb, selection.method = "vst", nfeatures = 2000)
list_of_variable_features<-VariableFeatures(pb)
list_of_variable_features<-as.data.frame(list_of_variable_features)
write.table(list_of_variable_features,file="Main_pipeline/GSE75688/List_of_variable_features.csv",
            sep=",",row.names=FALSE,quote = FALSE)

Further analysis: Scaling of data

all.genes <- rownames(pb)
pb <- ScaleData(pb, features = all.genes)

Dimension Reduction

pb <- RunPCA(pb, features = VariableFeatures(object = pb))
VizDimLoadings(pb, dims = 1:2, reduction = "pca")
DimPlot(pb, reduction = "pca")

Plotting HeatMap

DimHeatmap(pb, dims = 1:15, cells = 500, balanced = TRUE)
pb <- JackStraw(pb, num.replicate = 100)
pb <- ScoreJackStraw(pb, dims = 1:20)
JackStrawPlot(pb, dims = 1:15)
ElbowPlot(pb)

Finding Neighbours and perforimg Cluster analysis

pb <- FindNeighbors(pb, dims = 1:10)
pb <- FindClusters(pb, resolution = 0.5)
head(Idents(pb), 5)
pb <- RunUMAP(pb, dims = 1:10)
DimPlot(pb, reduction = "umap")

Finding Markers of all clusters

pb.markers <- FindAllMarkers(pb, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
write.table(pb.markers,file="Main_pipeline/GSE75688/Markers_info.csv",
            sep=",",row.names=FALSE,col.names = TRUE,quote = FALSE)
row_names<-row.names(seurat_input)
row_names<-as.data.frame(row_names)
seurat_input_copy<-seurat_input
seurat_input_copy<-setDT(seurat_input_copy, keep.rownames = TRUE)[]
write.table(seurat_input_copy,file="Main_pipeline/GSE75688/copy_of_seu_input.csv",
            sep=",",quote=FALSE,row.names = FALSE,col.names = TRUE)

Plotting UMAPS for FUNCTIONAL ORs detected.

factor0<-merge(row_names,f_or,by.x="row_names",by.y="Symbol")
factor0<-as.matrix(factor0)
a<-length(factor0)
for (j in 1:a){
  tryCatch(
    {
      
      k<-FeaturePlot(pb, features = c(factor0[j,]),cols=my_col)
      plot(k)
      
    },
    error=function(cond) {
      message(paste("."))
      
    },
    warning=function(cond) {
      message(paste("."))
      
    },
    finally={
      
    }
  )    
}

For example, one of the UMAP is shown below.

check<-Idents(pb)
check<-as.data.frame(check)
#Converting row names as first column  
check<-setDT(check, keep.rownames = TRUE)[]
#Assigning column names to check
colnames(check)<- c("Ids","Cluster")
x<-merge(f_or,seurat_input_copy,by.x="Symbol",by.y="rn")
first_col<-x[,1]
first_col<-as.matrix(first_col)
t_comb<-t(x)
t_comb<-as.data.frame(t_comb)
t_comb<-setDT(t_comb, keep.rownames = TRUE)[]
names(t_comb) <- as.matrix(t_comb[1,])
t_comb <- t_comb[-1,]
Functional_Olfactory_info<-merge(check,t_comb,by.x="Ids",by.y="Symbol")
write.table(Functional_Olfactory_info,"Main_pipeline/GSE75688/Functional_Olfactory_info.csv",
            row.names=FALSE,sep=",",quote = FALSE)
x<-NULL
Functional_Olfactory_info<-as.data.frame(Functional_Olfactory_info)
row.names(Functional_Olfactory_info)<-Functional_Olfactory_info$Ids
Functional_Olfactory_info$Ids<-NULL
Functional_Olfactory_info<-t(Functional_Olfactory_info)
Functional_Olfactory_info<-as.data.frame(Functional_Olfactory_info)

Similar work was done for TAAR, VIR, V2R, TIR and Pseudo ORs

#Plotting UMAPS for TAARs detected.
factor0<-merge(row_names,taar,by.x="row_names",by.y="Symbol")
factor0<-as.matrix(factor0)
a<-length(factor0)
pdf(file="Main_pipeline/GSE756881/UMAP_TAAR_plots.pdf")
for (j in 1:a){
  tryCatch(
    {
      
      k<-FeaturePlot(pb, features = c(factor0[j,]),cols=my_col)
      plot(k)
      
    },
    error=function(cond) {
      message(paste("."))
      
    },
    warning=function(cond) {
      message(paste("."))
      
    },
    finally={
      
    }
  )    
}
dev.off()

check<-Idents(pb)
check<-as.data.frame(check)
#Converting row names as first column  
check<-setDT(check, keep.rownames = TRUE)[]
#Assigning column names to check
colnames(check)<- c("Ids","Cluster")
x<-merge(taar,seurat_input_copy,by.x="Symbol",by.y="rn")
first_col<-x[,1]
first_col<-as.matrix(first_col)
t_comb<-t(x)
t_comb<-as.data.frame(t_comb)
t_comb<-setDT(t_comb, keep.rownames = TRUE)[]
names(t_comb) <- as.matrix(t_comb[1,])
t_comb <- t_comb[-1,]
Functional_TAAR_info<-merge(check,t_comb,by.x="Ids",by.y="Symbol")
write.table(Functional_TAAR_info,"Main_pipeline/GSE756881/Functional_TAAR_info.csv",row.names=FALSE,sep=",",quote = FALSE)
x<-NULL
Functional_TAAR_info<-as.data.frame(Functional_TAAR_info)
row.names(Functional_TAAR_info)<-Functional_TAAR_info$Ids
Functional_TAAR_info$Ids<-NULL
Functional_TAAR_info<-t(Functional_TAAR_info)
Functional_TAAR_info<-as.data.frame(Functional_TAAR_info)


#___________________________________________________________________________________
#Plotting UMAPS for V1Rs detected.
factor0<-merge(row_names,v1r,by.x="row_names",by.y="Symbol")
factor0<-as.matrix(factor0)
a<-length(factor0)
pdf(file="Main_pipeline/GSE756881/UMAP_V1R_plots.pdf")
for (j in 1:a){
  tryCatch(
    {
      
      k<-FeaturePlot(pb, features = c(factor0[j,]),cols=my_col)
      plot(k)
      
    },
    error=function(cond) {
      message(paste("."))
      
    },
    warning=function(cond) {
      message(paste("."))
      
    },
    finally={
      
    }
  )    
}
dev.off()

check<-Idents(pb)
check<-as.data.frame(check)
#Converting row names as first column  
check<-setDT(check, keep.rownames = TRUE)[]
#Assigning column names to check
colnames(check)<- c("Ids","Cluster")
x<-merge(v1r,seurat_input_copy,by.x="Symbol",by.y="rn")
first_col<-x[,1]
first_col<-as.matrix(first_col)
t_comb<-t(x)
t_comb<-as.data.frame(t_comb)
t_comb<-setDT(t_comb, keep.rownames = TRUE)[]
names(t_comb) <- as.matrix(t_comb[1,])
t_comb <- t_comb[-1,]
Functional_V1R_info<-merge(check,t_comb,by.x="Ids",by.y="Symbol")
write.table(Functional_V1R_info,"Main_pipeline/GSE756881/Functional_V1R_info.csv",row.names=FALSE,sep=",",quote = FALSE)
x<-NULL
Functional_V1R_info<-as.data.frame(Functional_V1R_info)
row.names(Functional_V1R_info)<-Functional_V1R_info$Ids
Functional_V1R_info$Ids<-NULL
Functional_V1R_info<-t(Functional_V1R_info)
Functional_V1R_info<-as.data.frame(Functional_V1R_info)

#___________________________________________________________________________________
#Plotting UMAPS for T2Rs detected.
factor0<-merge(row_names,t2r,by.x="row_names",by.y="Symbol")
factor0<-as.matrix(factor0)
a<-length(factor0)
pdf(file="Main_pipeline/GSE756881/UMAP_T2R_plots.pdf")
for (j in 1:a){
  tryCatch(
    {
      
      k<-FeaturePlot(pb, features = c(factor0[j,]),cols=my_col)
      plot(k)
      
    },
    error=function(cond) {
      message(paste("."))
      
    },
    warning=function(cond) {
      message(paste("."))
      
    },
    finally={
      
    }
  )    
}
dev.off()

check<-Idents(pb)
check<-as.data.frame(check)
#Converting row names as first column  
check<-setDT(check, keep.rownames = TRUE)[]
#Assigning column names to check
colnames(check)<- c("Ids","Cluster")
x<-merge(t2r,seurat_input_copy,by.x="Symbol",by.y="rn")
first_col<-x[,1]
first_col<-as.matrix(first_col)
t_comb<-t(x)
t_comb<-as.data.frame(t_comb)
t_comb<-setDT(t_comb, keep.rownames = TRUE)[]
names(t_comb) <- as.matrix(t_comb[1,])
t_comb <- t_comb[-1,]
Functional_T2R_info<-merge(check,t_comb,by.x="Ids",by.y="Symbol")
write.table(Functional_T2R_info,"Main_pipeline/GSE756881/Functional_T2R_info.csv",row.names=FALSE,sep=",",quote = FALSE)
x<-NULL
Functional_T2R_info<-as.data.frame(Functional_T2R_info)
row.names(Functional_T2R_info)<-Functional_T2R_info$Ids
Functional_T2R_info$Ids<-NULL
Functional_T2R_info<-t(Functional_T2R_info)
Functional_T2R_info<-as.data.frame(Functional_T2R_info)

#___________________________________________________________________________________
#Plotting UMAPS for T1Rs detected.
factor0<-merge(row_names,t1r,by.x="row_names",by.y="Symbol")
factor0<-as.matrix(factor0)
a<-length(factor0)
pdf(file="Main_pipeline/GSE756881/UMAP_T1R_plots.pdf")
for (j in 1:a){
  tryCatch(
    {
      
      k<-FeaturePlot(pb, features = c(factor0[j,]),cols=my_col)
      plot(k)
      
    },
    error=function(cond) {
      message(paste("."))
      
    },
    warning=function(cond) {
      message(paste("."))
      
    },
    finally={
      
    }
  )    
}
dev.off()

check<-Idents(pb)
check<-as.data.frame(check)
#Converting row names as first column  
check<-setDT(check, keep.rownames = TRUE)[]
#Assigning column names to check
colnames(check)<- c("Ids","Cluster")
x<-merge(t1r,seurat_input_copy,by.x="Symbol",by.y="rn")
first_col<-x[,1]
first_col<-as.matrix(first_col)
t_comb<-t(x)
t_comb<-as.data.frame(t_comb)
t_comb<-setDT(t_comb, keep.rownames = TRUE)[]
names(t_comb) <- as.matrix(t_comb[1,])
t_comb <- t_comb[-1,]
Functional_T1R_info<-merge(check,t_comb,by.x="Ids",by.y="Symbol")
write.table(Functional_T1R_info,"Main_pipeline/GSE756881/Functional_T1R_info.csv",row.names=FALSE,sep=",",quote = FALSE)
x<-NULL
Functional_T1R_info<-as.data.frame(Functional_T1R_info)
row.names(Functional_T1R_info)<-Functional_T1R_info$Ids
Functional_T1R_info$Ids<-NULL
Functional_T1R_info<-t(Functional_T1R_info)
Functional_T1R_info<-as.data.frame(Functional_T1R_info)

#___________________________________________________________________________________
#Plotting UMAPS for Pseudo ORs detected.
factor0<-merge(row_names,p_or,by.x="row_names",by.y="Symbol")
factor0<-as.matrix(factor0)
a<-length(factor0)
pdf(file="Main_pipeline/GSE756881/UMAP_Pseudo_Olfactory_plots.pdf")
for (j in 1:a){
  tryCatch(
    {
      
      k<-FeaturePlot(pb, features = c(factor0[j,]),cols=my_col)
      plot(k)
      
    },
    error=function(cond) {
      message(paste("."))
      
    },
    warning=function(cond) {
      message(paste("."))
      
    },
    finally={
      
    }
  )    
}
dev.off()

check<-Idents(pb)
check<-as.data.frame(check)
#Converting row names as first column  
check<-setDT(check, keep.rownames = TRUE)[]
#Assigning column names to check
colnames(check)<- c("Ids","Cluster")
x<-merge(p_or,seurat_input_copy,by.x="Symbol",by.y="rn")
first_col<-x[,1]
first_col<-as.matrix(first_col)
t_comb<-t(x)
t_comb<-as.data.frame(t_comb)
t_comb<-setDT(t_comb, keep.rownames = TRUE)[]
names(t_comb) <- as.matrix(t_comb[1,])
t_comb <- t_comb[-1,]
Functional_Pseudo_olfactory_info<-merge(check,t_comb,by.x="Ids",by.y="Symbol")
write.table(Functional_Pseudo_olfactory_info,"Main_pipeline/GSE756881/Functional_Pseudo_olfactory_info.csv",row.names=FALSE,sep=",",quote = FALSE)
x<-NULL
Functional_Pseudo_olfactory_info<-as.data.frame(Functional_Pseudo_olfactory_info)
row.names(Functional_Pseudo_olfactory_info)<-Functional_Pseudo_olfactory_info$Ids
Functional_Pseudo_olfactory_info$Ids<-NULL
Functional_Pseudo_olfactory_info<-t(Functional_Pseudo_olfactory_info)
Functional_Pseudo_olfactory_info<-as.data.frame(Functional_Pseudo_olfactory_info)

Combining all the data frames together, to obatin a final complete Dataframe.

final<-do.call("rbind", list(Functional_Olfactory_info,Functional_Pseudo_olfactory_info,Functional_T1R_info,Functional_T2R_info,Functional_TAAR_info,Functional_V1R_info))
remove<-list("Cluster1", "Cluster2", "Cluster3", "Cluster4", "Cluster5")
final = final[ !(rownames(final) %in% remove), ]
selected_receptors<-row.names(final)
selected_receptors<-as.data.frame(selected_receptors)
selected_receptors<-selected_receptors[-1,]
selected_receptors<-as.data.frame(selected_receptors)
final<-t(final)
final<-as.data.frame(final)
final<-setDT(final, keep.rownames = "Cell_Ids")[]
write.table(final,file="Main_pipeline/GSE756881/Final_Receptor_file.csv",sep=",",row.name=FALSE,col.names = TRUE,quote=FALSE)

Another task is to classify the cell as Negative or Positive based on the expression of ORs.

selected_rep_zfpkm_values<-merge(selected_receptors,active_zfpkm_genes,by.x="selected_receptors",by.y="Gene_names")
row.names(selected_rep_zfpkm_values)<-selected_rep_zfpkm_values$selected_receptors
selected_rep_zfpkm_values$selected_receptors<-NULL
selected_rep_zfpkm_values<-t(selected_rep_zfpkm_values)
selected_rep_zfpkm_values<-as.data.frame(selected_rep_zfpkm_values)
selected_rep_zfpkm_values$count_value<-apply(selected_rep_zfpkm_values,1,function(x) sum(x>=-3))
selected_rep_zfpkm_values$Cell_status<-ifelse(selected_rep_zfpkm_values$count_value>0,"Positive","Negative")
selected_rep_zfpkm_values<-setDT(selected_rep_zfpkm_values, keep.rownames = "Cell_Ids")[]
write.table(selected_rep_zfpkm_values,file="Main_pipeline/GSE756881/Cell_staus_zfpkm.csv",sep=",",row.names=FALSE,col.names = TRUE,quote=FALSE)

Gene Set Variation Analysis

input<-read.csv("Main_pipeline/GSE756881/copy_of_seu_input.csv")
mer<-merge(selected_receptors,input,by.x="selected_receptors",by.y="rn")
row.names(mer) <- mer$selected_receptors
mer[1] <- NULL
row.names(input) <- input$rn
input[1] <- NULL
input<-as.matrix(input)

#Input GMT file
genelist_mic <- getGmt("GMT_file/GMT_file_for_GSVA.gmt")
gbm_es <- gsva(input, genelist_mic)
gbm_es<-as.data.frame(gbm_es)
df=data.frame(matrix(ncol=4,nrow=0))
colnames(df)<-c("Genename","Signature","Pvalue","Corelation")
for (i in 1:nrow(mer)){
  p<-as.matrix(mer[i,])
  for (j in 1:nrow(gbm_es)){
    q<-as.matrix(gbm_es[j,])
    result<-cor.test(p,q)
    com<-c(row.names(p),row.names(q),result[["p.value"]],result[["estimate"]][["cor"]])
    df[nrow(df)+1,]=com
  }
}
df<-filter(df, Pvalue <= 0.01)
write.table(df,file="Main_pipeline/GSE756881/GSVA_signatures.csv", sep=",", row.names = FALSE, col.names = TRUE, quote = FALSE)
df
gbm_es_2<-gbm_es
gbm_es_2<-setDT(gbm_es_2,keep.rownames = "Signature")[]
write.table(gbm_es_2,file="Main_pipeline/GSE756881/GSVA_scores.csv",sep=",",row.names=FALSE,col.names=TRUE,quote=FALSE)

The GSVA results will be similar to the table shown below

Gene Name Signature P-Value Correlation
OR6C75 Differentiation 0.0001542391263 0.21
OR8H1 Angiogenesis 0.0003532790554 0.20
OR8H1 Invasion 0.0001711976409 -0.21
OR7G2 DNA_repair 0.0001577398186 -0.21
OR1A1 Inflammation 0.009691620452 -0.33

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Packages

No packages published