## Replace gene symbols with NCBI Entrez gene IDs throughout the data frame

#### Load the data

In [11]:
# Read the tab-separated gene-set table. The file has no header row, so the
# columns receive the default names V1, V2, ...; text columns are kept as
# character vectors and surrounding whitespace is stripped from the fields.
df <- read.delim(
  "gs.txt",
  header = FALSE,
  stringsAsFactors = FALSE,
  strip.white = TRUE
)
df

V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12
ACEVEDO_LIVER_CANCER_WITH_H3K9ME3_UP,ABCA13,ABCB5,AK5,AMOTL1,ANGPTL5,ANK2,ARHGAP18,ARMC3,B3GNT2,BMPER,C1orf88
ACEVEDO_METHYLATED_IN_LIVER_CANCER_DN,A2ML1,ABCA11P,ABCC8,ABO,ACCN2,ACSL1,ACSS3,ADAD1,ADAM11,ADAM2,ADAM5P
ACEVEDO_NORMAL_TISSUE_ADJACENT_TO_LIVER_TUMOR_DN,AARS,ABCC2,ABHD10,ABHD14B,ABHD6,ACADSB,ACAT2,ACTG1,ADH4,ADH6,ADI1
ACTGCAG_MIR173P,ACACA,ACTR1A,ANKRD50,ARID2,ARMC8,BCORL1,BRWD3,BTF3,C20orf20,CACNA2D4,CAMK2D
ACTGCCT_MIR34B,ABCC1,ACACA,ACSL1,ACTL6A,ACTR1A,ADCY2,ADORA2A,AHCYL2,AKAP1,ALCAM,ANGPTL7
ACTGTAG_MIR139,AEBP2,AKIRIN2,ANK2,AP1S2,AP3M1,APLP2,ARRDC3,ATP2B2,ATRX,ATXN1,AUTS2
ACTGTGA_MIR27A_MIR27B,ABCA1,ABCB9,ACOT11,ACVR1,ACVR2A,ADAM19,ADAMTS10,ADCY3,ADCY6,ADORA2B,AFAP1
AFP1_Q6,AARS2,ABLIM1,ACVR1B,ADAM11,AFF3,AGL,ANKRD28,ANKRD39,ANKS1B,ARHGDIB,ARL3
AGGCACT_MIR5153P,ARHGEF12,ARIH1,ARL4C,BAZ1B,BAZ2A,BCOR,BICD2,BTBD3,C10orf140,C11orf87,C14orf45
AGGTGCA_MIR500,ABCC4,ADAMTSL3,ANKRD13A,ATL1,B4GALNT3,BTBD11,C17orf74,C5orf30,CA10,CACNB1,CAMK4


#### Extract the unique gene list and store it

In [12]:
library(tidyr)  # left loaded in case other cells rely on it; not used below

# Collect every gene symbol in the table (all columns except the first, which
# holds the gene-set name) into one vector. unlist() stacks the columns
# directly, replacing the superseded tidyr::gather() call and its empty ("")
# key-column name.
all_symbols <- unlist(df[-1], use.names = FALSE)

# Drop missing and empty cells so they are not passed on to the lookup step as
# if they were gene symbols.
all_symbols <- all_symbols[!is.na(all_symbols) & nzchar(all_symbols)]

# Sorted vector of the distinct symbols.
gene_list <- sort(unique(all_symbols))

#### Look up the NCBI gene IDs for the symbols using the BioMart service

In [13]:
library(biomaRt)

# Connect to the Ensembl BioMart human gene dataset.
mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")

# Query the NCBI (Entrez) gene ID for every HGNC symbol in gene_list.
# The attribute is requested as "entrezgene_id": Ensembl renamed the former
# "entrezgene" attribute (around release 97), and asking for the old name is
# rejected as an invalid attribute by the current mart.
# bmHeader = TRUE makes getBM() return BioMart's display names as headers.
test <- getBM(
  attributes = c("hgnc_symbol", "entrezgene_id"),
  filters = "hgnc_symbol",
  values = gene_list,
  bmHeader = TRUE,
  mart = mart
)

#### Rename the columns and convert the gene ID column to character

In [14]:
# Give the lookup table short, fixed column names (symbol first, ID second),
# then store the gene IDs as text rather than in their returned type.
names(test) <- c("hgnc_symbol", "ncbi_gene_id")
test[["ncbi_gene_id"]] <- as.character(test[["ncbi_gene_id"]])

#### Now replace the gene symbols with their NCBI gene IDs. First, copy the original data frame into a new one so that the original data is not overwritten.

In [15]:
library(forcats)  # left loaded in case other cells rely on it; not used below

# Work on a copy so the original symbol table in df stays untouched.
new_df <- df

# Keep only lookup rows that actually carry an ID, so a symbol is never
# resolved to a missing ID when a usable one exists further down the table.
has_id <- !is.na(test$ncbi_gene_id)
lookup_symbols <- test$hgnc_symbol[has_id]
lookup_ids <- test$ncbi_gene_id[has_id]

# Translate a vector of gene symbols into gene IDs.
# match() is used instead of factor() + lvls_revalue() because factor() stops
# with an error when its levels contain duplicates, and the lookup table may
# list the same symbol on more than one row. match() takes the first row for
# such a symbol and gives NA for symbols that have no row at all.
symbol_to_id <- function(symbols) {
  lookup_ids[match(symbols, lookup_symbols)]
}

# Convert every column except the first (the gene-set name). The converted
# columns are plain vectors of IDs, not factors.
new_df[-1] <- lapply(new_df[-1], symbol_to_id)
new_df

V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12
ACEVEDO_LIVER_CANCER_WITH_H3K9ME3_UP,154664,340273.0,26289,154810,253935.0,287,93663.0,219681,10678.0,168667,
ACEVEDO_METHYLATED_IN_LIVER_CANCER_DN,144568,,6833,28,,2180,79611.0,132612,4185.0,2515,
ACEVEDO_NORMAL_TISSUE_ADJACENT_TO_LIVER_TUMOR_DN,16,1244.0,55347,84836,57406.0,36,39.0,71,127.0,130,55256.0
ACTGCAG_MIR173P,31,10121.0,57182,196528,25852.0,63035,254065.0,689,,93589,817.0
ACTGCCT_MIR34B,4363,31.0,2180,86,10121.0,108,135.0,23382,8165.0,214,10218.0
ACTGTAG_MIR139,121536,55122.0,287,8905,26985.0,334,57561.0,491,546.0,6310,26053.0
ACTGTGA_MIR27A_MIR27B,19,23457.0,26027,90,92.0,8728,81794.0,109,112.0,136,60312.0
AFP1_Q6,57505,3983.0,91,4185,3899.0,178,23243.0,51239,56899.0,397,403.0
AGGCACT_MIR5153P,23365,25820.0,10123,9031,11176.0,54880,23299.0,22903,,399947,
AGGTGCA_MIR500,10257,57188.0,88455,51062,283358.0,121551,,90355,56934.0,782,814.0
