# Combine Tables

In [1]:
# Work from the folder that holds the per-study CSV tables; the relative file
# names used by the read/write calls in this notebook resolve against it.
# NOTE(review): absolute, machine-specific path - adjust before running on
# another machine.
setwd(dir = "/home/jp/ICP_Responders/FinalTables")

In [2]:
library(plyr)
library(dplyr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:plyr’:

    arrange, count, desc, failwith, id, mutate, rename, summarise,
    summarize


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




## Fetching Data

In [9]:
# Load one table per study from the working directory.
gide_pn <- read.csv(file = "Gide_pn.csv")
# gide_pni <- read.csv(file = "Gide_pni.csv")
hugo <- read.csv(file = "Hugo.csv")
lauss <- read.csv(file = "Lauss.csv")
nathanson <- read.csv(file = "Nathanson_pre.csv")
riaz_naive <- read.csv(file = "Riaz_naive.csv")
riaz_prog <- read.csv(file = "Riaz_prog.csv")

# The database from Prat et al., Cancer Res 2017 has data for multiple cancer
# types, so keep only the rows whose Type is "MELANOMA".
prat <- read.csv(file = "Prat.csv")
is_melanoma <- prat$Type %in% "MELANOMA"
prat <- prat[is_melanoma, ]

In [10]:
# read.csv() makes column names syntactically valid, which prepends an "X" to
# names that start with a digit (here the Entrez IDs). Remove that prefix
# again, but only when the "X" is immediately followed by a digit, so that a
# column whose real name begins with "X" is not truncated.
strip_x_prefix <- function(column_names) {
  sub("^X([0-9])", "\\1", column_names)
}

names(prat) <- strip_x_prefix(names(prat))
# names(gide_pni) <- strip_x_prefix(names(gide_pni))
names(gide_pn) <- strip_x_prefix(names(gide_pn))
names(hugo) <- strip_x_prefix(names(hugo))
names(lauss) <- strip_x_prefix(names(lauss))
names(nathanson) <- strip_x_prefix(names(nathanson))
names(riaz_naive) <- strip_x_prefix(names(riaz_naive))
names(riaz_prog) <- strip_x_prefix(names(riaz_prog))

In [33]:
# Sanity check: show the number of rows in each loaded table
# (prat has already been restricted to its melanoma rows at this point).
nrow(prat)
nrow(gide_pn)
nrow(hugo)
nrow(lauss)
nrow(nathanson)
nrow(riaz_naive)
nrow(riaz_prog)

In [28]:
# List the column names of prat that contain "PFS" as a whole word.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
grep("\\bPFS\\b", names(prat), value = TRUE)

## Finding common column names


In [11]:
# Column names present in every one of the seven tables: collect each table's
# names, then fold the collection pairwise with intersect().
# TODO: Fix to include PFS, OS, PFS Event, etc. Names don't match exactly
common_col_names <- Reduce(
  intersect,
  lapply(
    list(prat, gide_pn, hugo, lauss, nathanson, riaz_naive, riaz_prog),
    names
  )
)
head(common_col_names)

In [12]:
# Restrict every table to the shared columns, in the same column order.
# drop = FALSE keeps each result a data frame even if only a single column is
# shared; without it the subset would collapse to a bare vector.
prat_sub <- prat[, common_col_names, drop = FALSE]
# gide_pni_sub <- gide_pni[, common_col_names, drop = FALSE]
gide_pn_sub <- gide_pn[, common_col_names, drop = FALSE]
hugo_sub <- hugo[, common_col_names, drop = FALSE]
lauss_sub <- lauss[, common_col_names, drop = FALSE]
nathanson_sub <- nathanson[, common_col_names, drop = FALSE]
riaz_naive_sub <- riaz_naive[, common_col_names, drop = FALSE]
riaz_prog_sub <- riaz_prog[, common_col_names, drop = FALSE]

In [13]:
# Stack the per-study subsets row-wise into a single table. The tables are
# handed to rbind.fill() as one list, which it treats the same as passing them
# as separate arguments.
final_table_response_and_expression <- rbind.fill(
  list(
    prat_sub,
    gide_pn_sub,
    hugo_sub,
    lauss_sub,
    nathanson_sub,
    riaz_naive_sub,
    riaz_prog_sub
  )
)
head(final_table_response_and_expression)

Unnamed: 0_level_0,Patient,Response,3920,345611,3929,54210,3716,10454,3557,3556,⋯,80380,80381,2002,353514,4940,84166,6236,80153,4851,4582
Unnamed: 0_level_1,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,A41,0,,,,,,,,,⋯,,,,,,,,,,
2,A42,0,,,,,,,,,⋯,,,,,,,,,,
3,A43,0,,,,,,,,,⋯,,,,,,,,,,
4,A44,1,,,,,,,,,⋯,,,,,,,,,,
5,A45,0,,,,,,,,,⋯,,,,,,,,,,
6,A46,0,,,,,,,,,⋯,,,,,,,,,,


## Writing the final CSV

In [14]:
# Save the combined table to the working directory, without a row-name column.
write.csv(
  x = final_table_response_and_expression,
  file = "Final_table_response_and_expression.csv",
  row.names = FALSE
)

In [16]:
# Read the combined table back in to check the round trip.
# check.names = FALSE keeps the header exactly as written (including the
# purely numeric ID columns), so no "X" prefix is added and nothing has to be
# stripped afterwards; a blanket sub("^X", "") would also truncate any column
# whose real name starts with "X".
df <- read.csv(
  "Final_table_response_and_expression.csv",
  check.names = FALSE
)
head(names(df))