# Integrative Analysis. Robust Rank Aggregation

Integrative Analysis aims at combining heterogeneous data at different omic levels. 

The integration is performed using Robust Rank Aggregation (RRA) method (Kolde R et al., 2012). It detects genes that are ranked consistently better than expected under null hypothesis of uncorrelated inputs and assigns a significance score for each gene.

For each item, the algorithm looks at how the item is positioned in the ranked lists and compares this to the baseline case where all the preference lists are randomly shuffled. As a result, it assigns a P-value for all items, showing how much better it is positioned in the ranked lists than expected by chance. This P-value is used both for re-ranking the items and deciding their significance.

Since the number of informative ranks is not known, it defines the final score for the rank vector r as the minimum of P-values and order all rank vectors according to their ρ scores. 

In [1]:
library(RobustRankAggreg)

### 1) Have a look at input datasets

We are combining here the results from the meta analysis of GWES Microarray and the results from the GWAS analysis. Note we just have one GWAS data analysed so there is no meta GWAS results. 

Ensure you have common gene symbols in the datasets to integrate.

In [2]:
metaGWES_blood=read.table("/mnt/data/MetaAnalysis/output/L3-GWES-STAGE_I-BLOOD.MetaDE.Unstratified.tsv")
head(metaGWES_blood,n=3)

Unnamed: 0_level_0,rank,logFC,Var,Qpvalue,Pvalue
Unnamed: 0_level_1,<int>,<dbl>,<dbl>,<dbl>,<dbl>
CETN2,1,-0.1754719,0.01072363,3.147592e-09,0
DNAJC8,1,-0.1696543,0.01519292,7.199249e-11,0
PSMC2,1,-0.2393246,0.01700587,5.570692e-05,0


In [3]:
metaGWES_hp=read.table("/mnt/data/MetaAnalysis/output/L3-GWES-STAGE_I-HP.MetaDE.Unstratified.csv", header=TRUE, row.names=1, sep=",")
head(metaGWES_hp,n=3)

Unnamed: 0_level_0,rank,logFC..case.vs.ctl.,Var,Qpvalue,REM.Pvalue,REM.FDR,Fisher.Pvalue,Fisher.FDR,n.estimators
Unnamed: 0_level_1,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>
ZHX1,1,0.4302563,0.006218298,0.8119241,4.863931e-08,0.0007996383,1.613147e-06,0.02557691,2
OPA1,2,-0.48587,0.008116874,0.4078185,6.931377e-08,0.0007996383,2.78025e-06,0.02557691,2
INPP5F,3,-0.3624026,0.103217752,4.308188e-06,0.2593144,0.677636035,4.242689e-06,0.02602041,2


In [4]:
metaGWES_cortex=read.table("/mnt/data/MetaAnalysis/output/L3-GWES-STAGE_I_II-CORTEX.MetaDE.Unstratified.csv", header=TRUE, row.names=1, sep=",")
head(metaGWES_cortex,n=3)

Unnamed: 0_level_0,rank,logFC..case.vs.ctl.,Var,Qpvalue,REM.Pvalue,REM.FDR,Fisher.Pvalue,Fisher.FDR,n.estimators,rankE2
Unnamed: 0_level_1,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<int>
ADCYAP1,1,-0.2551334,0.007929618,1.545044e-17,0.004168657,0.0582137874,0,0,11,
AGK,1,-0.1599785,0.001359238,0.000712472,1.429754e-05,0.0009526985,0,0,11,
AP1S1,1,-0.1268419,0.003961244,4.30372e-14,0.04386973,0.235289415,0,0,11,


In [5]:
GWAS_statistics=read.table("/mnt/data/MetaAnalysis/output/GWAS_summary_statistics_ranked_Rojas_et_al.csv", header=TRUE, sep=",")
head(GWAS_statistics,n=3)

Unnamed: 0_level_0,GWAS_rank,Gene,Chr,Start.BP,End.BP,NSNPS,NPARAM,Z.score,P,Gene.name,eQTL_map,CI_map,Pos_map
Unnamed: 0_level_1,<int>,<chr>,<int>,<int>,<int>,<int>,<int>,<dbl>,<dbl>,<chr>,<int>,<int>,<int>
1,1,ENSG00000130202,19,45349432,45392485,237,48,25.211,1.52e-140,PVRL2,1,1,1
2,2,ENSG00000130204,19,45393826,45406946,64,19,23.607,1.65e-123,TOMM40,1,1,1
3,3,ENSG00000130203,19,45409011,45412650,8,5,17.315,1.83e-67,APOE,1,1,1


### 2) RRA method

Ensure input list are ordered by p value ascending. 

In [6]:
# create a list object with the ordered genes from each dataset to integrate
genelist <- list(as.character(rownames(metaGWES_blood), rownames(metaGWES_hp), rownames(metaGWES_cortex)),as.character(GWAS_statistics$Gene.name))

In [7]:
?aggregateRanks

0,1
aggregateRanks {RobustRankAggreg},R Documentation

0,1
glist,"list of element vectors, the order of the vectors is used as the ranking."
rmat,the rankings in matrix format. The glist is by default converted to this format.
N,"the number of ranked elements, important when using only top-k ranks, by default it is calculated as the number of unique elements in the input."
method,"rank aggregation method, by defaylt 'RRA', other options are 'min', 'geom.mean', 'mean', 'median' and 'stuart'"
full,"indicates if the full rankings are given, used if the the sets of ranked elements do not match perfectly"
exact,"indicator showing if exact p-value will be calculated based on rho score (Default: if number of lists smaller than 10, exact is used)"
topCutoff,a vector of cutoff values used to limit the number of elements in the input lists elements do not match perfectly


In [9]:
# call aggregateRanks method from RobustRankAggreg library
agglist<-aggregateRanks(rmat=rankMatrix(genelist,full = TRUE),method = "RRA", exact=TRUE) 
dim(agglist)
agglist

Unnamed: 0_level_0,Name,Score
Unnamed: 0_level_1,<chr>,<dbl>
CETN2,CETN2,8.192692e-05
DNAJC8,DNAJC8,1.638538e-04
PSMC2,PSMC2,2.457808e-04
ARPC3,ARPC3,3.277077e-04
SNRPG,SNRPG,4.096346e-04
GTF2H5,GTF2H5,4.915615e-04
SRP14,SRP14,5.734884e-04
MRPL22,MRPL22,6.554154e-04
CETN3,CETN3,7.373423e-04
NDUFS4,NDUFS4,8.192692e-04


In [10]:
agglist$adjP.Val=p.adjust(agglist$Score, method = "bonferroni")
head(agglist)

Unnamed: 0_level_0,Name,Score,adjP.Val
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>
CETN2,CETN2,8.192692e-05,1
DNAJC8,DNAJC8,0.0001638538,1
PSMC2,PSMC2,0.0002457808,1
ARPC3,ARPC3,0.0003277077,1
SNRPG,SNRPG,0.0004096346,1
GTF2H5,GTF2H5,0.0004915615,1


In [11]:
agglist$adjP.Val2=agglist$Score*2
agglist$adjP.Val2[agglist$adjP.Val2>1] <- 1
head(agglist)

Unnamed: 0_level_0,Name,Score,adjP.Val,adjP.Val2
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>
CETN2,CETN2,8.192692e-05,1,0.0001638538
DNAJC8,DNAJC8,0.0001638538,1,0.0003277077
PSMC2,PSMC2,0.0002457808,1,0.0004915615
ARPC3,ARPC3,0.0003277077,1,0.0006554154
SNRPG,SNRPG,0.0004096346,1,0.0008192692
GTF2H5,GTF2H5,0.0004915615,1,0.0009831231


In [12]:
# rank the final list using the Rank library from the R Basic package 
rank<-rank(agglist$Score,na.last = "keep", ties.method = "min")
ranked<-cbind(rank,agglist)
head(ranked)

Unnamed: 0_level_0,rank,Name,Score,adjP.Val,adjP.Val2
Unnamed: 0_level_1,<int>,<chr>,<dbl>,<dbl>,<dbl>
CETN2,1,CETN2,8.192692e-05,1,0.0001638538
DNAJC8,2,DNAJC8,0.0001638538,1,0.0003277077
PSMC2,3,PSMC2,0.0002457808,1,0.0004915615
ARPC3,4,ARPC3,0.0003277077,1,0.0006554154
SNRPG,5,SNRPG,0.0004096346,1,0.0008192692
GTF2H5,6,GTF2H5,0.0004915615,1,0.0009831231


In [None]:
# If you have rank to NA, you can set those to last non NA rank +1
# get the last non NA index
NonNAindex <- which(!is.na(ranked$rank))
lastNonNA <- max(NonNAindex)
lastNonNA
# change all NA index to last non NA +1
ranked$rank[is.na(ranked$rank)]<-lastNonNA+1

In [15]:
dir.create("/mnt/data/IntegrativeAnalysis/output", recursive = TRUE)

In [14]:
write.table(ranked,"/mnt/data/IntegrativeAnalysis/output/RRA_paper_result")