STAMPScreen Network Scoring

Files needed prior to running:

* Matrix of STRING gene interactions using Ensembl IDs, downloaded from STRING - STRINGedgematrix_all_ensembl.csv

* Dataframe of DEGscores calculated from DGEA output according to the STAMPScreen methods, with one row of DEGscores and one column per gene (column names are Ensembl IDs) - DEGscores.csv (the demo code below reads DEGscores_demo.csv)

In [1]:
### Filter STRING matrix to only DEGs for scoring only on DEG networks and normalize interaction scores
#
# Inputs (read from the working directory):
#   STRINGedgematrix_all_ensembl.csv - STRING interaction matrix; the first CSV
#       column (read in as `X`) holds the row IDs
#   DEGscores_demo.csv - DEGscores table; the first CSV column (`X`) holds the
#       row IDs and the remaining column names are used as the DEG IDs
# Objects created:
#   all_mat   - full STRING matrix with the raw interaction scores
#   Gds, degs - DEGscores data frame and its column names (the DEG IDs)
#   mat       - DEG-only matrix with normalized scores, also written to
#               STRINGedgematrix_ensembl.csv

# read in STRING matrix with ensembl IDs
all_mat <- read.csv('STRINGedgematrix_all_ensembl.csv')
rownames(all_mat) <- all_mat$X
all_mat <- all_mat[2:ncol(all_mat)]

# read in DEGscores calculated from DGEA output
Gds <- read.csv('DEGscores_demo.csv')
rownames(Gds) <- Gds$X
Gds <- Gds[2:ncol(Gds)]
degs <- colnames(Gds)

# filter STRING matrix to DEGs
# NOTE(review): the column positions are reused as row positions, which assumes
# the rows and columns of the STRING matrix are in the same order - confirm
idx <- which(colnames(all_mat) %in% degs)
mat <- all_mat[idx, idx]
print(paste0(length(idx), ' remaining STRING DEGs in matrix.'))

# sort rows and columns alphabetically by ID so both axes share one order
# (this is not the order of `degs`; genes are looked up by name afterwards)
mat <- mat[order(rownames(mat)), order(colnames(mat))]

# check that all genes are in matrix
print('All DEGs included in matrix?')
print(length(degs) == ncol(mat))

# normalize weights: collect every positive score in one vectorized step
# (zeros, i.e. no interaction, are left untouched)
weights <- as.matrix(mat)
positive <- !is.na(weights) & weights > 0
x <- weights[positive]
if (length(x) == 0) {
    stop('No positive STRING interaction scores among the DEGs; nothing to normalize.',
         call.=FALSE)
}

# calculate normalized scores: log10(score) / max(log10(score))
max_log <- max(log10(x))
if (max_log == 0) {
    # the largest positive score is 1, so the divisor would be zero
    stop('Largest positive interaction score is 1; log10 normalization would divide by zero.',
         call.=FALSE)
}
# NOTE(review): a raw score of exactly 1 normalizes to 0 and is then
# indistinguishable from "no interaction" in any later `> 0` test on `mat`
mat[positive] <- log10(x) / max_log
write.csv(mat, 'STRINGedgematrix_ensembl.csv')

[1] "20 remaining STRING DEGs in matrix."
[1] "All DEGs included in matrix?"
[1] TRUE


In [7]:
### Run network scoring algorithm and save results
#
# Uses `Gds`, `all_mat` and `mat` from the session. For every DEG the score is
# the sum, over walks of up to three steps through the DEG network, of
#     DEGscore(reached gene) / step number / number of genes reachable at that step
# Results are ranked by score and written to STRING_NS_results.csv.

# results table: one row per DEG (row names are the gene IDs), score starts at 0
scores <- data.frame(score=rep(0, ncol(Gds)), row.names=colnames(Gds))
scores$gene <- colnames(Gds)

# IDs along the matrix columns; `mat` is not modified below
gene_ids <- colnames(mat)

for (gene in rownames(scores)) {
    print(gene)
    score <- 0

    # step 1: DEGs with a positive raw STRING score to `gene` (>0 is used as a
    # binary "edge exists" test), excluding the gene itself
    # NOTE(review): this step reads the raw `all_mat`, the later steps read the
    # normalized `mat` - confirm that is intended
    raw_partners <- colnames(all_mat)[which(all_mat[gene, ] > 0)]
    not_source <- gene_ids != gene
    hop1 <- which((rownames(mat) %in% raw_partners) & not_source)

    # gene has no interactions with other DEGs
    if (length(hop1) == 0) {
        print('not in STRING')
    }

    n1 <- length(hop1)
    outside_hop1 <- !(gene_ids %in% gene_ids[hop1])
    for (i1 in hop1) {
        id1 <- gene_ids[i1]
        score <- score + Gds[1, id1] / n1

        # step 2: neighbours of the step-1 gene; never step back onto the
        # source gene or onto any step-1 gene
        hop2 <- which((mat[id1, ] > 0) & outside_hop1 & not_source)
        n2 <- length(hop2)
        outside_hop2 <- !(gene_ids %in% gene_ids[hop2])
        for (i2 in hop2) {
            id2 <- gene_ids[i2]
            score <- score + Gds[1, id2] / 2 / n2

            # step 3: neighbours of the step-2 gene, additionally excluding
            # every step-2 gene of the current step-1 gene
            hop3 <- which((mat[id2, ] > 0) & outside_hop1 & outside_hop2 & not_source)
            n3 <- length(hop3)
            for (i3 in hop3) {
                score <- score + Gds[1, gene_ids[i3]] / 3 / n3
            }
        }
    }

    print(score)
    flush.console()
    scores[gene, "score"] <- score
}

# save ranked results (highest score first)
ranking <- order(-scores$score)
scores <- scores[ranking, ]
write.csv(scores, file="STRING_NS_results.csv", row.names=TRUE)

[1] "ENSG00000164284"
[1] "not in STRING"
[1] 0
[1] "ENSG00000113356"
[1] 313.4389
[1] "ENSG00000155858"
[1] 11.79969
[1] "ENSG00000158560"
[1] 71.32101
[1] "ENSG00000073670"
[1] 110.1186
[1] "ENSG00000233041"
[1] "not in STRING"
[1] 0
[1] "ENSG00000132563"
[1] 103.7282
[1] "ENSG00000103222"
[1] 357.347
[1] "ENSG00000138031"
[1] 410.8016
[1] "ENSG00000163472"
[1] 262.6748
[1] "ENSG00000206503"
[1] 95.33725
[1] "ENSG00000175662"
[1] "not in STRING"
[1] 0
[1] "ENSG00000130038"
[1] 40.11647
[1] "ENSG00000164690"
[1] 525.7541
[1] "ENSG00000137571"
[1] "not in STRING"
[1] 0
[1] "ENSG00000123560"
[1] 391.6376
[1] "ENSG00000133710"
[1] 9.831671
[1] "ENSG00000119787"
[1] 42.15209
[1] "ENSG00000166436"
[1] "not in STRING"
[1] 0
[1] "ENSG00000162231"
[1] 12.50082
