# Figure 4: 
# Correlation test of mitonuclear DNA discordance and adjusted gene expression (for each gene and tissue)

### Load R packages/dependencies.

In [1]:
# Load R libraries.
library(tidyr)
library(dplyr)
library(patchwork)
library(ggplot2)
library(reshape2)
library(ggpubr)
library(rstatix)
#library(gginnards)

"package 'dplyr' was built under R version 4.0.5"

Attaching package: 'dplyr'


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union


"package 'ggplot2' was built under R version 4.0.5"
"package 'reshape2' was built under R version 4.0.5"

Attaching package: 'reshape2'


The following object is masked from 'package:tidyr':

    smiths


"package 'ggpubr' was built under R version 4.0.5"

Attaching package: 'rstatix'


The following object is masked from 'package:stats':

    filter




In [9]:
# Tissues analysed in this notebook.
list_tissues <- c(
    "Muscle - Skeletal",
    "Esophagus - Muscularis",
    "Artery - Tibial",
    "Nerve - Tibial",
    "Whole Blood",
    "Heart - Left Ventricle",
    "Heart - Atrial Appendage"
)

# mtDNA genes analysed in this notebook; ND5 and ND6 are intentionally left out.
list_mtdna <- c(
    "ATP6", "ATP8",
    "CO1", "CO2", "CO3",
    "CYB",
    "ND1", "ND2", "ND3", "ND4", "ND4L"
)

# Figure 4

## Import the adjusted gene expression.
- Adjusted gene expression is the set of residuals from a multiple linear regression model fitted per gene and tissue (`gene expression in TPM ~ Age + Ischemic_minutes`).

In [None]:
# Read the tab-separated table of adjusted expression; the first row holds the column names.
df_adjTPM <- read.table(
    file = "residuals_adjTPM.txt",
    header = TRUE,
    sep = "\t"
)

## Correlation test of mitonuclear DNA discordance and mtDNA gene expression

In [18]:
## Calculate Spearman's for a single gene in a single tissue.
correlation_oneside = function(df,tissue,gene,col_expr){
    # Filter data for a single tissue and single gene.
    data = df %>% 
        select(short_ID,Tissue,Gene,all_of(col_expr),mitonucl_discord) %>% 
        filter(Tissue==tissue) %>% 
        filter(Gene==gene) %>% 
        unique()
    
    # Correlation between MND and expression.
    x = data$mitonucl_discord
    y = data[[col_expr]]

    # Spearman's rank correlation test (one-sided).
    # Alternative: "greater" corresponds to positive association, "less" to negative association.
    result = cor.test(x,y, method="spearman", alternative="less", exact = FALSE )
    #names(result)
    #result$p.value
    result
}

## Get table of results of the 11 genes (exclude ND5 and ND6) for one tissue at a time.
table_cor_genes = function(df,tissue,col_expr){
    ##list_mtdna = c('ATP6','ATP8','CO1','CO2','CO3','CYB','ND1','ND2','ND3','ND4','ND4L')
    genes = as.data.frame(list_mtdna)
    #print(empty)
    rho = c()
    p_value = c()
    for (gene in list_mtdna) {
        # Run Spearman's for each gene in this tissue.
        result = correlation_oneside(df,tissue,gene,col_expr)
        #print(result$p.value)
        rho = append(rho, result$estimate)
        p_value = append(p_value, result$p.value)
    }
    bonf_pvalue = p_value*length(list_mtdna)
    # If p-value is >1 after Bonferroni correction, chnage down to 1.
    bonf_pvalue = as.data.frame(bonf_pvalue)
    bonf_pvalue$bonf_pvalue[bonf_pvalue$bonf_pvalue >= 1] <- 1
    cbind(genes,rho,p_value,bonf_pvalue) %>% mutate(Tissue=tissue) %>% return()
}

## Create table of all genes for all tissues.
table_cor_tissues = function(df,col_expr){
    # See more rows.
    options(repr.matrix.max.rows=100, repr.matrix.max.cols=20)

    list_tissue = c('Muscle - Skeletal','Esophagus - Muscularis','Artery - Tibial','Nerve - Tibial','Whole Blood','Heart - Left Ventricle','Heart - Atrial Appendage')
    out = data.frame(Date=as.Date(character()), File=character(), User=character(), stringsAsFactors=FALSE)
    for (tissue in list_tissue){
        # Get Spearman's for genes in all tissues.
        out = rbind(out,table_cor_genes(df,tissue,col_expr))
    }
out
}


# Raise the display limits so that more rows and columns of a table are shown.
options(
    repr.matrix.max.rows = 100,
    repr.matrix.max.cols = 20
)

# Spot checks for a single gene / single tissue (disabled):
# names(correlation_oneside(df_adjTPM, 'Muscle - Skeletal', 'ND1', "Residuals_AgeIsch"))
# correlation_oneside(df_adjTPM, 'Muscle - Skeletal', 'ND1', "Residuals_AgeIsch")$estimate
# correlation_oneside(df_adjTPM, 'Muscle - Skeletal', 'ND1', "Residuals_AgeIsch")  # $p.value
#
# Single-tissue table (disabled):
# table_cor_genes(df_adjTPM, 'Muscle - Skeletal', "Residuals_AgeIsch")

# Full table: every gene in every tissue, using the "Residuals_AgeIsch" column.
table_cor_tissues(df = df_adjTPM, col_expr = "Residuals_AgeIsch")

list_mtdna,rho,p_value,bonf_pvalue,Tissue
<chr>,<dbl>,<dbl>,<dbl>,<chr>
ATP6,-0.167774699,6.830782e-06,7.51386e-05,Muscle - Skeletal
ATP8,-0.048499551,0.1058199,1.0,Muscle - Skeletal
CO1,-0.105205418,0.003309492,0.03640441,Muscle - Skeletal
CO2,-0.080370457,0.01913205,0.2104526,Muscle - Skeletal
CO3,-0.06158494,0.05629643,0.6192608,Muscle - Skeletal
CYB,-0.105017712,0.003358507,0.03694358,Muscle - Skeletal
ND1,-0.115152488,0.001470603,0.01617663,Muscle - Skeletal
ND2,-0.10095192,0.00459281,0.05052091,Muscle - Skeletal
ND3,-0.161099021,1.49759e-05,0.0001647349,Muscle - Skeletal
ND4,-0.047140534,0.1123683,1.0,Muscle - Skeletal
