---
# Evaluate data integration using LISI
*L.Richards*  
*2021-06-14*  
*/cluster/projects/pughlab/projects/cancer_scrna_integration/evalutation/lisi*

---

https://github.com/immunogenomics/LISI

In [None]:
# install.packages("devtools")
# devtools::install_github("immunogenomics/lisi")
library(lisi) # v1.0
library(Seurat)
library(rlist)

In [None]:
# list metadata files with embeddings (used for Fig 1 plotting)
embeddings.path <- "/cluster/projects/pughlab/projects/cancer_scrna_integration/figures"
# anchor and escape the extension so only files ending in ".csv" are picked up
embeddings <- list.files(embeddings.path, pattern = "\\.csv$")
if (length(embeddings) == 0) {
    stop("No .csv embedding files found in ", embeddings.path, call. = FALSE)
}
results <- list()

# Compute LISI for the SampleID and PatientID labels and append normalized
# versions of both scores.
#
# coords: embedding coordinates, one row per cell
# meta:   metadata for the same cells; must contain SampleID and PatientID
# ...:    forwarded unchanged to compute_lisi() (e.g. perplexity)
#
# Returns the compute_lisi() output with two extra columns, SampleID_Norm and
# PatientID_Norm: each score divided by the number of distinct values of that
# SAME label in `meta`. Keeping this in one place avoids the copy-paste slip of
# normalizing the SampleID score by the PatientID count.
normalized.lisi <- function(coords, meta, ...) {
    labels <- c("SampleID", "PatientID")
    scores <- compute_lisi(coords, meta, labels, ...)
    for (label in labels) {
        scores[[paste0(label, "_Norm")]] <- scores[[label]] / length(unique(meta[[label]]))
    }
    scores
}

for (i in seq_along(embeddings)) {

    # load data
    print(embeddings[i])
    dat <- read.csv(file.path(embeddings.path, embeddings[i]))
    rownames(dat) <- dat$X

    #####################
    # calculate lisi for each method
    methods <- unique(dat$Method)

    for (j in seq_along(methods)) {

        print(methods[j])
        # set up lisi input files
        sub <- dat[dat$Method == methods[j], ] # subset methods
        X <- sub[, c("Coords_1", "Coords_2")] # subset out embeddings

        # calculate and normalize lisi for samples and patients
        lisi <- normalized.lisi(X, sub)

        #####################
        # calculate lisi for each cell type
        # have to subset the dataframe by each cell type and calc
        celltypes <- unique(sub$CellType)
        lisi.celltypes <- vector("list", length(celltypes))

        for (k in seq_along(celltypes)) {

            print(celltypes[k])
            # subset embeddings by cell type
            sub.cell <- sub[sub$CellType == celltypes[k], ]
            X.cell <- X[rownames(sub.cell), ]

            # use a smaller perplexity for small cell-type subsets
            per <- if (nrow(X.cell) < 40) 10 else 30
            print(per)

            # calculate and normalize lisi within cell type
            cell.lisi <- normalized.lisi(X.cell, sub.cell, perplexity = per)
            colnames(cell.lisi) <- paste0("CellType_", colnames(cell.lisi))
            cell.lisi$CellType <- celltypes[k]
            lisi.celltypes[[k]] <- cell.lisi

        }

        lisi.celltypes <- do.call(rbind, lisi.celltypes) # combine
        lisi.celltypes <- lisi.celltypes[rownames(lisi), ] # reorder
        # sanity check: the reorder relies on compute_lisi() output carrying the
        # cell row names; fail loudly instead of cbind-ing misaligned/NA rows
        if (!identical(rownames(lisi.celltypes), rownames(lisi))) {
            stop("Cell-type LISI rows do not match batch LISI rows for ",
                 embeddings[i], " / ", methods[j], call. = FALSE)
        }
        lisi <- cbind(lisi, lisi.celltypes) # combine cell and batch lisi
        lisi$Method <- methods[j]
        lisi$Study <- gsub("_MergedMeta.csv", "", embeddings[i], fixed = TRUE)

        # prefix every score column that contains "_" with "LISI_"; the two raw
        # columns (SampleID, PatientID) are renamed after the final rbind below
        has.underscore <- grepl("_", colnames(lisi), fixed = TRUE)
        colnames(lisi)[has.underscore] <- paste0("LISI_", colnames(lisi)[has.underscore])
        results[[length(results) + 1L]] <- lisi

    }

}

results <- do.call(rbind, results) # combine across studies
dim(results) # 974206 rows
colnames(results)[1:2] <- paste0("LISI_", colnames(results)[1:2])
# NOTE(review): rbind() makes duplicated row names unique, so if the same
# row name occurs in more than one study/method chunk the value stored here
# will carry a numeric suffix -- confirm row names are globally unique
results$CellBarcode <- rownames(results)

In [None]:
# write the combined per-cell LISI table to a csv in the working directory
output.file <- "LISI_calculations.csv"
write.csv(results, file = output.file)