analysis/wolfrum_integration.Rmd

---
title: "R Notebook"
output: html_notebook
---

```{r}
library(Seurat)
library(SeuratWrappers)
library(monocle)
library(cowplot)
library(dplyr)
library(tidyr)
library(knitr)
library(kableExtra)
library(DT)
library(harmony)
```

```{r}
# Load the saved 10x-180831 object; its `@meta.data` row names and
# `State.labels` column are used further down to label the reference cells.
data_180831 <- readRDS(file = "output/seurat_objects/180831/10x-180831-S3")
```


# Seurat integration

## Integrating Wolfrum with 10x-180831 data

```{r, fig.height = 8, fig.width = 12, fig.align = "center"}
# The commented-out lines record how the cached integrated object was
# (presumably) produced; the notebook only reads the saved result.
#anchors <- FindIntegrationAnchors(object.list = list(wolfrum, data_180831), dims = 1:20)
#integrated <- IntegrateData(anchorset = anchors, dims = 1:20)
#saveRDS(integrated, '/projects/pytrik/sc_adipose/analyze_10x_fluidigm/10x-adipocyte-analysis/output/seurat_objects/wolfrum/wolfrum.180831.integrated.rds')
integrated <- readRDS("output/seurat_objects/wolfrum/wolfrum.180831.integrated.rds")

# Tag each cell with its dataset of origin: every cell starts as '10x-180831',
# then cells without a `branch` value are relabelled as 'Wolfrum'.
integrated@meta.data["dataset"] <- "10x-180831"
wolfrum_rows <- which(is.na(integrated@meta.data$branch))
integrated@meta.data[wolfrum_rows, "dataset"] <- "Wolfrum"

# UMAP of the integrated object coloured by dataset, cluster and branch.
# `label = TRUE` is spelled out because `T` is an ordinary, reassignable
# variable rather than a reserved word.
plot_grid(
  UMAPPlot(integrated, group.by = "dataset"),
  UMAPPlot(integrated, group.by = "seurat_clusters", label = TRUE),
  UMAPPlot(integrated, group.by = "branch"),
  ncol = 2
)
```

These results also confirm that the L branch is closest to cluster 11 and U is closest to the U branch.

## Predict cell types with Seurat's TransferData

```{r}
# Load the saved object that carries the `predictions_pca.*` metadata columns
# (prediction scores and predicted ids) used in the chunks below.
wolfrum.predicted_labels <- readRDS(
  file = "output/seurat_objects/wolfrum/wolfrum.predicted_labels_180831.rds"
)
```

PCA was used as the dimensionality reduction for `FindTransferAnchors` and `IntegrateData`.

```{r, fig.height = 8, fig.width = 12, fig.align = "center"}
# One feature plot per prediction-score column, arranged in a two-column grid.
score_columns <- c(
  "predictions_pca.prediction.score.Progenitor",
  "predictions_pca.prediction.score.Metabolic",
  "predictions_pca.prediction.score.ECM"
)
score_plots <- lapply(score_columns, function(score_column) {
  FeaturePlot(wolfrum.predicted_labels, features = score_column)
})
plot_grid(plotlist = score_plots, ncol = 2)
```

For every prediction, set the predicted ID to `NA` when the maximum prediction score falls below a given threshold.

```{r}
#' Blank out transferred labels whose best prediction score is too low.
#'
#' @param colname Prefix of the prediction columns. The function reads
#'   `<colname>.prediction.score.max` and `<colname>.predicted.id`.
#' @param threshold Minimum score a prediction needs in order to keep its
#'   label.
#' @param meta Data frame holding the two columns. Defaults to the metadata of
#'   the global `wolfrum.predicted_labels` object, so existing two-argument
#'   calls keep working.
#' @return An unnamed character vector with one element per row of `meta`:
#'   the predicted id, or `NA` where the score is below `threshold` or missing.
assign_labels <- function(colname, threshold = 0.5,
                          meta = wolfrum.predicted_labels@meta.data) {
  score_col <- paste0(colname, ".prediction.score.max")
  id_col <- paste0(colname, ".predicted.id")
  stopifnot(all(c(score_col, id_col) %in% colnames(meta)))

  # Read the two columns directly. Running apply() over the rows would coerce
  # each row to character (one column is text), turning the threshold test
  # into a string comparison instead of a numeric one.
  scores <- meta[[score_col]]
  pred_ids <- as.character(meta[[id_col]])

  pred_ids[is.na(scores) | scores < threshold] <- NA_character_
  pred_ids
}

# For each prediction prefix and each score cutoff, store the thresholded
# labels as a metadata column named `<prefix>.predicted_label.<cutoff>`.
score_cutoffs <- c(0.5, 0.7, 0.9, 0.95, 0.99)
for (prefix in "predictions_pca") {
  for (cutoff in score_cutoffs) {
    wolfrum.predicted_labels <- AddMetaData(
      wolfrum.predicted_labels,
      assign_labels(prefix, cutoff),
      col.name = paste(prefix, "predicted_label", cutoff, sep = ".")
    )
  }
}
```


```{r, fig.height = 10, fig.width = 12, fig.align = "center"}
# UMAPs coloured by the thresholded labels, one panel per cutoff. Each entry
# maps the panel title to the metadata column it displays.
threshold_columns <- c(
  "threshold=0.7" = "predictions_pca.predicted_label.0.7",
  "threshold=0.9" = "predictions_pca.predicted_label.0.9",
  "threshold=0.95" = "predictions_pca.predicted_label.0.95",
  "threshold=0.99" = "predictions_pca.predicted_label.0.99"
)
threshold_plots <- Map(
  function(label_column, panel_title) {
    UMAPPlot(wolfrum.predicted_labels, group.by = label_column) + ggtitle(panel_title)
  },
  threshold_columns,
  names(threshold_columns)
)
plot_grid(plotlist = unname(threshold_plots), ncol = 2)
```

Color the cells of the integrated dataset by their predicted labels.

```{r}
# Look up each integrated cell's 0.9-threshold prediction by cell name; cells
# that are absent from wolfrum.predicted_labels get NA.
prediction_rows <- match(
  rownames(integrated@meta.data),
  rownames(wolfrum.predicted_labels@meta.data)
)
predicted_ids <- wolfrum.predicted_labels@meta.data$predictions_pca.predicted_label.0.9[prediction_rows]

# Translate prediction ids into display labels. NA and any id that is not
# listed here end up as 'Non-matching cells'.
display_labels <- c(
  Progenitor = "Seurat predicted match to P",
  Metabolic = "Seurat predicted match to U",
  ECM = "Seurat predicted match to L"
)
branch_labels <- unname(display_labels[as.character(predicted_ids)])
branch_labels[is.na(branch_labels)] <- "Non-matching cells"
integrated@meta.data["branch_labels_integrated"] <- branch_labels

# Cells that come from data_180831 take their own `State.labels` value instead.
reference_rows <- match(rownames(data_180831@meta.data), rownames(integrated@meta.data))
integrated@meta.data$branch_labels_integrated[reference_rows] <- data_180831@meta.data$State.labels
```

```{r, fig.height = 5, fig.width = 8, fig.align = "center"}
# Colour per label; the order of the entries is also the factor level order
# applied to the label column below.
colormap <- c(
  "P" = "#ecdd83",
  "U" = "#e27268",
  "L" = "#93c8bc",
  "Seurat predicted match to P" = "#b7a333",
  "Seurat predicted match to U" = "#af4f48",
  "Seurat predicted match to L" = "#4f8579",
  "Non-matching cells" = "#b6b6b6"
)

integrated@meta.data$branch_labels_integrated <- factor(
  integrated@meta.data$branch_labels_integrated,
  levels = names(colormap)
)

DimPlot(
  integrated,
  reduction = "umap",
  group.by = "branch_labels_integrated",
  cols = colormap
) + NoAxes()
```

# Harmony integration

```{r}
# The commented-out calls record how the saved Harmony object was (presumably)
# created; the notebook reads the stored result.
#harmony <- RunHarmony(data_combined, "Dataset", plot_convergence =TRUE)
#harmony <- RunUMAP(harmony, reduction='harmony', dims=1:20)
harmony <- readRDS(
  file = "output/seurat_objects/wolfrum/wolfrum.180831.harmony_default_integration"
)
```

```{r, fig.height = 10, fig.width = 12, fig.align = "center"}
# Harmony UMAP coloured by dataset, cluster (resolution 0.8 column) and branch.
# `label = TRUE` is spelled out because `T` is an ordinary, reassignable
# variable rather than a reserved word.
plot_grid(
  DimPlot(harmony, reduction = "umap", group.by = "Dataset"),
  DimPlot(harmony, reduction = "umap", group.by = "RNA_snn_res.0.8", label = TRUE),
  DimPlot(harmony, reduction = "umap", group.by = "branch"),
  ncol = 2
)
```

```{r}
# Fetch the 0.9-threshold prediction for every cell of the Harmony object by
# cell name; cells missing from wolfrum.predicted_labels get NA.
harmony_prediction_rows <- match(
  rownames(harmony@meta.data),
  rownames(wolfrum.predicted_labels@meta.data)
)
harmony_predicted_ids <- wolfrum.predicted_labels@meta.data$predictions_pca.predicted_label.0.9[harmony_prediction_rows]

# Map prediction ids to display labels. NA and unlisted ids become
# 'Non-matching cells'.
harmony_display_labels <- c(
  Progenitor = "Seurat predicted match to P",
  Metabolic = "Seurat predicted match to U",
  ECM = "Seurat predicted match to L"
)
harmony_branch_labels <- unname(harmony_display_labels[as.character(harmony_predicted_ids)])
harmony_branch_labels[is.na(harmony_branch_labels)] <- "Non-matching cells"
harmony@meta.data["branch_labels_integrated"] <- harmony_branch_labels

# Cells that come from data_180831 take their own `State.labels` value instead.
harmony_reference_rows <- match(rownames(data_180831@meta.data), rownames(harmony@meta.data))
harmony@meta.data$branch_labels_integrated[harmony_reference_rows] <- data_180831@meta.data$State.labels
```


```{r, fig.height = 5, fig.width = 7, fig.align = "center"}
# Colour per label; the order of the entries is also the factor level order
# applied to the label column below.
colormap <- c(
  "P" = "#ecdd83",
  "U" = "#e27268",
  "L" = "#93c8bc",
  "Seurat predicted match to P" = "#b7a333",
  "Seurat predicted match to U" = "#af4f48",
  "Seurat predicted match to L" = "#4f8579",
  "Non-matching cells" = "#b6b6b6"
)

harmony@meta.data$branch_labels_integrated <- factor(
  harmony@meta.data$branch_labels_integrated,
  levels = names(colormap)
)

DimPlot(
  harmony,
  reduction = "umap",
  group.by = "branch_labels_integrated",
  cols = colormap
) + NoAxes()
```

# Figures

```{r, fig.height = 5, fig.width = 7, fig.align = "center"}
# Figure: Harmony UMAP coloured by the combined branch labels, without axes.
harmony_180831 <- DimPlot(
  harmony,
  reduction = "umap",
  group.by = "branch_labels_integrated",
  cols = colormap
) + NoAxes()

# AugmentPlot() variant of the same plot; only used by the commented-out
# save_plot() call below.
harmony_180831_vf <- AugmentPlot(harmony_180831, width = 7, height = 5, dpi = 500)
#save_plot("../figures/figures_paper/main_figures/Figure_wolfrum/UMAP_wolfrum_harmony_branch-labels.vf.pdf", harmony_180831_vf, base_width=5, base_height=5)
#save_plot("../figures/figures_paper/main_figures/Figure_wolfrum/UMAP_wolfrum_harmony_branch-labels.pdf", harmony_180831, base_width=7, base_height=5)
harmony_180831
```

```{r, fig.height = 5, fig.width = 12, fig.align = "center"}
# Figure: the branch-label UMAP, drawn as one panel per value of `Dataset`.
harmony_180831_split_by_source <- DimPlot(
  harmony,
  reduction = "umap",
  group.by = "branch_labels_integrated",
  cols = colormap,
  split.by = "Dataset"
)
#harmony_180831_split_by_source_vf <- lapply(X = harmony_180831_split_by_source, FUN = AugmentPlot)
#save_plot("../figures/figures_paper/main_figures/Figure_wolfrum/UMAP_wolfrum_harmony_branch-labels.split-dataset.pdf", harmony_180831_split_by_source, base_width=12, base_height=5)
harmony_180831_split_by_source
```

```{r, fig.height = 5, fig.width = 7, fig.align = "center"}
# Figure: Harmony UMAP coloured by dataset of origin, without axes. This
# reuses (and overwrites) the `harmony_180831` / `harmony_180831_vf` names.
harmony_180831 <- DimPlot(
  harmony,
  reduction = "umap",
  group.by = "Dataset"
) + NoAxes()

# AugmentPlot() variant; only used by the commented-out save_plot() call below.
harmony_180831_vf <- AugmentPlot(harmony_180831, width = 5, height = 5, dpi = 500)
#save_plot("../figures/figures_paper/main_figures/Figure_wolfrum/UMAP_wolfrum_harmony_dataset.vf.pdf", harmony_180831_vf, base_width=5, base_height=5)
#save_plot("../figures/figures_paper/main_figures/Figure_wolfrum/UMAP_wolfrum_harmony_dataset.pdf", harmony_180831, base_width=7, base_height=5)
harmony_180831
```

```{r, fig.height = 4, fig.width = 6, fig.align = "center"}
# Convert the 0.7-threshold prediction ids into branch letters.
# NA and any id that is not listed below map to 'Non-matching cells'. A
# per-element if/else chain without a final `else` would return NULL for an
# unexpected id, which unlist() drops, leaving fewer labels than cells.
prediction_to_branch <- c(ECM = "L", Metabolic = "U", Progenitor = "P")
predicted_ids <- as.character(
  wolfrum.predicted_labels@meta.data$predictions_pca.predicted_label.0.7
)
labels <- unname(prediction_to_branch[predicted_ids])
labels[is.na(labels)] <- "Non-matching cells"

# Store the labels as a factor so the legend order is fixed: P, L, U, then the
# non-matching cells.
wolfrum.predicted_labels@meta.data["predictions_pca.predicted_label.0.7_labels"] <- factor(
  labels,
  levels = c("P", "L", "U", "Non-matching cells")
)
```

```{r, fig.height = 5, fig.width = 8, fig.align = "center"}
# Colours for the three branch letters plus the non-matching cells.
colormap.branches <- c(
  "P" = "#ecdd83",
  "U" = "#e27268",
  "L" = "#93c8bc",
  "Non-matching cells" = "#7a7a7a"
)

# UMAP coloured by the 0.7-threshold branch labels, with axes removed, zero
# plot margin and the colour legend titled 'Seurat prediction'.
p_predictions <- UMAPPlot(
  wolfrum.predicted_labels,
  group.by = "predictions_pca.predicted_label.0.7_labels",
  cols = colormap.branches
) +
  NoAxes() +
  theme(
    legend.text = element_text(size = 12),
    legend.key.height = unit(0.4, "cm"),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    plot.margin = grid::unit(c(0, 0, 0, 0), "mm")
  ) +
  labs(color = "Seurat prediction")
p_predictions
```

```{r}
#save_plot("figures/figures_paper/main_figures/Figure_wolfrum/UMAP_wolfrum_predicted-labels_180831.pdf", p_predictions, base_width=8, base_height=5)
```

```{r, fig.height = 5, fig.width = 6.5, fig.align = "center"}
# UMAP coloured by the resolution-0.8 cluster column, with cluster labels drawn
# on the plot and the legend, axes and margins removed.
# `label = TRUE` is spelled out because `T` is an ordinary, reassignable
# variable rather than a reserved word.
p_clusters <- UMAPPlot(
  wolfrum.predicted_labels,
  group.by = "RNA_snn_res.0.8",
  label = TRUE
) +
  NoAxes() +
  theme(
    legend.position = "none",
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    plot.margin = grid::unit(c(0, 0, 0, 0), "mm")
  )
p_clusters
```

```{r}
#save_plot("figures/figures_paper/main_figures/Figure_wolfrum/UMAP_wolfrum_clusters.pdf", p_clusters, base_width=6, base_height=5)
```