In [1]:
#Preprocessing GSE138266

# Librerie necessarie
library(tools)  # Per la funzione file_path_sans_ext
library(fs)     # Per le funzioni di gestione dei file

# Sezione per dividere il RAW output del dataset in cartelle con genes.tsv, barcodes.tsv, matrix.mtx da poter poi aprire con Read10X


# Definizione delle directory di input e output
dataDir <- "dataset/rawdataset/"
GEO <- "."
datasetDir <- file.path(dataDir, GEO, "GSE138266_RAW")
outputDir <- file.path(dataDir, GEO, "organized_data")

In [2]:
# Function to extract unique prefixes from file names
extract_prefix <- function(filename) {
  sub("(_barcodes.tsv.gz|_genes.tsv.gz|_matrix.mtx.gz)", "", filename)
}

In [3]:
files = list.files(datasetDir)
# Creazione della directory di output se non esiste
dir_create(outputDir)

# Funzione per copiare e rinominare i file nelle cartelle appropriate
organize_files <- function(datasetDir, files, outputDir) {
  prefixes <- unique(sapply(files, extract_prefix))
  
  for (single_prefix in prefixes) {
    
    barcode_file <- file.path(datasetDir, paste0(single_prefix, "_barcodes.tsv.gz"))
    gene_file <- file.path(datasetDir, paste0(single_prefix, "_genes.tsv.gz"))
    matrix_file <- file.path(datasetDir, paste0(single_prefix, "_matrix.mtx.gz"))
    print(barcode_file)
    
    # Crea la cartella se non esiste già
    folder_path <- file.path(outputDir, single_prefix)
    dir_create(folder_path, recurse = TRUE)
    
    # Rinomina e copia il file nella cartella corretta
    #new_filename <- paste0(prefix, ".", ext)
    file_copy(barcode_file, file.path(folder_path, "barcodes.tsv"), overwrite = TRUE)
    file_copy(gene_file, file.path(folder_path, "genes.tsv"), overwrite = TRUE)
    file_copy(matrix_file, file.path(folder_path, "matrix.mtx"), overwrite = TRUE)
    
    
  }
}

In [4]:
# Esegui la funzione per organizzare i file
organize_files(datasetDir,files, outputDir)

# Messaggio di completamento
cat("Organizzazione dei file completata.\n")

[1] "dataset/rawdataset//./GSE138266_RAW/GSM4104122_MS19270_CSF_GRCh38_barcodes.tsv.gz"
[1] "dataset/rawdataset//./GSE138266_RAW/GSM4104123_MS58637_CSF_GRCh38_barcodes.tsv.gz"
[1] "dataset/rawdataset//./GSE138266_RAW/GSM4104124_MS71658_CSF_GRCh38_barcodes.tsv.gz"
[1] "dataset/rawdataset//./GSE138266_RAW/GSM4104125_MS49131_CSF_GRCh38_barcodes.tsv.gz"
[1] "dataset/rawdataset//./GSE138266_RAW/GSM4104126_MS60249_CSF_GRCh38_barcodes.tsv.gz"
[1] "dataset/rawdataset//./GSE138266_RAW/GSM4104127_MS74594_CSF_GRCh38_barcodes.tsv.gz"
[1] "dataset/rawdataset//./GSE138266_RAW/GSM4104128_PST83775_CSF_GRCh38_barcodes.tsv.gz"
[1] "dataset/rawdataset//./GSE138266_RAW/GSM4104129_PTC32190_CSF_GRCh38_barcodes.tsv.gz"
[1] "dataset/rawdataset//./GSE138266_RAW/GSM4104130_PST95809_CSF_GRCh38_barcodes.tsv.gz"
[1] "dataset/rawdataset//./GSE138266_RAW/GSM4104131_PTC41540_CSF_GRCh38_barcodes.tsv.gz"
[1] "dataset/rawdataset//./GSE138266_RAW/GSM4104132_PST45044_CSF_GRCh38_barcodes.tsv.gz"
[1] "dataset/rawdataset//./

In [5]:
options(Seurat.object.assay.version = "v3")
options(Seurat.object.assay.calcn = TRUE)
library(Seurat)
library(Matrix)
library(dplyr)  # Ensure this is loaded for %>%
library(Azimuth)

read_10x_data <- function(folder_path) {
  # Chiamata a Read10X con i file corretti
  data <- Read10X(data.dir = folder_path)
  
  # Crea un oggetto Seurat con i dati letti
  seurat_obj <- CreateSeuratObject(counts = data, 
                                   assay = "RNA")
  
  return(seurat_obj)
}

subfolders <- list.dirs(outputDir, full.names = TRUE, recursive = FALSE)
print(length(subfolders))
seurat_list <- vector("list", length = length(subfolders))

#Inizializzo i tag che userò nel merge
prefixes <- unique(sapply(files, extract_prefix))
dataset_tags <- sapply(prefixes, function(prefix){
  split_prefix <- strsplit(prefix, "_")[[1]]
  patient_code <- split_prefix[[2]]
  sample_type <- split_prefix[[3]]
  paste(patient_code, sample_type, sep="_")
})
rm(prefixes)

Loading required package: SeuratObject

Loading required package: sp

‘SeuratObject’ was built under R 4.4.1 but the current version is
4.4.2; it is recomended that you reinstall ‘SeuratObject’ as the ABI
for R may have changed

‘SeuratObject’ was built with package ‘Matrix’ 1.6.5 but the current
version is 1.7.2; it is recomended that you reinstall ‘SeuratObject’ as
the ABI for ‘Matrix’ may have changed


Attaching package: ‘SeuratObject’


The following objects are masked from ‘package:base’:

    intersect, t



Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Registered S3 method overwritten by 'SeuratDisk':
  method            from  
  as.sparse.H5Group Seurat



Attaching shinyBS



[1] 22


In [6]:
# Define the preprocessing function
preprocess_seurat <- function(seurat_obj, ns = 5) {

  seurat_obj <- subset(seurat_obj, subset = nFeature_RNA < 3000 & nCount_RNA <20000 & percent.mt < 6)
  
  return(seurat_obj)
}


In [7]:
# Define a function to process each dataset
process_dataset <- function(subfolder, dataset_tag, seurat_list, i) {
  cat("Currently processing dataset:", i, "\n")  # Use cat for better printing
  
  # Read the data from the current subfolder
  seurat_obj <- read_10x_data(subfolder)
  

  # Extract metadata from dataset_tag
  dataset_tag_split <- strsplit(dataset_tag, "_")[[1]]
  type <- dataset_tag_split[2]
  patient <- dataset_tag_split[1]
  
  # Determine patient class
  patient_class <- ifelse(grepl("^MS", patient), "MS",
                          ifelse(grepl("^PTC|^PST", patient), "CTRL", NA))
  
  # Add metadata to the Seurat object
  seurat_obj <- AddMetaData(seurat_obj, metadata = type, col.name = "Tissue.Type")
  seurat_obj <- AddMetaData(seurat_obj, metadata = patient, col.name = "Patient")
  seurat_obj <- AddMetaData(seurat_obj, metadata = patient_class, col.name = "Patient-Class")
  seurat_obj$orig.ident <- paste0(patient, patient_class, type)
  seurat_obj$dataset <- "gse138266"
  seurat_obj$nFeature_RNA <- colSums(seurat_obj@assays$RNA@counts > 0)
     seurat_obj$nCount_RNA <- colSums(seurat_obj@assays$RNA@counts)

    
  # Apply specific filtering for dataset 7
  if (i == 7) {
    seurat_obj <- subset(seurat_obj, subset = nCount_RNA > 2000 & nCount_RNA < 45000 & 
                         nFeature_RNA > 40 & nFeature_RNA < 5000)
  }
  
  # Calculate mitochondrial percentage
  seurat_obj[["percent.mt"]] <- PercentageFeatureSet(seurat_obj, pattern = "^MT-")


  # Print metadata for debugging
  print(head(seurat_obj@meta.data))
  
    
  # Preprocess the Seurat object
  seurat_obj <- preprocess_seurat(seurat_obj)
  
  # Run Azimuth for cell type annotation
  seurat_obj <- RunAzimuth(seurat_obj, ref = 'pbmcref')
  
  # Add the processed Seurat object to the list
  seurat_list[[i]] <- seurat_obj
  
  return(seurat_list)
}

# Main loop to process all subfolders
seurat_list <- list()  # Initialize list to store Seurat objects
for (i in seq_along(subfolders)) {
  tryCatch({
    seurat_list <- process_dataset(subfolders[i], dataset_tags[i], seurat_list, i)
  }, error = function(e) {
    cat("Error processing dataset", i, ":", e$message, "\n")  # Handle errors gracefully
  })
}

Currently processing dataset: 1 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                     orig.ident nCount_RNA nFeature_RNA Tissue.Type Patient
AAACCTGAGTGGGCTA-1 MS19270MSCSF       2566          651         CSF MS19270
AAACCTGAGTGTTAGA-1 MS19270MSCSF       2284          844         CSF MS19270
AAACCTGGTCGCGTGT-1 MS19270MSCSF      17837         3022         CSF MS19270
AAACCTGGTCTCCACT-1 MS19270MSCSF       3721         1058         CSF MS19270
AAACCTGGTTTACTCT-1 MS19270MSCSF       3422         1130         CSF MS19270
AAACCTGTCTCCTATA-1 MS19270MSCSF       4175         1159         CSF MS19270
                   Patient-Class   dataset percent.mt
AAACCTGAGTGGGCTA-1            MS gse138266   2.455183
AAACCTGAGTGTTAGA-1            MS gse138266   2.977233
AAACCTGGTCGCGTGT-1            MS gse138266   2.657398
AAACCTGGTCTCCACT-1            MS gse138266   2.499328
AAACCTGGTTTACTCT-1            MS gse138266   3.535944
AAACCTGTCTCCTATA-1            MS gse138266   1.940120


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 4317 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 2 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                     orig.ident Tissue.Type Patient Patient-Class   dataset
AAACCTGAGAATAGGG-1 MS58637MSCSF         CSF MS58637            MS gse138266
AAACCTGAGCAATATG-1 MS58637MSCSF         CSF MS58637            MS gse138266
AAACCTGAGGCAATTA-1 MS58637MSCSF         CSF MS58637            MS gse138266
AAACCTGCAAATACAG-1 MS58637MSCSF         CSF MS58637            MS gse138266
AAACCTGCACGGCCAT-1 MS58637MSCSF         CSF MS58637            MS gse138266
AAACCTGCAGTATAAG-1 MS58637MSCSF         CSF MS58637            MS gse138266
                   nFeature_RNA nCount_RNA percent.mt
AAACCTGAGAATAGGG-1          845       2083   1.824292
AAACCTGAGCAATATG-1          739       1624   1.108374
AAACCTGAGGCAATTA-1          551       1216   1.973684
AAACCTGCAAATACAG-1          741       1520   1.250000
AAACCTGCACGGCCAT-1          626       1247   2.165196
AAACCTGCAGTATAAG-1         1741       5322   1.672304


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 3269 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 3 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                     orig.ident Tissue.Type Patient Patient-Class   dataset
AAACCTGTCTGTGCAA-1 MS71658MSCSF         CSF MS71658            MS gse138266
AAACGGGAGAGGTTAT-1 MS71658MSCSF         CSF MS71658            MS gse138266
AAACGGGAGCCCAATT-1 MS71658MSCSF         CSF MS71658            MS gse138266
AAACGGGCACTATCTT-1 MS71658MSCSF         CSF MS71658            MS gse138266
AAACGGGGTACAGTGG-1 MS71658MSCSF         CSF MS71658            MS gse138266
AAACGGGGTACAGTTC-1 MS71658MSCSF         CSF MS71658            MS gse138266
                   nFeature_RNA nCount_RNA percent.mt
AAACCTGTCTGTGCAA-1          831       2487   1.527945
AAACGGGAGAGGTTAT-1          734       1752   3.995434
AAACGGGAGCCCAATT-1         1090       3856   1.685685
AAACGGGCACTATCTT-1          993       3464   1.501155
AAACGGGGTACAGTGG-1          840       2491   1.686070
AAACGGGGTACAGTTC-1          868       2215   1.083521


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 3550 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 4 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                     orig.ident Tissue.Type Patient Patient-Class   dataset
AAACCTGCAACAACCT-1 MS49131MSCSF         CSF MS49131            MS gse138266
AAACCTGCATTCTCAT-1 MS49131MSCSF         CSF MS49131            MS gse138266
AAACCTGTCTCGGACG-1 MS49131MSCSF         CSF MS49131            MS gse138266
AAACGGGAGTACGTAA-1 MS49131MSCSF         CSF MS49131            MS gse138266
AAACGGGTCTCATTCA-1 MS49131MSCSF         CSF MS49131            MS gse138266
AAACGGGTCTGAGGGA-1 MS49131MSCSF         CSF MS49131            MS gse138266
                   nFeature_RNA nCount_RNA percent.mt
AAACCTGCAACAACCT-1          885       2419   2.315006
AAACCTGCATTCTCAT-1         1156       3157   2.343997
AAACCTGTCTCGGACG-1         2369      10293   4.012436
AAACGGGAGTACGTAA-1         1238       4048   2.322134
AAACGGGTCTCATTCA-1          918       3015   2.885572
AAACGGGTCTGAGGGA-1          869       3006   1.563540


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 2748 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 5 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                     orig.ident Tissue.Type Patient Patient-Class   dataset
AAACCTGAGAGTGAGA-1 MS60249MSCSF         CSF MS60249            MS gse138266
AAACCTGAGCGTAATA-1 MS60249MSCSF         CSF MS60249            MS gse138266
AAACCTGAGCTCCTCT-1 MS60249MSCSF         CSF MS60249            MS gse138266
AAACCTGCAACGATCT-1 MS60249MSCSF         CSF MS60249            MS gse138266
AAACCTGCAAGCCCAC-1 MS60249MSCSF         CSF MS60249            MS gse138266
AAACCTGCACACCGAC-1 MS60249MSCSF         CSF MS60249            MS gse138266
                   nFeature_RNA nCount_RNA percent.mt
AAACCTGAGAGTGAGA-1          484        913   3.943045
AAACCTGAGCGTAATA-1         1500       3485   3.845050
AAACCTGAGCTCCTCT-1         1315       2566   2.689010
AAACCTGCAACGATCT-1          716       1534   2.737940
AAACCTGCAAGCCCAC-1         2093       5219   2.644185
AAACCTGCACACCGAC-1          794       1683   2.614379


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 6411 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 6 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                     orig.ident Tissue.Type Patient Patient-Class   dataset
AAACCTGCATGAACCT-1 MS74594MSCSF         CSF MS74594            MS gse138266
AAACCTGGTGTCCTCT-1 MS74594MSCSF         CSF MS74594            MS gse138266
AAACGGGCAAGTCATC-1 MS74594MSCSF         CSF MS74594            MS gse138266
AAACGGGTCTGACCTC-1 MS74594MSCSF         CSF MS74594            MS gse138266
AAAGCAAGTAGGCTGA-1 MS74594MSCSF         CSF MS74594            MS gse138266
AAAGTAGGTCGCGTGT-1 MS74594MSCSF         CSF MS74594            MS gse138266
                   nFeature_RNA nCount_RNA percent.mt
AAACCTGCATGAACCT-1         1037       3566   3.365115
AAACCTGGTGTCCTCT-1         1254       3717   3.578154
AAACGGGCAAGTCATC-1         1525       6313   3.801679
AAACGGGTCTGACCTC-1         1465       6295   3.208896
AAAGCAAGTAGGCTGA-1         1401       4613   5.549534
AAAGTAGGTCGCGTGT-1         1248       4239   4.906818


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 2003 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 7 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                        orig.ident Tissue.Type  Patient Patient-Class   dataset
AAACCTGCAGGAACGT-1 PST83775CTRLCSF         CSF PST83775          CTRL gse138266
AAACCTGCATGGGACA-1 PST83775CTRLCSF         CSF PST83775          CTRL gse138266
AAACCTGGTGTATGGG-1 PST83775CTRLCSF         CSF PST83775          CTRL gse138266
AAACGGGAGGAGTTTA-1 PST83775CTRLCSF         CSF PST83775          CTRL gse138266
AAACGGGTCACATAGC-1 PST83775CTRLCSF         CSF PST83775          CTRL gse138266
AAACGGGTCATGTCTT-1 PST83775CTRLCSF         CSF PST83775          CTRL gse138266
                   nFeature_RNA nCount_RNA percent.mt
AAACCTGCAGGAACGT-1         1346       5658  1.8204312
AAACCTGCATGGGACA-1          980       3178  2.6117055
AAACCTGGTGTATGGG-1         1148       4481  1.7406829
AAACGGGAGGAGTTTA-1         1298       4786  1.5670706
AAACGGGTCACATAGC-1         1267       3917  0.9956599
AAACGGGTCATGTCTT-1          955       3822  1.2820513


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 1845 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 8 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                        orig.ident Tissue.Type  Patient Patient-Class   dataset
AAACCTGCATCAGTAC-1 PTC32190CTRLCSF         CSF PTC32190          CTRL gse138266
AAACCTGGTATAATGG-1 PTC32190CTRLCSF         CSF PTC32190          CTRL gse138266
AAACCTGGTGGTGTAG-1 PTC32190CTRLCSF         CSF PTC32190          CTRL gse138266
AAACCTGTCAACACTG-1 PTC32190CTRLCSF         CSF PTC32190          CTRL gse138266
AAACCTGTCCATTCTA-1 PTC32190CTRLCSF         CSF PTC32190          CTRL gse138266
AAACCTGTCGAGAACG-1 PTC32190CTRLCSF         CSF PTC32190          CTRL gse138266
                   nFeature_RNA nCount_RNA percent.mt
AAACCTGCATCAGTAC-1         1395       4751   3.051989
AAACCTGGTATAATGG-1          795       2499   2.480992
AAACCTGGTGGTGTAG-1         1055       4264   2.274859
AAACCTGTCAACACTG-1         1481       6528   2.512255
AAACCTGTCCATTCTA-1         1161       4411   2.108365
AAACCTGTCGAGAACG-1          734       2364   3.045685


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 2868 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 9 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                        orig.ident Tissue.Type  Patient Patient-Class   dataset
AAACCTGAGCTCTCGG-1 PST95809CTRLCSF         CSF PST95809          CTRL gse138266
AAACCTGCATCGATGT-1 PST95809CTRLCSF         CSF PST95809          CTRL gse138266
AAACCTGGTAGAGTGC-1 PST95809CTRLCSF         CSF PST95809          CTRL gse138266
AAACGGGAGGCCCTTG-1 PST95809CTRLCSF         CSF PST95809          CTRL gse138266
AAACGGGAGTTAAGTG-1 PST95809CTRLCSF         CSF PST95809          CTRL gse138266
AAACGGGGTCGCGGTT-1 PST95809CTRLCSF         CSF PST95809          CTRL gse138266
                   nFeature_RNA nCount_RNA percent.mt
AAACCTGAGCTCTCGG-1          645       1432   2.374302
AAACCTGCATCGATGT-1         2203       9289   1.883949
AAACCTGGTAGAGTGC-1          785       2137   1.824988
AAACGGGAGGCCCTTG-1          821       2724   2.092511
AAACGGGAGTTAAGTG-1          793       2537   2.995664
AAACGGGGTCGCGGTT-1          791       2416   2.442053


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 1216 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 10 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                        orig.ident Tissue.Type  Patient Patient-Class   dataset
AAACCTGAGACTAAGT-1 PTC41540CTRLCSF         CSF PTC41540          CTRL gse138266
AAACCTGAGCTAGGCA-1 PTC41540CTRLCSF         CSF PTC41540          CTRL gse138266
AAACCTGAGGAGTCTG-1 PTC41540CTRLCSF         CSF PTC41540          CTRL gse138266
AAACCTGAGGCTACGA-1 PTC41540CTRLCSF         CSF PTC41540          CTRL gse138266
AAACCTGTCCTACAGA-1 PTC41540CTRLCSF         CSF PTC41540          CTRL gse138266
AAACGGGAGGAGTTGC-1 PTC41540CTRLCSF         CSF PTC41540          CTRL gse138266
                   nFeature_RNA nCount_RNA percent.mt
AAACCTGAGACTAAGT-1          848       2962   1.890614
AAACCTGAGCTAGGCA-1          937       3640   1.923077
AAACCTGAGGAGTCTG-1          967       3747   2.241793
AAACCTGAGGCTACGA-1          625       2369   1.899536
AAACCTGTCCTACAGA-1          876       2798   2.180129
AAACGGGAGGAGTTGC-1          489       1826   0.000000


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 1451 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 11 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                        orig.ident Tissue.Type  Patient Patient-Class   dataset
AAACCTGAGATGTAAC-1 PST45044CTRLCSF         CSF PST45044          CTRL gse138266
AAACCTGAGCCGGTAA-1 PST45044CTRLCSF         CSF PST45044          CTRL gse138266
AAACCTGAGGGTCTCC-1 PST45044CTRLCSF         CSF PST45044          CTRL gse138266
AAACCTGAGTCCGTAT-1 PST45044CTRLCSF         CSF PST45044          CTRL gse138266
AAACCTGAGTGGTCCC-1 PST45044CTRLCSF         CSF PST45044          CTRL gse138266
AAACCTGAGTGTCCAT-1 PST45044CTRLCSF         CSF PST45044          CTRL gse138266
                   nFeature_RNA nCount_RNA percent.mt
AAACCTGAGATGTAAC-1         1824       6754   2.280130
AAACCTGAGCCGGTAA-1          838       2426   3.173949
AAACCTGAGGGTCTCC-1         1813       7822   1.866530
AAACCTGAGTCCGTAT-1          875       2832   1.518362
AAACCTGAGTGGTCCC-1          853       2969   1.919838
AAACCTGAGTGTCCAT-1         1794       7486   1.669784


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 4303 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 12 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                        orig.ident Tissue.Type  Patient Patient-Class   dataset
AAACCTGCACCCTATC-1 PTC85037CTRLCSF         CSF PTC85037          CTRL gse138266
AAACCTGCATCCGGGT-1 PTC85037CTRLCSF         CSF PTC85037          CTRL gse138266
AAACCTGGTCGGCATC-1 PTC85037CTRLCSF         CSF PTC85037          CTRL gse138266
AAACCTGTCCTTTACA-1 PTC85037CTRLCSF         CSF PTC85037          CTRL gse138266
AAACGGGGTAAAGTCA-1 PTC85037CTRLCSF         CSF PTC85037          CTRL gse138266
AAACGGGTCCAGATCA-1 PTC85037CTRLCSF         CSF PTC85037          CTRL gse138266
                   nFeature_RNA nCount_RNA percent.mt
AAACCTGCACCCTATC-1          866       2911 2.23290965
AAACCTGCATCCGGGT-1          414       1544 0.06476684
AAACCTGGTCGGCATC-1         1258       3332 2.19087635
AAACCTGTCCTTTACA-1          904       2644 2.19364599
AAACGGGGTAAAGTCA-1          867       3077 3.83490413
AAACGGGTCCAGATCA-1         1108       4519 1.72604559


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 1495 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 13 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                       orig.ident Tissue.Type Patient Patient-Class   dataset
AAACCTGAGAGGACGG-1 MS19270MSPBMCs       PBMCs MS19270            MS gse138266
AAACCTGAGCCAGTTT-1 MS19270MSPBMCs       PBMCs MS19270            MS gse138266
AAACCTGAGTAGTGCG-1 MS19270MSPBMCs       PBMCs MS19270            MS gse138266
AAACCTGAGTCTTGCA-1 MS19270MSPBMCs       PBMCs MS19270            MS gse138266
AAACCTGAGTGTCCAT-1 MS19270MSPBMCs       PBMCs MS19270            MS gse138266
AAACCTGCAAATTGCC-1 MS19270MSPBMCs       PBMCs MS19270            MS gse138266
                   nFeature_RNA nCount_RNA percent.mt
AAACCTGAGAGGACGG-1          827       2630   3.231939
AAACCTGAGCCAGTTT-1         1604       6161   3.538387
AAACCTGAGTAGTGCG-1          823       1730   3.236994
AAACCTGAGTCTTGCA-1         1070       3312   4.347826
AAACCTGAGTGTCCAT-1          916       2751   3.598691
AAACCTGCAAATTGCC-1          862       3529   3.372060


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 6489 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 14 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                       orig.ident Tissue.Type Patient Patient-Class   dataset
AAACCTGAGGAGCGTT-1 MS71658MSPBMCs       PBMCs MS71658            MS gse138266
AAACCTGCACCTCGGA-1 MS71658MSPBMCs       PBMCs MS71658            MS gse138266
AAACCTGGTCAATGTC-1 MS71658MSPBMCs       PBMCs MS71658            MS gse138266
AAACCTGGTCTCACCT-1 MS71658MSPBMCs       PBMCs MS71658            MS gse138266
AAACCTGGTCTGATTG-1 MS71658MSPBMCs       PBMCs MS71658            MS gse138266
AAACCTGGTGGGTCAA-1 MS71658MSPBMCs       PBMCs MS71658            MS gse138266
                   nFeature_RNA nCount_RNA percent.mt
AAACCTGAGGAGCGTT-1          169       4893  0.1021868
AAACCTGCACCTCGGA-1         1177       5011  5.7273997
AAACCTGGTCAATGTC-1          227       7428  0.1077006
AAACCTGGTCTCACCT-1         2356      14172  2.5825572
AAACCTGGTCTGATTG-1         1275       5080  3.2086614
AAACCTGGTGGGTCAA-1          205       8281  0.2294409


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 4169 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 15 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                       orig.ident Tissue.Type Patient Patient-Class   dataset
AAACCTGAGCCGTCGT-1 MS49131MSPBMCs       PBMCs MS49131            MS gse138266
AAACCTGAGCTGGAAC-1 MS49131MSPBMCs       PBMCs MS49131            MS gse138266
AAACCTGAGGAGTTGC-1 MS49131MSPBMCs       PBMCs MS49131            MS gse138266
AAACCTGAGTGGACGT-1 MS49131MSPBMCs       PBMCs MS49131            MS gse138266
AAACCTGCACATCTTT-1 MS49131MSPBMCs       PBMCs MS49131            MS gse138266
AAACCTGGTAAATGAC-1 MS49131MSPBMCs       PBMCs MS49131            MS gse138266
                   nFeature_RNA nCount_RNA percent.mt
AAACCTGAGCCGTCGT-1          631       1244   7.395498
AAACCTGAGCTGGAAC-1          975       3806   2.601156
AAACCTGAGGAGTTGC-1          810       2358   3.774385
AAACCTGAGTGGACGT-1          853       3066   1.467710
AAACCTGCACATCTTT-1         1212       4465   1.657335
AAACCTGGTAAATGAC-1          973       2880   3.784722


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 7174 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 16 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                       orig.ident Tissue.Type Patient Patient-Class   dataset
AAACCTGAGTTCGCGC-1 MS60249MSPBMCs       PBMCs MS60249            MS gse138266
AAACCTGCAAGCCTAT-1 MS60249MSPBMCs       PBMCs MS60249            MS gse138266
AAACCTGCACGCCAGT-1 MS60249MSPBMCs       PBMCs MS60249            MS gse138266
AAACCTGCATTCCTCG-1 MS60249MSPBMCs       PBMCs MS60249            MS gse138266
AAACCTGTCAGCTGGC-1 MS60249MSPBMCs       PBMCs MS60249            MS gse138266
AAACCTGTCATTCACT-1 MS60249MSPBMCs       PBMCs MS60249            MS gse138266
                   nFeature_RNA nCount_RNA percent.mt
AAACCTGAGTTCGCGC-1          755       1833   4.309875
AAACCTGCAAGCCTAT-1         1082       3035   4.711697
AAACCTGCACGCCAGT-1          685       1856   3.232759
AAACCTGCATTCCTCG-1         1037       2918   2.021933
AAACCTGTCAGCTGGC-1         1077       3043   2.826158
AAACCTGTCATTCACT-1          921       1768   5.542986


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 5127 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 17 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                       orig.ident Tissue.Type Patient Patient-Class   dataset
AAACCTGAGAATTCCC-1 MS74594MSPBMCs       PBMCs MS74594            MS gse138266
AAACCTGAGACCGGAT-1 MS74594MSPBMCs       PBMCs MS74594            MS gse138266
AAACCTGAGATAGGAG-1 MS74594MSPBMCs       PBMCs MS74594            MS gse138266
AAACCTGAGCAGCGTA-1 MS74594MSPBMCs       PBMCs MS74594            MS gse138266
AAACCTGAGCTAAGAT-1 MS74594MSPBMCs       PBMCs MS74594            MS gse138266
AAACCTGAGGCTAGGT-1 MS74594MSPBMCs       PBMCs MS74594            MS gse138266
                   nFeature_RNA nCount_RNA percent.mt
AAACCTGAGAATTCCC-1         1265       4085   3.402693
AAACCTGAGACCGGAT-1         1139       4591   3.528643
AAACCTGAGATAGGAG-1          901       2339   4.788371
AAACCTGAGCAGCGTA-1         1015       4851   3.628118
AAACCTGAGCTAAGAT-1         1116       3666   5.073650
AAACCTGAGGCTAGGT-1         1130       4274   3.252223


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 8158 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 18 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                          orig.ident Tissue.Type  Patient Patient-Class
AAACCTGCACGTCAGC-1 PST83775CTRLPBMCs       PBMCs PST83775          CTRL
AAACCTGGTCATATCG-1 PST83775CTRLPBMCs       PBMCs PST83775          CTRL
AAACCTGTCGGAGGTA-1 PST83775CTRLPBMCs       PBMCs PST83775          CTRL
AAACGGGAGGCAATTA-1 PST83775CTRLPBMCs       PBMCs PST83775          CTRL
AAACGGGAGTTCGCGC-1 PST83775CTRLPBMCs       PBMCs PST83775          CTRL
AAACGGGCAATAGAGT-1 PST83775CTRLPBMCs       PBMCs PST83775          CTRL
                     dataset nFeature_RNA nCount_RNA percent.mt
AAACCTGCACGTCAGC-1 gse138266         1028       3437   4.044225
AAACCTGGTCATATCG-1 gse138266         1173       4451   3.504830
AAACCTGTCGGAGGTA-1 gse138266         1065       4376   3.404936
AAACGGGAGGCAATTA-1 gse138266         1965       6597   3.228740
AAACGGGAGTTCGCGC-1 gse138266         2953      22203   2.531189
AAACGGGCAATAGAGT-1 gse138266          895       4018   3.011448


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 4901 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 19 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                          orig.ident Tissue.Type  Patient Patient-Class
AAACCTGAGATGAGAG-1 PTC32190CTRLPBMCs       PBMCs PTC32190          CTRL
AAACCTGAGGGCATGT-1 PTC32190CTRLPBMCs       PBMCs PTC32190          CTRL
AAACCTGCAAGTTCTG-1 PTC32190CTRLPBMCs       PBMCs PTC32190          CTRL
AAACCTGCACAACGTT-1 PTC32190CTRLPBMCs       PBMCs PTC32190          CTRL
AAACCTGCACCAGCAC-1 PTC32190CTRLPBMCs       PBMCs PTC32190          CTRL
AAACCTGCATGCAACT-1 PTC32190CTRLPBMCs       PBMCs PTC32190          CTRL
                     dataset nFeature_RNA nCount_RNA percent.mt
AAACCTGAGATGAGAG-1 gse138266         1034       4270 3.32552693
AAACCTGAGGGCATGT-1 gse138266           93       4013 0.04983803
AAACCTGCAAGTTCTG-1 gse138266          622       1537 2.40728692
AAACCTGCACAACGTT-1 gse138266          841       3864 3.93374741
AAACCTGCACCAGCAC-1 gse138266          642       1627 2.51997541
AAACCTGCATGCAACT-1 gse138266          812       1814 3.30760750


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 4176 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 20 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                          orig.ident Tissue.Type  Patient Patient-Class
AAACCTGAGATCTGCT-1 PST95809CTRLPBMCs       PBMCs PST95809          CTRL
AAACCTGAGCCAGAAC-1 PST95809CTRLPBMCs       PBMCs PST95809          CTRL
AAACCTGCAAGTACCT-1 PST95809CTRLPBMCs       PBMCs PST95809          CTRL
AAACCTGCACACCGAC-1 PST95809CTRLPBMCs       PBMCs PST95809          CTRL
AAACCTGGTCAGTGGA-1 PST95809CTRLPBMCs       PBMCs PST95809          CTRL
AAACCTGTCGTCTGCT-1 PST95809CTRLPBMCs       PBMCs PST95809          CTRL
                     dataset nFeature_RNA nCount_RNA percent.mt
AAACCTGAGATCTGCT-1 gse138266          702       1646  6.1360875
AAACCTGAGCCAGAAC-1 gse138266          131       3774  0.1059883
AAACCTGCAAGTACCT-1 gse138266          670       2007  5.8794220
AAACCTGCACACCGAC-1 gse138266           59        829  0.3618818
AAACCTGGTCAGTGGA-1 gse138266          484       1614  3.6555143
AAACCTGTCGTCTGCT-1 gse138266          533       1638  6.2271062


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 2291 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 21 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                          orig.ident Tissue.Type  Patient Patient-Class
AAACCTGAGACTGGGT-1 PTC41540CTRLPBMCs       PBMCs PTC41540          CTRL
AAACCTGAGGCATGTG-1 PTC41540CTRLPBMCs       PBMCs PTC41540          CTRL
AAACCTGAGTCATGCT-1 PTC41540CTRLPBMCs       PBMCs PTC41540          CTRL
AAACCTGCAAGGACAC-1 PTC41540CTRLPBMCs       PBMCs PTC41540          CTRL
AAACCTGCAAGGGTCA-1 PTC41540CTRLPBMCs       PBMCs PTC41540          CTRL
AAACCTGCAGGAATGC-1 PTC41540CTRLPBMCs       PBMCs PTC41540          CTRL
                     dataset nFeature_RNA nCount_RNA percent.mt
AAACCTGAGACTGGGT-1 gse138266          473       1090   1.559633
AAACCTGAGGCATGTG-1 gse138266          616       2515   3.260437
AAACCTGAGTCATGCT-1 gse138266          874       3495   2.317597
AAACCTGCAAGGACAC-1 gse138266         1487       5253   5.235104
AAACCTGCAAGGGTCA-1 gse138266          609       2200   8.272727
AAACCTGCAGGAATGC-1 gse138266          646       1518   5.665349


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 4902 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

Currently processing dataset: 22 


"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
"Feature names cannot have underscores ('_'), replacing with dashes ('-')"


                          orig.ident Tissue.Type  Patient Patient-Class
AAACCTGAGCGTGTCC-1 PTC85037CTRLPBMCs       PBMCs PTC85037          CTRL
AAACCTGAGGTCATCT-1 PTC85037CTRLPBMCs       PBMCs PTC85037          CTRL
AAACCTGAGTGGACGT-1 PTC85037CTRLPBMCs       PBMCs PTC85037          CTRL
AAACCTGTCATCGGAT-1 PTC85037CTRLPBMCs       PBMCs PTC85037          CTRL
AAACGGGAGAGGTAGA-1 PTC85037CTRLPBMCs       PBMCs PTC85037          CTRL
AAACGGGAGGTTCCTA-1 PTC85037CTRLPBMCs       PBMCs PTC85037          CTRL
                     dataset nFeature_RNA nCount_RNA percent.mt
AAACCTGAGCGTGTCC-1 gse138266           40        964  0.1037344
AAACCTGAGGTCATCT-1 gse138266          832       3269  2.6613643
AAACCTGAGTGGACGT-1 gse138266          814       3279  4.6050625
AAACCTGTCATCGGAT-1 gse138266          833       2806  5.8802566
AAACGGGAGAGGTAGA-1 gse138266          785       3027  4.6580773
AAACGGGAGGTTCCTA-1 gse138266          816       2060  4.2233010


"Overwriting miscellanous data for model"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
"Adding a dimensional reduction (refUMAP) without the associated assay being present"
detected inputs from HUMAN with id type Gene.name

reference rownames detected HUMAN with id type Gene.name

Normalizing query using reference SCT model

"735 features of the features specified were not present in both the reference query assays. 
Continuing with remaining 4265 features."
Projecting cell embeddings

Counts matrix provided is not sparse; vreating v5 assay in Seurat object

Finding query neighbors

Finding neighborhoods

Finding anchors

	Found 2946 anchors

Finding integration vectors

Finding integration vector weights

Predicting cell labels

Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"
Predicting cell labels

"Feature names cannot have underscores ('_'), replacing with dashes ('-')"

Integrating dataset 

In [9]:
merged_data <- merge(seurat_list[[1]], y = seurat_list[-1])
print(merged_data)

"Some cell names are duplicated across objects provided. Renaming to enforce unique cell names."


An object of class Seurat 
33789 features across 74735 samples within 4 assays 
Active assay: RNA (33694 features, 0 variable features)
 2 layers present: counts, data
 3 other assays present: prediction.score.celltype.l1, prediction.score.celltype.l2, prediction.score.celltype.l3


In [10]:
only_pbmc <- subset(merged_data, subset=Tissue.Type=="PBMCs")

In [None]:
only_pbmc

In [None]:
# CONTROLLO A CAMPIONE DI ALCUNI DI QUESTI GENI
genes_test<- FetchData(only_pbmc, vars = c("OR4F29", "OR2M7", "REN", "C1orf195") )
# sono tutti zero. li elimino

# Prendo le varianze
expr_matrix <- GetAssayData(only_pbmc, slot = "counts")
gene_variances <- apply(expr_matrix, 1, var)
gene_variance_df <- data.frame(Gene = rownames(expr_matrix), Variance = gene_variances)
nonzero_variance_genes <- names(gene_variances)[gene_variances > 0] # Questi sono i geni la cui varianza è maggiore di zero

In [None]:
only_pbmc_pre_no_zero_var <- subset(only_pbmc, features = nonzero_variance_genes)

In [11]:
only_pbmc <- NormalizeData(only_pbmc)
all.genes <- rownames(only_pbmc)
only_pbmc <- ScaleData(only_pbmc, features = all.genes)

Centering and scaling data matrix



In [None]:
only_pbmc_pre_no_zero_var <- RunPCA(only_pbmc_pre_no_zero_var, features = all.genes)

In [None]:
only_pbmc_pre_no_zero_var <- RunUMAP(only_pbmc_pre_no_zero_var, dims = 1:10)

In [None]:
DimPlot(only_pbmc_pre_no_zero_var, reduction = "umap", group.by="orig.ident", label=TRUE)

In [12]:
library(Seurat)
library(SeuratDisk)

# Supponendo che `gse144744` sia un oggetto Seurat
SaveH5Seurat(only_pbmc, filename = "gse138266_pbmc.h5Seurat")
Convert("gse138266_pbmc.h5Seurat", dest = "h5ad")


Attaching package: 'SeuratDisk'


The following object is masked from 'package:Azimuth':

    Connect


Creating h5Seurat file for version 3.1.5.9900

Adding counts for RNA

Adding data for RNA

Adding scale.data for RNA

No variable features found for RNA

No feature-level metadata found for RNA

Adding counts for prediction.score.celltype.l1

Adding data for prediction.score.celltype.l1

No variable features found for prediction.score.celltype.l1

No feature-level metadata found for prediction.score.celltype.l1

Adding counts for prediction.score.celltype.l2

Adding data for prediction.score.celltype.l2

No variable features found for prediction.score.celltype.l2

No feature-level metadata found for prediction.score.celltype.l2

Adding counts for prediction.score.celltype.l3

Adding data for prediction.score.celltype.l3

No variable features found for prediction.score.celltype.l3

No feature-level metadata found for prediction.score.celltype.l3

Validating h5Seurat file

Adding scal