In [1]:
# Import required modules
import os
import shutil
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import math

  from pandas.core.index import RangeIndex


In [2]:
# Get a list of all the folders to be processed
path_main = "/Users/sha6hg/Desktop/IPF_scRNA/"

# List of MOUSE datasets
mouse_GSE = ["GSE111664", "GSE121611", "GSE124872", "GSE127803", "GSE129605"]

# Create a list containing path to each GSE dataset conatining Raw Counts Matrix
mouse_GSE_Path = [path_main + s + "/" + "Matrices/" for s in mouse_GSE]

In [None]:
# Only run once
# Read each Raw Counts Matrix as pandas and transpose them to make them scanpy compatible
for GSE_dir in  mouse_GSE_Path:
    # Get a list of all Raw Counts Matrix in the GSE folder 
    file_list = os.listdir(GSE_dir)
    # iterate through each Raw Counts Matrix and process it and save the plots in the respective directories
    for file in file_list:
        df1 = pd.read_csv((GSE_dir + file), header = 0, index_col = 0, sep = "\t")
        print(GSE_dir + file)
        df2 = df1.T
        #print(df2.head())
        filename = file.split(".")[0] + "_Transposed.txt"
        df2.to_csv((GSE_dir + filename), header = True, index = True, sep = "\t")

In [3]:
##################################################### STEP 1 #####################################################
# 1. Read Raw counts matrix and load as annotated data object
# 2. Filter object based on min_genes = 200 and min_cells = 3
# 3. Obtaining Top 20 Genes expressed across all cells and save plot to image
# 4. Calculate mitochondrial percentage
# 5. Create a plot for QC metrics to filter annotated data object
# 6. Save the AnnData object to reload again for filtering based on QC Metrics based on user defined parameters
##################################################################################################################

# Run a loop for processing Raw Counts Matrix for each directory
for GSE_dir in  mouse_GSE_Path:
    # Get a list of all Raw Counts Matrix in the GSE folder 
    all_file_list = os.listdir(GSE_dir)
    # iterate through each Raw Counts Matrix and process it and save the plots in the respective directories
    for file in all_file_list:
        if "_Transposed" in file and file.endswith(".txt"):
            # Make a directory for each sample
            results_dir = GSE_dir + file.split(".")[0].split("_Transposed")[0] + "_Results/"
            os.mkdir(results_dir)
            common_filename = results_dir + file.split(".")[0] + "_"
            # Read the raw counts matrix to annotated data object
            adata = sc.read_text((GSE_dir + file), delimiter=None, first_column_names=None, dtype='float32')
            # Filter cells based on the min number of genes expresses
            sc.pp.filter_cells(adata, min_genes=200)
            # Filter genes based on the min number of expressing the gene
            sc.pp.filter_genes(adata, min_cells=3)

            # Show those genes that yield the highest fraction of counts in each single cells, across all cells.
            filename = common_filename + "Top20Genes.png"
            ax = sc.pl.highest_expr_genes(adata, n_top=20, show=False)
            ax.figure.savefig(filename, dpi = 600, bbox_inches='tight')
            
            # Calculate the mitochondrial percentage
            adata.var['mt'] = adata.var_names.str.startswith('mt-')  # annotate the group of mitochondrial genes as 'mt'
            sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, inplace=True)

            # Create a plot for calculating QC metrics of the data
            filename = common_filename + "QC_metrics.png"
            fig = sc.pl.violin(adata, ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'], jitter=0.4, multi_panel=True, show=False)
            fig.savefig(filename, dpi = 600, bbox_inches='tight')

            # Save the annotated data at this point: Step1
            filename = common_filename + "Step1.h5ad"
            adata.write(filename, compression = False)
            
            plt.close("all")

        print("Annotated Data object created for Step1 for all datasets. Upload file containing filter for QC metrics to proceed to next step.")
      

Annotated Data object created for Step1 for all datasets. Upload file containing filter for QC metrics to proceed to next step.
Annotated Data object created for Step1 for all datasets. Upload file containing filter for QC metrics to proceed to next step.
Annotated Data object created for Step1 for all datasets. Upload file containing filter for QC metrics to proceed to next step.
Annotated Data object created for Step1 for all datasets. Upload file containing filter for QC metrics to proceed to next step.
Annotated Data object created for Step1 for all datasets. Upload file containing filter for QC metrics to proceed to next step.
Annotated Data object created for Step1 for all datasets. Upload file containing filter for QC metrics to proceed to next step.
Annotated Data object created for Step1 for all datasets. Upload file containing filter for QC metrics to proceed to next step.
Annotated Data object created for Step1 for all datasets. Upload file containing filter for QC metrics t

In [4]:
##################################################### STEP 2 #####################################################
# 1. Load the AnnData object from Step 1 and filter for percent.mito and n.gene from user input file / calculating UQ
# 2. Normalize the counts matrix
# 3. Identify Highly Variable Genes and generate plot
# 4. Scale data using regression based on percent.mito and total_counts
# 5. Reduce the dimensionality of the data by running PCA
# 6. Compute Nearest Neigbourhood Graph, embed the graph onto UMAP and Create Clustering Graph
# 7. Find marker genes for each cluster
##################################################################################################################

# Read excel file containing filtering data
file = "/Users/sha6hg/Desktop/IPF_scRNA/IPF_SingleCellAnalysis.xlsx"
df1 = pd.read_excel(file, sheet_name = "Step1_AnalysisInfo", header = 0, index_col = 2, nrows = 150)

# Run a loop for loading AnnData object from respective directories
for GSE_dir in  mouse_GSE_Path:
    # Get a list of all Sub Directories for each Sample within Main directory 
    all_file_list = os.listdir(GSE_dir)
    list_dir = [directory for directory in all_file_list if os.path.isdir(GSE_dir + directory)]
    # Load AnnData object for each sample and process further
    for directory in list_dir:
        common_filename = GSE_dir + directory + "/" + directory.split("_")[0] + "_" + directory.split("_")[1] + "_" + directory.split("_")[2] + "_"
        filename = [i for i in os.listdir(GSE_dir + directory) if ".h5ad" in i][0]
        filename1 = GSE_dir + directory + "/" + filename
        # Load AnnData object
        adata = sc.read_h5ad(filename1)
        # Filter based on gene counts and mitochondrial percentage
        gene_count = df1.at[directory.split("_")[1], "N_COUNT"]
        perc_mito = df1.at[directory.split("_")[1], "PERCENT_MITO"]
        adata = adata[adata.obs.n_genes_by_counts < gene_count, :]
        if math.isnan(perc_mito) == False:
            adata = adata[adata.obs.pct_counts_mt < perc_mito, :]
        
        # Normalize data
        sc.pp.normalize_total(adata, target_sum=1e4)
        sc.pp.log1p(adata)
        
        # Identify highly-variable genes
        sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
        filename = common_filename + "HVG.png"
        ax = sc.pl.highly_variable_genes(adata, show=False)
        ax.figure.savefig(filename, dpi = 600, bbox_inches='tight')
        
        # Add the raw data to the AnnData object before further processing
        adata.raw = adata
        
        # Filter out HVG
        adata = adata[:, adata.var.highly_variable]
        
        # Regressing perc_mito and total_count
        sc.pp.regress_out(adata, ['total_counts', 'pct_counts_mt'])
        # Scale each gene to unit variance
        sc.pp.scale(adata, max_value=10)

        # Reduce the dimensionality of the data by running PCA
        sc.tl.pca(adata, svd_solver='arpack')
        
        # Create a Scatter Plot of the PCA
        #gene_name = "CST3"
        #filename = common_filename + "gene_name" + "PCA_ScatterPlot.png"
        #sc.pl.pca(adata, color=gene_name, show=False)
        #fig.savefig(filename, dpi = 600, bbox_inches='tight')
        
        # Create a variance ratio plot of the PCA to assess how many PC's to use
        path = GSE_dir + directory + "/"
        os.chdir(path)
        filename = directory.split("_")[0] + "_" + directory.split("_")[1] + "_" + directory.split("_")[2]
        sc.pl.pca_variance_ratio(adata, log=True, show=False, save = filename)
        # Move the file to wanted directory
        copy_from = GSE_dir + directory + "/" + "figures" + "/"
        copy_to = GSE_dir + directory + "/"
        # Rename file before moving
        file_to_rename = GSE_dir + directory + "/" + "figures" + "/" + os.listdir(copy_from)[0]
        renamed_file = GSE_dir + directory + "/" + "figures" + "/" + directory.split("_")[0] + "_" + directory.split("_")[1] + "_" + directory.split("_")[2] + "_" +"PCA_VarianceRatioPlot.pdf"
        os.rename(file_to_rename, renamed_file)
        # move the file
        file_to_move = os.listdir(copy_from)[0]
        shutil.move(copy_from + file_to_move, copy_to + file_to_move)
        # Remove unwanted directory
        os.rmdir(copy_from)


        # Compute Nearest Neigbourhood Graph
        sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)
        # Embedding the graph to UMAP
        sc.tl.umap(adata)
        # UMAP Clustering 
        sc.tl.leiden(adata)
        #sc.tl.louvain(adata)
        # Plot the UMAP Clustering
        filename = common_filename + "UMAP_Clustering.png"
        ax = sc.pl.umap(adata, color='leiden', show=False, size =10)
        ax.figure.savefig(filename, dpi = 600, bbox_inches='tight')
        # Plot genes on the UMAP Plot
        #gene_list = ['CST3', 'NKG7', 'PPBP']
        #sc.pl.umap(adata, color=gene_list)
        
        # Find marker genes for each cluster
        sc.tl.rank_genes_groups(adata, 'leiden', method='t-test')
        #sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)
        result = adata.uns['rank_genes_groups']
        groups = result['names'].dtype.names
        df1_1 = pd.DataFrame({group + '_' + key[:1]: result[key][group] for group in groups for key in ['names', 'pvals']})
        
        # Create a dataframe for storing all the Differentially expressed markers per cluster
        cluster_number = int((len(df1_1.columns)/2))
        df_final = pd.DataFrame()
        for i in range(0, cluster_number):
            expression = "^" + str(i) + "_" 
            df2 = df1_1.filter(regex=expression, axis=1)
            df2.columns = ["Gene", "p-Value"]
            df2.loc[:,"Cluster"] = "Cluster_" + str(i)
            df_final = pd.concat([df_final, df2], batch)
        # Write the DataFrame to file
        filename = common_filename + "DE_MarkerPerCluster.txt"
        df_final.to_csv(filename, header = True, sep = "\t", index = False)

        # Save the annotated data at this point: Step2
        filename = common_filename + "Step2.h5ad"
        adata.write(filename, compression = False)
        
        plt.close("all")

  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




  n_components
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




  n_components
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




  n_components
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




  n_components
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




  n_components
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




  n_components
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




  n_components
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




  n_components
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




  n_components
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




  n_components
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  view_to_actual(data)




  n_components
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [None]:
##################################################### STEP 3 #####################################################
# 1. Integrating all the datasets together in to one AnnData object
##################################################################################################################
for GSE_dir in  mouse_GSE_Path:
    # Get a list of all Raw Counts Matrix in the GSE folder 
    all_file_list = os.listdir(GSE_dir)
    list_dir = [directory for directory in all_file_list if os.path.isdir(GSE_dir + directory)]
    # iterate through each Raw Counts Matrix and process it and save the plots in the respective directories
    for directory in list_dir:
        if "_Results" in directory:
            dir_name = GSE_dir + directory 
            file_list_all = os.listdir(dir_name)
            file1 = [file for file in file_list_all if "Step2" in file]
            print(file1)


In [None]:
############################################# REMOVE RESULTS FOLDERS #############################################
# Script to remove results folders 
##################################################################################################################
for GSE_dir in  mouse_GSE_Path:
    # Get a list of all Raw Counts Matrix in the GSE folder 
    all_file_list = os.listdir(GSE_dir)
    list_dir = [directory for directory in all_file_list if os.path.isdir(GSE_dir + directory)]
    # iterate through each Raw Counts Matrix and process it and save the plots in the respective directories
    for directory in list_dir:
        if "_Results" in directory:
            dir_name = GSE_dir + directory 
            for file in os.listdir(dir_name):
                os.remove(dir_name + "/" + file) 
            os.rmdir(dir_name)