In [1]:
import os
import scanpy as sc
import seaborn as sb
import numpy as np
import matplotlib.pyplot as plt
import warnings

class Preprocess:
    def __init__(self, root_directory):
        self.root_directory = root_directory
        self.adata_path = None
        self.adata = None
        self.process_data()

    def process_data(self):
        warnings.simplefilter("ignore")
        for subdir in os.listdir(self.root_directory):
            subdirectory_path = os.path.join(self.root_directory, subdir)
            if not os.path.isdir(subdirectory_path):
                continue
            
            preprocessed_file = [f for f in os.listdir(subdirectory_path) if f.endswith('preprocessed.h5ad')]
            if preprocessed_file:
                print('Processed File Found')
                continue

            h5ad_files = [f for f in os.listdir(subdirectory_path) if f.endswith(('.h5ad'))]
            if not h5ad_files:
                print(f'No h5ad file in {subdirectory_path}, skipping this directory')
                continue

            if h5ad_files:
                self.adata_path = os.path.join(subdirectory_path, h5ad_files[0])
                print(f"Processing file: {self.adata_path}")

                # Load adata
                self.adata = sc.read_h5ad(self.adata_path)

                # Quality control - calculate QC covariates
                if self.adata is not None:
                    self.adata.obs['n_counts'] = self.adata.X.sum(axis=1)
                    self.adata.obs['log_counts'] = np.log(self.adata.obs['n_counts'])
                    self.adata.obs['n_genes'] = (self.adata.X > 0).sum(axis=1)

                    mt_gene_mask = np.flatnonzero([gene.startswith('MT-') for gene in self.adata.var_names])
                    self.adata.obs['mt_frac'] = np.sum(self.adata[:, mt_gene_mask].X, axis=1).A1 / self.adata.obs[
                        'n_counts']

                    # Plot and save figures
                    p1 = sc.pl.scatter(self.adata, 'n_counts', 'n_genes', color='mt_frac', size=40, show=False)
                    plot_filename = os.path.join(subdirectory_path, f"{os.path.splitext(os.path.basename(self.adata_path))[0]}_scatter_plot.png")
                    plt.savefig(plot_filename)
                    plt.close() 
                    
                    # Create a separate figure for p2
                    fig2, ax2 = plt.subplots()
                    p2 = sb.distplot(self.adata.obs['n_counts'], kde=False, ax=ax2)
                    plot_filename = os.path.join(subdirectory_path, f"{os.path.splitext(os.path.basename(self.adata_path))[0]}_ncounts_plot.png")
                    fig2.savefig(plot_filename)
                    plt.close(fig2)  # Close the figure to release resources

                    # Create a separate figure for p3
                    fig3, ax3 = plt.subplots()
                    p3 = sb.distplot(self.adata.obs['n_genes'], kde=False, ax=ax3)
                    plot_filename = os.path.join(subdirectory_path, f"{os.path.splitext(os.path.basename(self.adata_path))[0]}_ngenes_plot.png")
                    fig3.savefig(plot_filename)
                    plt.close(fig3)  # Close the figure to release resources
                    
                    # Custom filtering
                    min_counts = np.percentile(self.adata.obs['n_counts'], 5)
                    max_counts = np.percentile(self.adata.obs['n_counts'], 95)

                    sc.pp.filter_cells(self.adata, min_counts=min_counts)
                    sc.pp.filter_cells(self.adata, max_counts=max_counts)
                    self.adata = self.adata[self.adata.obs['mt_frac'] < 0.2]
                    sc.pp.filter_cells(self.adata, min_genes=400)
                    sc.pp.filter_genes(self.adata, min_cells=10)

                    # Write the modified adata
                    directory, filename = os.path.split(self.adata_path)
                    filename_no_ext, ext = os.path.splitext(os.path.basename(filename))
                    new_filename = f"{filename_no_ext}_preprocessed{ext}"
                    new_path = os.path.join(subdirectory_path, new_filename)
                    self.adata.write_h5ad(new_path)
                    print(f"Filtered adata saved to: {new_path}")
    
                else:
                    print("No adata loaded for processing.")


In [42]:
dir_Lee = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee'

In [43]:
Lee = Preprocess(dir_Lee)
Lee.process_data()

No h5ad file in /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/tarfiles, skipping this directory
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-P4_filtered_feature_bc_matrix/PBMC-P4_filtered_feature_bc_matrix_output.h5ad
Filtered adata saved to: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-P4_filtered_feature_bc_matrix/PBMC-P4_filtered_feature_bc_matrix_output_preprocessed.h5ad
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-VM_filtered_feature_bc_matrix/PBMC-VM_filtered_feature_bc_matrix_output.h5ad
Filtered adata saved to: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-VM_filtered_feature_bc_matrix/PBMC-VM_filtered_feature_bc_matrix_output_preprocessed.h5ad
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/LiM_filtered_fe

In [44]:
dir_steele = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele'
Steele = Preprocess(dir_steele)
Steele.process_data()

Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_2/filtered_feature_bc_matrix_output.h5ad
Filtered adata saved to: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_2/filtered_feature_bc_matrix_output_preprocessed.h5ad
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_4/filtered_feature_bc_matrix_output.h5ad
Filtered adata saved to: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_4/filtered_feature_bc_matrix_output_preprocessed.h5ad
No h5ad file in /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/tarfiles, skipping this directory
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_4/filtered_feature_bc_matrix_output.h5ad
Filtered adata saved to: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po

In [46]:
dir_Simeone = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone'
Simeone = Preprocess(dir_Simeone)
Simeone.process_data()

Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204125_P17/GSM6204125_P17_output.h5ad
Filtered adata saved to: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204125_P17/GSM6204125_P17_output_preprocessed.h5ad
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204130_P22/GSM6204130_P22_output.h5ad
Filtered adata saved to: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204130_P22/GSM6204130_P22_output_preprocessed.h5ad
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204112_P04/GSM6204112_P04_output.h5ad
Filtered adata saved to: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204112_P04/GSM6204112_P04_output_preprocessed.h5ad
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_

In [52]:
dir_Regev = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Regev_SN'
Regev = Preprocess(dir_Regev)
Regev.process_data()

Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Regev_SN/nuc/GSE202051_adata_010nuc_10x.h5ad
Filtered adata saved to: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Regev_SN/nuc/GSE202051_adata_010nuc_10x_preprocessed.h5ad
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Regev_SN/orgCRT/GSE202051_adata_010orgCRT_10x.h5ad
Filtered adata saved to: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Regev_SN/orgCRT/GSE202051_adata_010orgCRT_10x_preprocessed.h5ad


In [53]:
dir_Ding = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Ding'
Ding = Preprocess(dir_Ding)
Ding.process_data()

Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Ding/data/Liding.h5ad
Filtered adata saved to: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Ding/data/Liding_preprocessed.h5ad


In [5]:
dir_Zenodo = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zenodo'
Zenodo = Preprocess(dir_Zenodo)
Zenodo.process_data()

No h5ad file in /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zenodo/Code, skipping this directory
No h5ad file in /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zenodo/.ipynb_checkpoints, skipping this directory
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zenodo/Data/pk_all.h5ad
Filtered adata saved to: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zenodo/Data/pk_all_preprocessed.h5ad
No h5ad file in /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zenodo/Code, skipping this directory
No h5ad file in /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zenodo/.ipynb_checkpoints, skipping this directory


In [2]:
dir_Caronni = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni'
Caronni = Preprocess(dir_Caronni)
Caronni.process_data()

Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727542_LPDAC/GSM6727542_LPDAC_output.h5ad
Filtered adata saved to: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727542_LPDAC/GSM6727542_LPDAC_output_preprocessed.h5ad
No h5ad file in /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSE217847, skipping this directory
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727551_PDAC/GSM6727551_PDAC_output.h5ad
Filtered adata saved to: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727551_PDAC/GSM6727551_PDAC_output_preprocessed.h5ad
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727548_PDAC/GSM6727548_PDAC_output.h5ad
Filtered adata saved to: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/ra

In [3]:
dir_Zhang = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang'
Zhang = Preprocess(dir_Zhang)
Zhang.process_data()

Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910789_Case3-YF/GSM5910789_Case3-YF_output.h5ad
Filtered adata saved to: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910789_Case3-YF/GSM5910789_Case3-YF_output_preprocessed.h5ad
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910786_Case2-ZC/GSM5910786_Case2-ZC_output.h5ad
Filtered adata saved to: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910786_Case2-ZC/GSM5910786_Case2-ZC_output_preprocessed.h5ad
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910785_Case1-ZY/GSM5910785_Case1-ZY_output.h5ad
Filtered adata saved to: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910785_Case1-ZY/GSM5910785_Case1-ZY_output_preprocessed.h5ad
Processing file: /dss/dssfs02/l

In [4]:
import os
import scanpy as sc
import seaborn as sb
import numpy as np
import matplotlib.pyplot as plt
import warnings

class Plots_Post_Filtering:
    def __init__(self, root_directory):
        self.root_directory = root_directory
        self.adata_path = None
        self.adata = None
        self.plot()

    def plot(self):
        warnings.simplefilter("ignore")
        for subdir in os.listdir(self.root_directory):
            subdirectory_path = os.path.join(self.root_directory, subdir)
            if not os.path.isdir(subdirectory_path):
                continue
            
            preprocessed_file = [f for f in os.listdir(subdirectory_path) if f.endswith('preprocessed.h5ad')]
            if not preprocessed_file:
                print(f'No preprocessed file in {subdirectory_path}, skipping this directory')
                continue

            if preprocessed_file:
                self.adata_path = os.path.join(subdirectory_path, preprocessed_file[0])
                print(f"Processing file: {self.adata_path}")

                # Load adata
                self.adata = sc.read_h5ad(self.adata_path)

                # Quality control - calculate QC covariates
                if self.adata is not None:

                    p1 = sc.pl.scatter(self.adata, 'n_counts', 'n_genes', color='mt_frac', size=40, show=False)
                    plot_filename = os.path.join(subdirectory_path, f"{os.path.splitext(os.path.basename(self.adata_path))[0]}_scatter_plot_preprocessed.png")
                    print(plot_filename)
                    plt.savefig(plot_filename)
                    plt.close() 
                    
                    # Create a separate figure for p2
                    fig2, ax2 = plt.subplots()
                    p2 = sb.distplot(self.adata.obs['n_counts'], kde=False, ax=ax2)
                    plot_filename = os.path.join(subdirectory_path, f"{os.path.splitext(os.path.basename(self.adata_path))[0]}_ncounts_plot_preprocessed.png")
                    fig2.savefig(plot_filename)
                    plt.close(fig2)  # Close the figure to release resources

                    # Create a separate figure for p3
                    fig3, ax3 = plt.subplots()
                    p3 = sb.distplot(self.adata.obs['n_genes'], kde=False, ax=ax3)
                    plot_filename = os.path.join(subdirectory_path, f"{os.path.splitext(os.path.basename(self.adata_path))[0]}_ngenes_plot_preprocessed.png")
                    fig3.savefig(plot_filename)
                    plt.close(fig3)  # Close the figure to release resources
                    
                else:
                    print("No adata loaded for plotting.")


In [21]:
dir_Lee = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee'
Lee = Plots_Post_Filtering(dir_Lee)

No preprocessed file in /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/tarfiles, skipping this directory
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-P4_filtered_feature_bc_matrix/PBMC-P4_filtered_feature_bc_matrix_output_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-P4_filtered_feature_bc_matrix/PBMC-P4_filtered_feature_bc_matrix_output_preprocessed_scatter_plot_preprocessed.png
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-VM_filtered_feature_bc_matrix/PBMC-VM_filtered_feature_bc_matrix_output_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-VM_filtered_feature_bc_matrix/PBMC-VM_filtered_feature_bc_matrix_output_preprocessed_scatter_plot_preprocessed.png
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC

In [22]:
dir_steele = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele'
Steele = Plots_Post_Filtering(dir_steele)

Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_2/filtered_feature_bc_matrix_output_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_2/filtered_feature_bc_matrix_output_preprocessed_scatter_plot_preprocessed.png
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_4/filtered_feature_bc_matrix_output_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_4/filtered_feature_bc_matrix_output_preprocessed_scatter_plot_preprocessed.png
No preprocessed file in /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/tarfiles, skipping this directory
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_4/filtered_feature_bc_matrix_output_preprocessed.h5ad
/dss/dssfs02/lwp-

In [23]:
dir_Simeone = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone'
Simeone = Plots_Post_Filtering(dir_Simeone)

Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204125_P17/GSM6204125_P17_output_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204125_P17/GSM6204125_P17_output_preprocessed_scatter_plot_preprocessed.png
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204130_P22/GSM6204130_P22_output_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204130_P22/GSM6204130_P22_output_preprocessed_scatter_plot_preprocessed.png
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204112_P04/GSM6204112_P04_output_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204112_P04/GSM6204112_P04_output_preprocessed_scatter_plot_preprocessed.png
Processing file: /dss/dssfs02/lwp-dss-00

In [24]:
dir_Regev = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Regev_SN'
Regev = Plots_Post_Filtering(dir_Regev)

Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Regev_SN/final/GSE202051_totaldata-final-toshare_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Regev_SN/final/GSE202051_totaldata-final-toshare_preprocessed_scatter_plot_preprocessed.png
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Regev_SN/nuc/GSE202051_adata_010nuc_10x_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Regev_SN/nuc/GSE202051_adata_010nuc_10x_preprocessed_scatter_plot_preprocessed.png
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Regev_SN/orgCRT/GSE202051_adata_010orgCRT_10x_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Regev_SN/orgCRT/GSE202051_adata_010orgCRT_10x_preprocessed_scatter_plot_preprocessed.png


In [25]:
dir_Ding = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Ding'
Ding = Plots_Post_Filtering(dir_Ding)

Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Ding/data/Liding_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Ding/data/Liding_preprocessed_scatter_plot_preprocessed.png


In [12]:
dir_Zenodo = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zenodo'
Zenodo = Plots_Post_Filtering(dir_Zenodo)

No preprocessed file in /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zenodo/Code, skipping this directory
No preprocessed file in /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zenodo/.ipynb_checkpoints, skipping this directory
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zenodo/Data/pk_all_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zenodo/Data/pk_all_preprocessed_scatter_plot_preprocessed.png


In [41]:
dir_peng = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Peng'
peng = Preprocess(dir_peng)
peng.process_data()


No h5ad file in /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Peng/.ipynb_checkpoints, skipping this directory
Processed File Found
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Peng/Processed_Data/StdWf1_PRJCA001063_CRC_besca2.annotated.h5ad
Filtered adata saved to: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Peng/Processed_Data/StdWf1_PRJCA001063_CRC_besca2.annotated_preprocessed.h5ad
No h5ad file in /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Peng/.ipynb_checkpoints, skipping this directory
Processed File Found
Processed File Found


In [42]:
peng = Plots_Post_Filtering(dir_peng)

No preprocessed file in /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Peng/.ipynb_checkpoints, skipping this directory
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Peng/Raw_Data/StdWf1_PRJCA001063_CRC_besca2.raw_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Peng/Raw_Data/StdWf1_PRJCA001063_CRC_besca2.raw_preprocessed_scatter_plot_preprocessed.png
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Peng/Processed_Data/StdWf1_PRJCA001063_CRC_besca2.annotated_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Peng/Processed_Data/StdWf1_PRJCA001063_CRC_besca2.annotated_preprocessed_scatter_plot_preprocessed.png


In [5]:
Caronni = Plots_Post_Filtering(dir_Caronni)

Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727542_LPDAC/GSM6727542_LPDAC_output_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727542_LPDAC/GSM6727542_LPDAC_output_preprocessed_scatter_plot_preprocessed.png
No preprocessed file in /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSE217847, skipping this directory
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727551_PDAC/GSM6727551_PDAC_output_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727551_PDAC/GSM6727551_PDAC_output_preprocessed_scatter_plot_preprocessed.png
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727548_PDAC/GSM6727548_PDAC_output_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-000

In [6]:
Zhang = Plots_Post_Filtering(dir_Zhang)

Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910789_Case3-YF/GSM5910789_Case3-YF_output_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910789_Case3-YF/GSM5910789_Case3-YF_output_preprocessed_scatter_plot_preprocessed.png
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910786_Case2-ZC/GSM5910786_Case2-ZC_output_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910786_Case2-ZC/GSM5910786_Case2-ZC_output_preprocessed_scatter_plot_preprocessed.png
Processing file: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910785_Case1-ZY/GSM5910785_Case1-ZY_output_preprocessed.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910785_Case1-ZY/GSM5910785_Case1-ZY_output_preprocessed_scatter_plot_preproces