In [1]:
import scanpy as sc
import pandas as pd
import numpy as np
import glob
import scipy.io
import os 
import anndata 

class DataProcessor:
    """Convert matrix/barcodes/features triplets found on disk into ``.h5ad`` files.

    ``root_directory`` is scanned one level deep: every subdirectory that
    contains a ``matrix.mtx`` / ``matrix.mtx.gz`` file together with a barcodes
    and a features (or genes) table is written out as
    ``<subdirectory>/<subdirectory name>_output.h5ad``.
    """

    # Filename endings that identify the sparse count matrix of a sample.
    _MATRIX_SUFFIXES = ('matrix.mtx.gz', 'matrix.mtx')
    # Files that can contain 'feature'/'genes'/'barcode' in their name without
    # being the table itself.  Most importantly this covers our own output:
    # '<..._feature_bc_matrix>_output.h5ad' would otherwise be picked up as the
    # features file when a directory is processed a second time.
    _NON_TABLE_SUFFIXES = ('.h5ad', '.h5', '.mtx', '.mtx.gz')

    def __init__(self, root_directory):
        """Remember the directory whose subdirectories will be converted."""
        self.root_directory = root_directory

    def assemble_h5ad(self, barcodes_file, features_file, matrix_file, output_file):
        """Build an AnnData object from three input files and write it to disk.

        Parameters
        ----------
        barcodes_file : str
            Header-less table; its first column becomes the ``obs`` index.
        features_file : str
            Header-less, tab-separated table; stored as ``var``.
        matrix_file : str
            Matrix Market file readable by ``scipy.io.mmread``.
        output_file : str
            Destination path of the ``.h5ad`` file.
        """
        barcodes = pd.read_csv(barcodes_file, header=None, index_col=0, names=['barcode'])
        features = pd.read_csv(features_file, sep='\t', header=None)
        # Header-less tables get integer column labels; make them strings
        # before they end up in the AnnData object.
        features.columns = features.columns.astype(str)
        barcodes.columns = barcodes.columns.astype(str)
        # The matrix is transposed so that rows line up with ``barcodes`` and
        # columns with ``features`` (presumably the file is stored
        # features x barcodes -- TODO confirm for non-10x inputs).
        matrix = scipy.io.mmread(matrix_file).T.tocsc()

        adata = anndata.AnnData(X=matrix, obs=barcodes, var=features)

        # Store every annotation value as a string before writing.
        adata.var = adata.var.astype(str)
        adata.obs = adata.obs.astype(str)

        adata.write_h5ad(output_file)

    @classmethod
    def _find_table(cls, filenames, keywords):
        """Return the first name containing any keyword that is not a matrix/HDF5 file, else None."""
        for name in filenames:
            if name.endswith(cls._NON_TABLE_SUFFIXES):
                continue
            if any(keyword in name for keyword in keywords):
                return name
        return None

    def process_directory(self):
        """Convert every suitable subdirectory of ``root_directory``.

        Each entry of ``root_directory`` is printed.  Entries that are not
        directories are ignored; directories without a matrix file, or without
        a barcodes/features table, are reported and skipped.  A summary with
        the number of converted directories is printed at the end.
        """
        file_counter = 0
        # Sorted so that the processing order and the choice among several
        # candidate files do not depend on the arbitrary order of os.listdir.
        for subdir in sorted(os.listdir(self.root_directory)):
            subdirectory_path = os.path.join(self.root_directory, subdir)
            print(subdirectory_path)
            if not os.path.isdir(subdirectory_path):
                continue

            entries = sorted(os.listdir(subdirectory_path))

            matrix_name = next(
                (name for name in entries if name.endswith(self._MATRIX_SUFFIXES)),
                None,
            )
            if matrix_name is None:
                print('No matrix file, skipping this directory')
                continue

            barcodes_name = self._find_table(entries, ('barcode',))
            features_name = self._find_table(entries, ('feature', 'genes'))
            if barcodes_name is None or features_name is None:
                # Previously an IndexError; one incomplete sample should not
                # abort the remaining directories.
                print('No barcodes/features file, skipping this directory')
                continue

            matrix_file = os.path.join(subdirectory_path, matrix_name)
            barcodes_file = os.path.join(subdirectory_path, barcodes_name)
            features_file = os.path.join(subdirectory_path, features_name)
            # The output is written next to its inputs.
            output_file = os.path.join(subdirectory_path, f'{subdir}_output.h5ad')

            self.assemble_h5ad(barcodes_file, features_file, matrix_file, output_file)
            file_counter += 1
            print('Saving file at:', output_file)
        print(f"Successfully processed {file_counter} directories.")



In [82]:
root_directory = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee'

# Lee dataset: the sample folders sit directly under root_directory, so one
# DataProcessor pointed at it handles all of them.
data_processor = DataProcessor(root_directory)

# Writes one '<subdirectory>_output.h5ad' into each subdirectory that has a
# matrix file and prints progress for every entry it visits.
data_processor.process_directory()

/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/tarfiles
No matrix file, skipping this directory
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-P4_filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-P4_filtered_feature_bc_matrix/PBMC-P4_filtered_feature_bc_matrix_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-VM_filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-VM_filtered_feature_bc_matrix/PBMC-VM_filtered_feature_bc_matrix_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/LiM_filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/LiM_filtered_feature_bc_matrix/LiM_filtered_feature_bc_matrix_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/P1_filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/P1_filtered_feature_bc_matrix/P1_filtered_feature_bc_matrix_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-P3_filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-P3_filtered_feature_bc_matrix/PBMC-P3_filtered_feature_bc_matrix_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-LuM_filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-LuM_filtered_feature_bc_matrix/PBMC-LuM_filtered_feature_bc_matrix_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/VM_filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/VM_filtered_feature_bc_matrix/VM_filtered_feature_bc_matrix_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-P1_filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-P1_filtered_feature_bc_matrix/PBMC-P1_filtered_feature_bc_matrix_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/P3_filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/P3_filtered_feature_bc_matrix/P3_filtered_feature_bc_matrix_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-LiM_filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-LiM_filtered_feature_bc_matrix/PBMC-LiM_filtered_feature_bc_matrix_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/LuM_filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/LuM_filtered_feature_bc_matrix/LuM_filtered_feature_bc_matrix_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-P2_filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-P2_filtered_feature_bc_matrix/PBMC-P2_filtered_feature_bc_matrix_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PM_umiCounts_aboveBackground.csv
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-PM_filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/PBMC-PM_filtered_feature_bc_matrix/PBMC-PM_filtered_feature_bc_matrix_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/P4_filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/P4_filtered_feature_bc_matrix/P4_filtered_feature_bc_matrix_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/GSE156405
No matrix file, skipping this directory
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/P5_filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/P5_filtered_feature_bc_matrix/P5_filtered_feature_bc_matrix_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/P2_filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Lee/P2_filtered_feature_bc_matrix/P2_filtered_feature_bc_matrix_output.h5ad
Successfully processed 16 directories.


In [85]:
main_directory = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele'
# For Steele the matrix folders sit one level deeper than DataProcessor looks,
# so every entry of main_directory is used as a root directory of its own.
for subdir in os.listdir(main_directory):
    root_directory = os.path.join(main_directory, subdir)

    try:
        if os.path.isdir(root_directory):
            # Convert all matrix folders found directly under this entry.
            data_processor = DataProcessor(root_directory)
            data_processor.process_directory()
        else:
            # Plain files are reported through the same handler as real
            # failures below.
            raise NotADirectoryError(f"{root_directory} is not a directory")

    except Exception as e:
        # Best effort: report the problem and carry on with the next entry.
        print(f"Error processing {root_directory}: {e}")

/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_2/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_2/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_4/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_4/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/tarfiles/GSM4710689_PDAC_TISSUE_1.tar
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/tarfiles/GSM4710723_PDAC_PBMC_14.tar
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/tarfiles/GSM4710709_PDAC_PBMC_1.tar
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/tarfiles/GSM4710694_PDAC_TISSUE_6.tar
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/tarfiles/GSM4710692_PDAC_TISSUE_4.tar
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/tarfiles/GSM4710718_PDAC_PBMC_10A.tar
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_



Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_4/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_10/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_10/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/Healthy_PBMC_1/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/Healthy_PBMC_1/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_5/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_5/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_7/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_7/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_7/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_7/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_16/filtered_gene_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_16/filtered_gene_bc_matrix/filtered_gene_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_10B/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_10B/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_2/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_2/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/AdjNorm_TISSUE_2/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/AdjNorm_TISSUE_2/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_3/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_3/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_11B/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_11B/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_1/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_1/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_15/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_15/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_12/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_12/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_11/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_11/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_5/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_5/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_10A/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_10A/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/GSE155698/GSE155698_RAW.tar
Successfully processed 0 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_12/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_12/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_9/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_9/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/Healthy_PBMC_3/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/Healthy_PBMC_3/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/Healthy_PBMC_2/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/Healthy_PBMC_2/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_14/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_14/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
Error processing /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/.DS_Store: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/.DS_Store is not a directory
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_6/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_6/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_9/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_9/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_11A/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_11A/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_16/filtered_gene_bc_matrices_h5.h5
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_16/filtered_gene_bc_matrices




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_16/filtered_gene_bc_matrices/filtered_gene_bc_matrices_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_13/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_13/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_3/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_3/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_1/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_1/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/Healthy_PBMC_4/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/Healthy_PBMC_4/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_8/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_8/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/AdjNorm_TISSUE_1/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/AdjNorm_TISSUE_1/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_6/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_6/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_TISSUE_14/filtered_feature_bc_matrix.h5
Successfully processed 0 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_13/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_13/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_8/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_8/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/AdjNorm_TISSUE_3/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/AdjNorm_TISSUE_3/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_15/filtered_feature_bc_matrix




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Steele/PDAC_PBMC_15/filtered_feature_bc_matrix/filtered_feature_bc_matrix_output.h5ad
Successfully processed 1 directories.


In [62]:
# Move the flat Simeone files into one directory per sample so the layout is
# consistent with the other datasets.  A sample directory is named after the
# first two '_'-separated fields of the file name (e.g. 'GSM6204109_P01').
import shutil

directory_path = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone'

# Collect the sample prefix of every 'GSM*' entry.  Names without a second
# field are skipped explicitly instead of relying on a swallowed IndexError.
files = []
for entry in os.listdir(directory_path):
    fields = entry.split('_')
    if entry.startswith('GSM') and len(fields) >= 2:
        files.append('_'.join(fields[:2]))
# Computed once after the scan, so it is defined even for an empty directory.
files_unique = sorted(set(files))

for new_dir in files_unique:
    os.makedirs(os.path.join(directory_path, new_dir), exist_ok=True)

# Single pass over the directory: each file goes to the directory of its OWN
# prefix, so a sample whose prefix merely starts with another sample's prefix
# can no longer be moved into the wrong place.
for entry in sorted(os.listdir(directory_path)):
    file_path = os.path.join(directory_path, entry)
    fields = entry.split('_')
    if not (entry.startswith('GSM') and len(fields) >= 2 and os.path.isfile(file_path)):
        continue
    new_directory = os.path.join(directory_path, '_'.join(fields[:2]))
    shutil.move(file_path, new_directory)
    print(f"Moved {file_path} to {new_directory}")

Moved /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204109_P01_barcodes.tsv to /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204109_P01
Moved /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204109_P01_features.tsv to /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204109_P01
Moved /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204109_P01_matrix.mtx to /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204109_P01
Moved /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204110_P02_barcodes.tsv to /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204110_P02
Moved /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204110_P02_features.tsv to /

In [84]:
root_directory = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone'

# Simeone dataset: one DataProcessor pointed at the dataset folder handles
# every per-sample subdirectory beneath it.
data_processor = DataProcessor(root_directory)

# Writes one '<subdirectory>_output.h5ad' into each subdirectory that has a
# matrix file and prints progress for every entry it visits.
data_processor.process_directory()

/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204125_P17




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204125_P17/GSM6204125_P17_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204130_P22




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204130_P22/GSM6204130_P22_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204112_P04




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204112_P04/GSM6204112_P04_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204111_P03




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204111_P03/GSM6204111_P03_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204123_P15




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204123_P15/GSM6204123_P15_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSE205013
No matrix file, skipping this directory
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204116_P08




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204116_P08/GSM6204116_P08_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204134_P26




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204134_P26/GSM6204134_P26_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204131_P23




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204131_P23/GSM6204131_P23_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204114_P06




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204114_P06/GSM6204114_P06_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204119_P11




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204119_P11/GSM6204119_P11_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204133_P25




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204133_P25/GSM6204133_P25_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204124_P16




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204124_P16/GSM6204124_P16_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204120_P12




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204120_P12/GSM6204120_P12_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204132_P24




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204132_P24/GSM6204132_P24_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/.DS_Store
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204115_P07




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204115_P07/GSM6204115_P07_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204110_P02




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204110_P02/GSM6204110_P02_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204127_P19




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204127_P19/GSM6204127_P19_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204126_P18




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204126_P18/GSM6204126_P18_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204135_P27




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204135_P27/GSM6204135_P27_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204129_P21




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204129_P21/GSM6204129_P21_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204117_P09




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204117_P09/GSM6204117_P09_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204118_P10




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204118_P10/GSM6204118_P10_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204121_P13




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204121_P13/GSM6204121_P13_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204113_P05




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204113_P05/GSM6204113_P05_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204109_P01




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204109_P01/GSM6204109_P01_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204128_P20




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204128_P20/GSM6204128_P20_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204122_P14




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Simeone/GSM6204122_P14/GSM6204122_P14_output.h5ad
Successfully processed 27 directories.


In [8]:
import shutil

# Flat GEO download of the Caronni dataset: the per-sample files sit directly
# in this directory, named like "GSM6727542_LPDAC_15_Tumor_barcodes.tsv".
directory_path = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni'

# Collect the "<first token>_<second token>" prefix of every GSM entry.
files = []
for file in os.listdir(directory_path):
    name_parts = file.split('_')
    # Entries without an underscore carry no second token; skip them
    # explicitly instead of hiding the IndexError behind a bare except.
    if file.startswith('GSM') and len(name_parts) >= 2:
        files.append(name_parts[0] + '_' + name_parts[1])

# Built once after the scan, so the name exists even for an empty directory
# (later cells read `files_unique`).
files_unique = sorted(set(files))

# One target directory per prefix; exist_ok keeps the cell re-runnable.
for new_dir in files_unique:
    directory = os.path.join(directory_path, new_dir)
    os.makedirs(directory, exist_ok=True)

# Move every top-level GSM file into the directory of its *own* prefix.
# Deriving the prefix from the file name (rather than testing startswith()
# against each prefix in turn) keeps a file such as "GSM1_AB_matrix.mtx" out
# of a "GSM1_A" directory and needs only a single directory listing.
for file in sorted(os.listdir(directory_path)):
    file_path = os.path.join(directory_path, file)
    name_parts = file.split('_')
    if not (file.startswith('GSM') and len(name_parts) >= 2 and os.path.isfile(file_path)):
        continue
    new_directory = os.path.join(directory_path, name_parts[0] + '_' + name_parts[1])
    shutil.move(file_path, new_directory)
    print(f"Moved {file_path} to {new_directory}")

Moved /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727542_LPDAC_15_Tumor_barcodes.tsv to /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727542_LPDAC
Moved /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727542_LPDAC_15_Tumor_features.tsv to /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727542_LPDAC
Moved /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727542_LPDAC_15_Tumor_matrix.mtx to /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727542_LPDAC
Moved /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727543_LPDAC_25_Tumor_barcodes.tsv to /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727543_LPDAC
Moved /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_da

In [6]:
# NOTE(review): this cell's source appears to be cut off ("os.lis" is not a
# complete expression for the list shown below) - restore the intended call
# or delete the cell.
os.lis

['GSM6727542_LPDAC',
 'GSM6727543_LPDAC',
 'GSM6727544_LPDAC',
 'GSM6727545_LPDAC',
 'GSM6727546_PDAC',
 'GSM6727547_PDAC',
 'GSM6727548_PDAC',
 'GSM6727549_PDAC',
 'GSM6727550_PDAC',
 'GSM6727551_PDAC']

In [7]:
# Debug listing of what currently sits in the dataset directory.
# The listing does not depend on the sample prefixes, so a single pass over
# the directory is enough (no loop over `files_unique`, no shadowing of the
# builtin `dir`).
for file in sorted(os.listdir(directory_path)):
    file_path = os.path.join(directory_path, file)
    print(file_path)

/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSE217847
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727542_LPDAC_15_Tumor_barcodes.tsv
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727542_LPDAC_15_Tumor_features.tsv
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727542_LPDAC_15_Tumor_matrix.mtx
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727543_LPDAC_25_Tumor_barcodes.tsv
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727543_LPDAC_25_Tumor_features.tsv
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727543_LPDAC_25_Tumor_matrix.mtx
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727544_LPDAC_26_Tumor_barcodes.tsv
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/

In [9]:
# Caronni dataset: each sub-directory of this root is scanned for a
# matrix/barcodes/features triple by DataProcessor.process_directory().
root_directory = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/'

# Build the processor for that root ...
data_processor = DataProcessor(root_directory=root_directory)

# ... and run the scan over its sub-directories.
data_processor.process_directory()

/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727542_LPDAC




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727542_LPDAC/GSM6727542_LPDAC_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSE217847
No matrix file, skipping this directory
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727551_PDAC




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727551_PDAC/GSM6727551_PDAC_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727548_PDAC




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727548_PDAC/GSM6727548_PDAC_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727547_PDAC




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727547_PDAC/GSM6727547_PDAC_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727545_LPDAC




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727545_LPDAC/GSM6727545_LPDAC_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727549_PDAC




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727549_PDAC/GSM6727549_PDAC_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727550_PDAC




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727550_PDAC/GSM6727550_PDAC_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727544_LPDAC




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727544_LPDAC/GSM6727544_LPDAC_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727543_LPDAC




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727543_LPDAC/GSM6727543_LPDAC_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727546_PDAC




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Caronni/GSM6727546_PDAC/GSM6727546_PDAC_output.h5ad
Successfully processed 10 directories.


In [10]:
import shutil

# Flat GEO download of the Zhang dataset: the per-sample files sit directly
# in this directory, named like "GSM5910784_Case1-YF_barcodes.tsv".
directory_path = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang'

# Collect the "<first token>_<second token>" prefix of every GSM entry.
files = []
for file in os.listdir(directory_path):
    name_parts = file.split('_')
    # Entries without an underscore carry no second token; skip them
    # explicitly instead of hiding the IndexError behind a bare except.
    if file.startswith('GSM') and len(name_parts) >= 2:
        files.append(name_parts[0] + '_' + name_parts[1])

# Built once after the scan, so the name exists even for an empty directory.
files_unique = sorted(set(files))

# One target directory per prefix; exist_ok keeps the cell re-runnable.
for new_dir in files_unique:
    directory = os.path.join(directory_path, new_dir)
    os.makedirs(directory, exist_ok=True)

# Move every top-level GSM file into the directory of its *own* prefix.
# Deriving the prefix from the file name (rather than testing startswith()
# against each prefix in turn) keeps a file such as "GSM1_AB_matrix.mtx" out
# of a "GSM1_A" directory and needs only a single directory listing.
for file in sorted(os.listdir(directory_path)):
    file_path = os.path.join(directory_path, file)
    name_parts = file.split('_')
    if not (file.startswith('GSM') and len(name_parts) >= 2 and os.path.isfile(file_path)):
        continue
    new_directory = os.path.join(directory_path, name_parts[0] + '_' + name_parts[1])
    shutil.move(file_path, new_directory)
    print(f"Moved {file_path} to {new_directory}")

Moved /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910784_Case1-YF_barcodes.tsv to /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910784_Case1-YF
Moved /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910784_Case1-YF_features.tsv to /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910784_Case1-YF
Moved /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910784_Case1-YF_matrix.mtx to /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910784_Case1-YF
Moved /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910785_Case1-ZY_barcodes.tsv to /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910785_Case1-ZY
Moved /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910785

In [None]:
# Zhang dataset: each sub-directory of this root is scanned for a
# matrix/barcodes/features triple by DataProcessor.process_directory().
root_directory = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/'

# Build the processor for that root ...
data_processor = DataProcessor(root_directory=root_directory)

# ... and run the scan over its sub-directories.
data_processor.process_directory()

/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910789_Case3-YF




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910789_Case3-YF/GSM5910789_Case3-YF_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910786_Case2-ZC




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910786_Case2-ZC/GSM5910786_Case2-ZC_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910785_Case1-ZY




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910785_Case1-ZY/GSM5910785_Case1-ZY_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910791_Case4-ZY




Saving file at: /dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910791_Case4-ZY/GSM5910791_Case4-ZY_output.h5ad
/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di35nod/PDAC_data/raw_data/Zhang/GSM5910784_Case1-YF


In [2]:
# Lin dataset: each sub-directory of this root is scanned for a
# matrix/barcodes/features triple by DataProcessor.process_directory().
root_directory = '/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin'

# Build the processor for that root ...
data_processor = DataProcessor(root_directory=root_directory)

# ... and run the scan over its sub-directories.
data_processor.process_directory()

/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679533




Saving file at: /lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679533/GSM4679533_output.h5ad
/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSE154778_RAW.tar
/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679541




Saving file at: /lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679541/GSM4679541_output.h5ad
/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679532




Saving file at: /lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679532/GSM4679532_output.h5ad
/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679546




Saving file at: /lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679546/GSM4679546_output.h5ad
/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679536




Saving file at: /lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679536/GSM4679536_output.h5ad
/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679547




Saving file at: /lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679547/GSM4679547_output.h5ad
/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679535




Saving file at: /lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679535/GSM4679535_output.h5ad
/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679539




Saving file at: /lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679539/GSM4679539_output.h5ad
/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679545




Saving file at: /lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679545/GSM4679545_output.h5ad
/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679537




Saving file at: /lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679537/GSM4679537_output.h5ad
/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679538




Saving file at: /lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679538/GSM4679538_output.h5ad
/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679544




Saving file at: /lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679544/GSM4679544_output.h5ad
/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679534




Saving file at: /lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679534/GSM4679534_output.h5ad
/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679542




Saving file at: /lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679542/GSM4679542_output.h5ad
/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679543




Saving file at: /lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679543/GSM4679543_output.h5ad
/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679540




Saving file at: /lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Lin/GSM4679540/GSM4679540_output.h5ad
Successfully processed 16 directories.


In [5]:
# Load the Schlesinger table. The file is read as tab-separated despite its
# .csv suffix, and its first column becomes the row index.
schlesinger = pd.read_csv(
    '/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Schlesinger/GSM4293555_Human.csv',
    index_col=0,
    sep='\t',
)

In [16]:
# Swap rows and columns of the loaded table.
# NOTE(review): this overwrites its own input, so running the cell a second
# time flips the table back - run it exactly once after loading.
schlesinger = schlesinger.transpose()

In [23]:
from scipy import sparse

# Store the table values as a CSR matrix and wrap them in an AnnData object:
# the row labels of `schlesinger` become obs, its column labels become var.
sparse_matrix = sparse.csr_matrix(schlesinger.values)
adata = anndata.AnnData(
    X=sparse_matrix,
    obs=pd.DataFrame(index=schlesinger.index),
    var=pd.DataFrame(index=schlesinger.columns),
)

# Persist next to the source table.
adata.write_h5ad('/lustre/groups/ml01/workspace/shrey.parikh/PDAC/raw_data/Schlesinger/GSM4293555_Human.h5ad')