In [9]:
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import pandas as pd


In [10]:
# Configuration: the AnnData files inspected by every module below, together
# with the short label each one is reported under.
#
# The later cells iterate over ``zip(file_paths, names)``, so both lists must
# exist and be aligned.  Keeping each (label, filename) pair on one line makes
# it impossible for the two lists to drift apart.  The labels are the ones
# shown in the recorded outputs of the cells below.
DATA_DIR = "/storage/users/data/PANC"

DATASETS = [
    ("forPseudotime", "forPseudotime.h5ad"),
    ("PANC1_SC1", "PANC1_SC1.h5ad"),
    ("hvg_adata", "hvg_adata.h5ad"),
    ("clustered_adata", "clustered_adata.h5ad"),
    ("filtered_preprocessed_combined", "filtered_preprocessed_combined_adata.h5ad"),
]

names = [label for label, _ in DATASETS]
file_paths = [f"{DATA_DIR}/{filename}" for _, filename in DATASETS]

# zip() silently truncates to the shorter list, so fail loudly on a mismatch.
assert len(file_paths) == len(names), "file_paths and names must be aligned"

## Module 1: Checking uniqueness of indices and columns for each AnnData file

In [12]:


# Module 1: Check uniqueness of indices and columns
#
# For every AnnData file, report whether the observation names and the
# variable names are unique, and list any label that occurs more than once.
# Requires ``file_paths`` and ``names`` to already be defined in the kernel.
# A failure on one file is printed and the loop continues with the next file.
print("Module 1: Checking uniqueness of indices and columns for each AnnData file.\n")
for file_path, name in zip(file_paths, names):
    try:
        adata = sc.read(file_path)

        # ``adata.to_df()`` labels its rows with ``obs_names`` and its columns
        # with ``var_names``.  Inspecting those two indexes directly gives the
        # same answer without building a dense DataFrame of the whole matrix.
        row_labels = adata.obs_names
        column_labels = adata.var_names

        # Check for unique index
        if not row_labels.is_unique:
            print(f"File '{name}' has a non-unique index.")
            duplicated_indices = row_labels[row_labels.duplicated()].unique()
            print(f"Duplicated index values in file '{name}': {duplicated_indices.tolist()}")
        else:
            print(f"File '{name}' has a unique index.")

        # Check for unique columns
        if not column_labels.is_unique:
            print(f"File '{name}' has non-unique columns.")
            duplicated_columns = column_labels[column_labels.duplicated()].unique()
            print(f"Duplicated column names in file '{name}': {duplicated_columns.tolist()}")
        else:
            print(f"File '{name}' has unique columns.")

    except Exception as e:
        # Deliberate best-effort: report the problem and move on to the next file.
        print(f"Error while checking uniqueness for file '{name}': {e}")

print("\nModule 1 completed.\n")


Module 1: Checking uniqueness of indices and columns for each AnnData file.

File 'forPseudotime' has a unique index.
File 'forPseudotime' has unique columns.


  utils.warn_names_duplicates("var")


File 'PANC1_SC1' has a unique index.
File 'PANC1_SC1' has non-unique columns.
Duplicated column names in file 'PANC1_SC1': ['MATR3', 'RAET1E-AS1', 'MTUS1-DT', 'DNAJC9-AS1', 'GPR84-AS1', 'GOLGA8M', 'ATXN7L3-AS1', 'ELFN2']
File 'hvg_adata' has a unique index.
File 'hvg_adata' has unique columns.
File 'clustered_adata' has a unique index.
File 'clustered_adata' has unique columns.
File 'filtered_preprocessed_combined' has a unique index.
File 'filtered_preprocessed_combined' has unique columns.

Module 1 completed.



## Module 2: Counting the number of cells (observations) per condition

In [13]:
# Module 2: tally how many rows of .obs fall under each 'condition' value
#
# Each file is loaded in turn; files without a 'condition' column in .obs are
# reported and skipped.  Any error for one file is printed and the loop moves
# on.  Requires ``file_paths`` and ``names`` to already exist in the kernel.
print("Module 2: Counting number of variables (cells) per condition.\n")
for file_path, name in zip(file_paths, names):
    try:
        adata = sc.read(file_path)
        obs_table = adata.obs

        # Guard clause: nothing to count when the column is absent.
        if 'condition' not in obs_table.columns:
            print(f"File '{name}' does not contain a 'condition' column in .obs.\n")
            continue

        condition_counts = obs_table['condition'].value_counts()
        print(f"File '{name}' has the following number of cells per condition:\n{condition_counts}\n")

    except Exception as e:
        print(f"Error while counting variables for file '{name}': {e}")

print("\nModule 2 completed.\n")

Module 2: Counting number of variables (cells) per condition.

File 'forPseudotime' has the following number of cells per condition:
condition
CTRL_2D             3737
CTRL_merged         3191
TGFb1_merged        2142
TGFb1_GEM_merged     792
GEM_2                363
Name: count, dtype: int64



  utils.warn_names_duplicates("var")


File 'PANC1_SC1' has the following number of cells per condition:
condition
CTRL_merged         3191
TGFb1_merged        2142
TGFb1_GEM_merged     792
GEM_2                363
Name: count, dtype: int64

File 'hvg_adata' has the following number of cells per condition:
condition
CTRL_2D        3913
CTRL_1         2822
TGFb1_1        1882
TGFb1_GEM_2     459
CTRL_2          418
GEM_2           393
TGFb1_GEM_1     370
TGFb1_2         337
Name: count, dtype: int64

File 'clustered_adata' has the following number of cells per condition:
condition
CTRL_2D        3913
CTRL_1         2822
TGFb1_1        1882
TGFb1_GEM_2     459
CTRL_2          418
GEM_2           393
TGFb1_GEM_1     370
TGFb1_2         337
Name: count, dtype: int64

File 'filtered_preprocessed_combined' has the following number of cells per condition:
condition
CTRL_2D        3989
CTRL_1         2844
TGFb1_1        1906
TGFb1_GEM_2     462
CTRL_2          421
GEM_2           396
TGFb1_GEM_1     372
TGFb1_2         340
Name: count, dtype: int64

## Module 3: Visualizing correlations of the barcodes (cells)

In [None]:
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Module 3: Visualize correlations of barcodes (cells)
#
# For every AnnData file, compute the Pearson correlation between every pair
# of rows of ``adata.X`` (rows are labelled by ``obs_names``) and draw the
# resulting square matrix as a heatmap.  Requires ``file_paths`` and ``names``
# to already be defined in the kernel.  A failure on one file is printed and
# the loop continues with the next file.
print("Module 3: Visualizing correlations of the barcodes (cells).\n")
for file_path, name in zip(file_paths, names):
    try:
        adata = sc.read(file_path)

        # Convert adata.X to a dense, labelled DataFrame (sparse matrices expose
        # ``toarray``; dense arrays are used as they are).
        dense_matrix = adata.X.toarray() if hasattr(adata.X, "toarray") else adata.X
        df = pd.DataFrame(dense_matrix,
                          index=adata.obs_names,
                          columns=adata.var_names)

        if df.isna().to_numpy().any():
            # With missing values, keep pandas' pairwise-complete correlation.
            barcode_correlations = df.transpose().corr()
        else:
            # np.corrcoef treats each ROW as a variable by default, so no
            # transpose is needed.  For NaN-free data it yields the same
            # Pearson matrix as ``df.transpose().corr()`` but via a matrix
            # product rather than a pairwise loop.  errstate silences the
            # 0/0 warning for constant rows, which become NaN as in pandas.
            with np.errstate(divide="ignore", invalid="ignore"):
                correlation_values = np.corrcoef(df.to_numpy())
            barcode_correlations = pd.DataFrame(correlation_values,
                                                index=df.index,
                                                columns=df.index)

        # Draw the barcode-by-barcode heatmap.  No separator lines: with one
        # heatmap cell per pair of barcodes the lines would be wider than the
        # cells themselves and hide the colours.
        fig, ax = plt.subplots(figsize=(10, 8))
        try:
            sns.heatmap(barcode_correlations, cmap='viridis', ax=ax)
            ax.set_title(f"Correlation Heatmap of Barcodes for '{name}'")
            ax.set_xlabel("Barcodes (cells)")
            ax.set_ylabel("Barcodes (cells)")
            plt.show()
        finally:
            # Release the figure explicitly so figures do not pile up in the loop.
            plt.close(fig)

    except Exception as e:
        # Deliberate best-effort: report the problem and move on to the next file.
        print(f"Error while visualizing barcode correlations for file '{name}': {e}")

print("\nModule 3 completed.\n")


In [None]:
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Module 3: Visualize correlations of barcodes (cells)
#
# NOTE(review): this cell performs the same Module 3 analysis as the cell
# directly above it; consider keeping only one of the two.
#
# For every AnnData file, compute the Pearson correlation between every pair
# of rows of ``adata.X`` (rows are labelled by ``obs_names``) and draw the
# resulting square matrix as a heatmap.  Requires ``file_paths`` and ``names``
# to already be defined in the kernel.  A failure on one file is printed and
# the loop continues with the next file.
print("Module 3: Visualizing correlations of the barcodes (cells).\n")
for file_path, name in zip(file_paths, names):
    try:
        adata = sc.read(file_path)

        # Convert adata.X to a dense, labelled DataFrame (sparse matrices expose
        # ``toarray``; dense arrays are used as they are).
        dense_matrix = adata.X.toarray() if hasattr(adata.X, "toarray") else adata.X
        df = pd.DataFrame(dense_matrix,
                          index=adata.obs_names,
                          columns=adata.var_names)

        if df.isna().to_numpy().any():
            # With missing values, keep pandas' pairwise-complete correlation.
            barcode_correlations = df.transpose().corr()
        else:
            # np.corrcoef treats each ROW as a variable by default, so no
            # transpose is needed.  For NaN-free data it yields the same
            # Pearson matrix as ``df.transpose().corr()`` but via a matrix
            # product rather than a pairwise loop.  errstate silences the
            # 0/0 warning for constant rows, which become NaN as in pandas.
            with np.errstate(divide="ignore", invalid="ignore"):
                correlation_values = np.corrcoef(df.to_numpy())
            barcode_correlations = pd.DataFrame(correlation_values,
                                                index=df.index,
                                                columns=df.index)

        # Draw the barcode-by-barcode heatmap.  No separator lines: with one
        # heatmap cell per pair of barcodes the lines would be wider than the
        # cells themselves and hide the colours.
        fig, ax = plt.subplots(figsize=(10, 8))
        try:
            sns.heatmap(barcode_correlations, cmap='viridis', ax=ax)
            ax.set_title(f"Correlation Heatmap of Barcodes for '{name}'")
            ax.set_xlabel("Barcodes (cells)")
            ax.set_ylabel("Barcodes (cells)")
            plt.show()
        finally:
            # Release the figure explicitly so figures do not pile up in the loop.
            plt.close(fig)

    except Exception as e:
        # Deliberate best-effort: report the problem and move on to the next file.
        print(f"Error while visualizing barcode correlations for file '{name}': {e}")

print("\nModule 3 completed.\n")
