In [4]:
# Import Libraries
import os
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Bioinformatics Libraries
import Bio
import scanpy as sc

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [5]:
# Load the pbmc3k dataset
adata = sc.datasets.pbmc3k()

print(adata)

AnnData object with n_obs × n_vars = 2700 × 32738
    var: 'gene_ids'


In [6]:
# Basic filtering
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)

print(adata)


AnnData object with n_obs × n_vars = 2700 × 13714
    obs: 'n_genes'
    var: 'gene_ids', 'n_cells'


In [7]:
# Normalize the data
sc.pp.normalize_total(adata, target_sum=1e4)

# Logarithmize the data
sc.pp.log1p(adata)

# Store the raw data
adata.raw = adata



In [8]:
# Identify highly variable genes
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)

# Filter the data
adata = adata[:, adata.var.highly_variable]

# Summarize the highly variable genes
print(adata.var.highly_variable.sum())


1872


In [9]:
adata

View of AnnData object with n_obs × n_vars = 2700 × 1872
    obs: 'n_genes'
    var: 'gene_ids', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'log1p', 'hvg'

## Dimension Reduction

### Zifa

ZIFA has an n_componet like as K , I still did that since I didn't know when i started

In [10]:
# Import Libraries
from ZIFA import ZIFA
from ZIFA import block_ZIFA
import numpy as np



In [12]:
# ZIFA is designed to work with count data, so we need to make sure we're using the raw counts before normalization and log-transformation.
# If you have stored the raw data in adata.raw, you can use that; otherwise, make sure to use the original count data.
X_counts = adata.raw.X.toarray() if adata.raw is not None else adata.X.toarray()
X_counts.shape

(2700, 13714)

In [13]:
# Set the number of dimensions to reduce down to
n_components = 6  # ZIFA often works better with fewer components

# Fit the ZIFA model
# Note that ZIFA can be very slow, especially on larger datasets
Z, model_params = ZIFA.fitModel(X_counts, n_components)

# The result is in Z, which we can add back to the AnnData object
adata.obsm['X_zifa'] = Z


Running zero-inflated factor analysis with N = 2700, D = 13714, K = 6


In [None]:
# Visualize the results
# Extract the first two ZIFA components
zifa_comp1 = adata.obsm['X_zifa'][:, 0]
zifa_comp2 = adata.obsm['X_zifa'][:, 1]

# Create a scatter plot of the first two ZIFA components
plt.figure(figsize=(8, 6))
plt.scatter(zifa_comp1, zifa_comp2, s=5, alpha=0.7)
plt.title('ZIFA - First Two Components')
plt.xlabel('ZIFA1')
plt.ylabel('ZIFA2')
plt.show()
