

In this notebook, I first show how to concatenate the TPM and count matrices across samples, and then how to perform differential gene expression (DGE) analysis and generate SWAN reports.

## Load libraries

In [9]:
import pandas as pd
import numpy as np
import anndata as ad
import scanpy as sc
import swan_vis as swan

from utils import *

import warnings
warnings.filterwarnings("ignore")

## Concatenate count and TPM matrices

In [35]:
# Build one samples-x-transcripts AnnData for raw counts and one for TPM by
# stacking the per-sample matrices found in output_<sample>/, attach the
# per-sample metadata, and write both objects to .h5ad files.
samples = pd.read_csv("sample_name.txt", header=None)[0].tolist()
if not samples:
    # Fail early with a clear message instead of an AttributeError on None below.
    raise ValueError("sample_name.txt lists no samples; nothing to concatenate")

metadata_sample_path = "sample_metadata.csv"


def _read_sample_matrix(path, sample, var_names):
    """Read one per-sample .mtx file; label its single obs row `sample` and its vars `var_names`."""
    adata = sc.read_mtx(path, dtype='float32')
    adata.obs.index = [sample]
    adata.var.index = var_names
    return adata


def _stack_samples(parts, annotation):
    """Stack per-sample AnnData objects along obs in a single concat call.

    With more than one part, the columns of `annotation` (the transcript table
    of the last sample read) are aligned onto the resulting var index, because
    ad.concat with its default `merge` setting keeps no var columns.
    A single part is returned as a plain copy without extra var columns.
    """
    if len(parts) == 1:
        return parts[0].copy()
    stacked = ad.concat(parts, axis=0)
    stacked.var = pd.concat([stacked.var, annotation], axis=1).loc[stacked.var.index, :]
    return stacked


# Collect the per-sample objects first and concatenate once afterwards;
# growing the AnnData pairwise inside the loop re-copies all previous samples
# on every iteration.
count_parts = []
tpm_parts = []
transcript = None

for sample in samples:
    # First column of transcripts.txt holds the transcript identifiers.
    transcript = pd.read_csv(f'output_{sample}/transcripts.txt', header=None, index_col=0)
    transcript.index.names = ['']
    transcript_ids = transcript.index.tolist()

    count_parts.append(
        _read_sample_matrix(f'output_{sample}/matrix.abundance.mtx', sample, transcript_ids)
    )
    tpm_parts.append(
        _read_sample_matrix(f'output_{sample}/matrix.abundance.tpm.mtx', sample, transcript_ids)
    )

adata_count = _stack_samples(count_parts, transcript)
adata_tpm = _stack_samples(tpm_parts, transcript)

metadata_sample = pd.read_csv(metadata_sample_path)
metadata_sample.index = metadata_sample['Sample Name'].values

# Left-join on the sample name so obs keeps exactly the loaded samples, in
# their original order. Metadata rows for samples that were not loaded are
# ignored instead of adding rows (which would make the obs assignment fail);
# loaded samples without metadata get NaN.
adata_count.obs = adata_count.obs.join(metadata_sample, how="left")
adata_tpm.obs = adata_tpm.obs.join(metadata_sample, how="left")


adata_count.write(filename="transcript_exp_count.h5ad")
adata_tpm.write(filename="transcript_exp_tpm.h5ad")

## Differential Gene Expression (DGE) and Differential Isoform Expression (DIE)

In [None]:
# Load the saved SWAN graph that the differential expression test runs on.
# NOTE(review): this file must already exist; presumably it is produced by the
# create_swan_obj call in the "SWAN Report" section - confirm and run that first.
swan_output = "swan/swan_graph.p"
sg = swan.read(swan_output)

# Metadata values passed to run_deseq1 as `obs_filtering`
# (presumably used to select the samples to compare - verify in utils).
filtering = dict(
    age=["4_months"],
    sex=["F"],
    genotype=['5xFAD-HEMI', '5xFAD-HEMI_Clu-h2kbKI-HO'],
)

# Output name: DEG/<genotypes>_<ages>_<sexes>, every piece joined with "_".
name_parts = ['_'.join(filtering[key]) for key in ('genotype', 'age', 'sex')]
fname = "DEG/" + '_'.join(name_parts)

# Gene-level test with genotype as the condition; thresholds are passed as-is.
dds = run_deseq1(
    sg,
    how='gene',
    obs_filtering=filtering,
    obs_condition='genotype',
    l2fc_thresh=0,
    adj_p_thresh=0.05,
    ofile=fname,
)

## SWAN Report

In this section, I show how to create a SWAN object and how to generate a SWAN report for genes of interest.

In [None]:
# Identifier handed to create_swan_obj as its first argument.
study = "ad003"

# Per-sample GTF files.
gtf_cerberus = [
    "5xFAD-WT_Clu-h2kbKI-HO_F_4_months_HC_4.gtf",
    "5xFAD-HEMI_Clu-h2kbKI-HO_F_4_months_HC_2.gtf",
    "5xFAD-HEMI_F_4_months_HC_1.gtf",
    "5xFAD-WT_F_4_months_HC_3.gtf",
]

# Per-sample abundance tables.
# NOTE(review): the same four samples appear here in a different order than in
# gtf_cerberus (HC_2, HC_1, HC_4, HC_3 vs. HC_4, HC_2, HC_1, HC_3). If
# create_swan_obj pairs the two lists by position this is a mismatch - confirm.
ab_cerberus = [
    "5xFAD-HEMI_Clu-h2kbKI-HO_F_4_months_HC_2_abundance.tsv",
    "5xFAD-HEMI_F_4_months_HC_1_abundance.tsv",
    "5xFAD-WT_Clu-h2kbKI-HO_F_4_months_HC_4_abundance.tsv",
    "5xFAD-WT_F_4_months_HC_3_abundance.tsv",
]

# Reference annotation, sample metadata table and output location for the graph.
annot, meta, swan_output = "ref/cerberus.gtf", "swan/meta.tsv", "swan/swan_graph"

create_swan_obj(study, gtf_cerberus, ab_cerberus, annot, meta, swan_output)

# Generate the report for the gene 'DES'.
make_reports('DES')