# Create SwanGraph from C2C12 PacBio data

## Load libraries

In [1]:
import swan_vis as swan
import pandas as pd
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt



## Load data into Python
We have data from C2C12 myoblasts and 3-day differentiated myotubes. Bulk long-read RNA-seq was performed using polyA primers and PacBio sequencing. To quantify transcript expression, reads were mapped to the mouse genome using Minimap2 and then cleaned and pre-processed with TranscriptClean and [TALON](https://github.com/mortazavilab/TALON).

In short, BAM files were downloaded from [this ENCODE data cart](https://www.encodeproject.org/carts/c2c12_bulk_pb/) and pre-processed with the walkthrough described [here](https://freese.gitbook.io/swan/tutorials/data_processing).

We will use [Swan](https://freese.gitbook.io/swan/) to analyze and visualize the C2C12 long-read transcriptome.

## Initialize a new SwanGraph


In [2]:
# instantiate a SwanGraph with no arguments; the annotation, transcriptome,
# abundance, and metadata are added to it by the method calls that follow
sg = swan.SwanGraph()

In [3]:
# Input file paths, listed in the order they are consumed below.

# reference annotation GTF, passed to sg.add_annotation
annot_gtf = '../swan_data/gencode.vM21.primary_assembly.annotation_UCSC_names.gtf'

# TALON database and transcript pass list, both passed to sg.add_transcriptome
talon_db = '../swan_data/talon.db'
pass_list = '../swan_data/all_pass_list.csv'

# abundance table, passed to sg.add_abundance
ab_file = '../swan_data/all_talon_abundance_filtered.tsv'

# metadata table, passed to sg.add_metadata
meta = '../swan_data/swan_metadata.tsv'

# TALON GTF; defined here but not referenced by any visible cell
data_gtf = '../swan_data/all_talon_observedOnly.gtf'

## Adding a reference transcriptome

In [None]:
# add the reference annotation transcriptome (the GENCODE vM21 GTF
# assigned to annot_gtf) to the SwanGraph
sg.add_annotation(annot_gtf)


Adding annotation to the SwanGraph


## Adding transcript models from a TALON database

In [None]:
# add the transcript models stored in the TALON database to the SwanGraph;
# abundance is NOT loaded here -- it is added separately by sg.add_abundance
# NOTE(review): pass_list presumably restricts which TALON transcripts are
# loaded -- confirm against the Swan documentation
sg.add_transcriptome(talon_db, pass_list=pass_list)

## Adding datasets and their abundance


In [None]:
# add each dataset's abundance information to the SwanGraph,
# read from the TALON abundance TSV assigned to ab_file
sg.add_abundance(ab_file)

## Adding metadata

In [None]:
# attach the metadata table (swan_metadata.tsv) to the SwanGraph
# NOTE(review): the 'timepoint' column used by the differential tests
# is presumably defined in this file -- confirm
sg.add_metadata(meta)

## Saving and loading your SwanGraph

In [None]:
# save the SwanGraph as a Python pickle file
# the argument is a path prefix without an extension; the matching load
# call reads '../swan_data/swan.p'
sg.save_graph('../swan_data/swan')

In [None]:
# load up a saved SwanGraph from a pickle file, rebinding sg to the result
# NOTE: only load pickle files that come from a trusted source
sg = swan.read('../swan_data/swan.p')

## Differential gene expression 

In [None]:
# metadata column and the two condition values to compare; both are passed
# to de_gene_test, de_transcript_test, and die_gene_test
obs_col = 'timepoint'
obs_conditions = ['72hr', '0hr']

In [None]:
# perform a differential gene expression
# Wald test on the provided metadata column and conditions
# NOTE: the name `test` is reused by the differential transcript expression
# call, so inspect or copy this result before running that one
test = sg.de_gene_test(obs_col, obs_conditions=obs_conditions)

## Differential transcript expression 

In [None]:
# perform a differential transcript expression
# Wald test on the provided metadata column and conditions
# NOTE: this rebinds `test`, replacing the gene-level result assigned earlier
test = sg.de_transcript_test(obs_col, obs_conditions=obs_conditions)

## Differential isoform expression

In [None]:
# find genes that exhibit differential isoform expression (DIE) between
# the '72hr' and '0hr' values of the 'timepoint' metadata column
die_table = sg.die_gene_test(obs_col=obs_col,
                             obs_conditions=obs_conditions,
                             verbose=True)

In [None]:
# preview the top of the DIE results table
# (presumably a pandas DataFrame -- confirm against the Swan documentation)
die_table.head()

In [None]:
# save the SwanGraph as a Python pickle file
# this writes to the same path prefix as the earlier save
# NOTE(review): presumably re-saved so the file reflects the tests run
# above -- confirm that test results are stored on the SwanGraph
sg.save_graph('../swan_data/swan')

In [None]:
# load up a saved SwanGraph from a pickle file, rebinding sg to the result
# NOTE: only load pickle files that come from a trusted source
sg = swan.read('../swan_data/swan.p')