# Bulk long read RNA-seq analysis with Python

## Load libraries

In [1]:
import swan_vis as swan
import pandas as pd
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt

## Load data into Python
We have data from C2C12 myoblasts and 3-day differentiated myotubes. Bulk long-read RNA-seq was performed using polyA primers and PacBio sequencing. To quantify transcript expression, reads were mapped to the mouse genome using Minimap2, then the reads were cleaned and processed with TranscriptClean and [TALON](https://github.com/mortazavilab/TALON).

We will use [Swan](https://freese.gitbook.io/swan/) to analyze and visualize the C2C12 long-read transcriptome.

## Initialize a new SwanGraph


In [None]:
# create a new SwanGraph; the cells below add the annotation,
# transcript models, abundance, and metadata to it
sg = swan.SwanGraph()

In [None]:
# input file paths (relative to the working directory) used by the cells below
annot_gtf = 'data/gencode.vM21.primary_assembly.annotation_UCSC_names.gtf'  # passed to sg.add_annotation
data_gtf = 'data/all_talon_observedOnly.gtf'  # NOTE(review): not used by any cell shown here
ab_file = 'data/all_talon_abundance_filtered.tsv'  # passed to sg.add_abundance
talon_db = 'data/talon.db'  # passed to sg.add_transcriptome
pass_list = 'data/all_pass_list.csv'  # passed to sg.add_transcriptome as pass_list
meta = 'data/metadata.tsv'  # passed to sg.add_metadata

## Adding a reference transcriptome

In [None]:
# add the reference annotation transcriptome (the GTF at annot_gtf)
# to the SwanGraph
sg.add_annotation(annot_gtf)

## Adding transcript models from a TALON database

In [None]:
# add transcript models from the TALON database to the SwanGraph;
# abundance is NOT added here -- it is loaded separately with
# sg.add_abundance
# NOTE(review): pass_list presumably restricts which TALON transcripts
# are added -- confirm against the Swan documentation
sg.add_transcriptome(talon_db, pass_list=pass_list)

## Adding datasets and their abundance


In [None]:
# add each dataset's abundance information (from the file at ab_file)
# to the SwanGraph
sg.add_abundance(ab_file)

## Saving and loading your SwanGraph

In [None]:
# save the SwanGraph as a Python pickle file
# NOTE(review): the load step reads 'swan.p', so 'swan' is presumably a
# filename prefix to which '.p' is appended -- confirm
sg.save_graph('swan')

In [None]:
# load up a saved SwanGraph from a pickle file, rebinding sg to the
# loaded graph
sg = swan.read('swan.p')

## Adding metadata

In [None]:
# add the metadata from the file at meta to the SwanGraph
sg.add_metadata(meta)