In [1]:
import numpy as np
import pandas as pd
import psix

In [2]:
# A PCA of the normalized gene expression had already been run, and some non-neuron cells
# were filtered out of it. The index of that table is read here to get the list of neurons only:

neuron_cells = pd.read_csv(
    '/mnt/lareaulab/cfbuenabadn/psix_project/analysis_psix/midbrain_development/data/pc3_rd.tab.gz',
    sep='\t',
    index_col=0,
).index

In [None]:
# Start from an empty Psix object
psix_object = psix.Psix()

# Process the STARsolo output into PSI and mRNA counts.
# Arguments passed below:
#   sj_dir        - directory with the raw STARsolo SJ output
#   intron_file   - annotation, provided in the Psix GitHub repository
#   tpm_file      - gene x cells matrix of gene expression in TPM
#   cell_list     - list of cells to keep; in this case, only the neuron cells
#   save_files_in - location where the psi and mrna tables will be saved
#   solo=True     - indicates that sj_dir holds the output of STARsolo

psix_object.junctions2psi(
    sj_dir='/mnt/lareaulab/cfbuenabadn/RNASeq/Mouse/Tiklova/STARsolo/star_output_NoDedup/Solo.out/SJ/raw',
    intron_file='/mnt/lareaulab/cfbuenabadn/Genomes/pipeline_files/mm10_introns.tab',
    tpm_file='/mnt/lareaulab/cfbuenabadn/psix_project/analysis_psix/midbrain_development/data/tpm.tab.gz',
    cell_list=neuron_cells,
    save_files_in='psix_solo_object/',
    solo=True,
)

Processing STARsolo output. This might take a few minutes...


In [None]:
# Run Psix on the pre-processed object.

# latent: a low dimensional representation of the cell-state (in this case, a PCA of normalized expression)
# n_jobs: how many threads to use. Psix is slow, but using many threads speeds it up.

# The latent table is given by its absolute location, the same path that is read earlier
# in this notebook to build neuron_cells, rather than a '~'-relative path. This keeps the
# call independent of the HOME directory of whoever runs the notebook and of whether '~'
# gets expanded downstream.
# NOTE(review): assumes '~' previously resolved to /mnt/lareaulab/cfbuenabadn — confirm.
psix_object.run_psix(
    latent='/mnt/lareaulab/cfbuenabadn/psix_project/analysis_psix/midbrain_development/data/pc3_rd.tab.gz',
    n_jobs=25,
)

In [None]:
# Preview the first rows of the Psix results: exon scores and p-values.
# NOTE(review): presumably psix_results is a table filled in by run_psix — confirm.
psix_object.psix_results.head()

In [None]:
# Compute the exon modules and plot the matrix of correlations.
#   plot=True - plots the matrix shown below
#   plot_name - location where the plot is saved
psix_object.compute_modules(
    plot=True,
    plot_name='exon_modules_solo.png',
)

In [None]:
# The module assigned to each exon can be accessed through this attribute.
# NOTE(review): presumably only available after compute_modules has been run — confirm.
psix_object.modules

In [6]:
# Save the Psix object to the specified directory
psix_object.save_psix_object(psix_dir='psix_solo_object/')

In [9]:
# To check the results again later, all the previous steps can be skipped by
# loading the saved object directly:
psix_object = psix.Psix(psix_object='psix_solo_object/')