# Ultra-low input RNAseq analysis

- Author: Agustín Sánchez-Belmonte
- Date: 02/02/24
- Project: miR-203 controls developmental timing and early fate restriction during preimplantation embryogenesis
- Experiment: Ultra-low input RNA-seq of inhibitors and siRNAs against targets of miR-203

## INDEX

0. Set up
1. Load data
2. Exploratory analysis
3. Dimensional reduction
4. Scores
5. Differential expression

## 0. SET UP

In [None]:
import os
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import seaborn as sns

import gseapy as gspy
from bioinfokit import analys, visuz

from matplotlib import rcParams

In [None]:
# Global plotting configuration.
# scanpy's set_figure_params re-applies its defaults for every argument that
# is not passed, so it is called exactly once with all desired values; a
# second call with only dpi_save would reset the on-screen dpi and the
# figure size again.
sc.set_figure_params(dpi=120, dpi_save=500, color_map='viridis')
sc.settings.verbosity = 3
sc.logging.print_header()
# Set after set_figure_params, which would otherwise overwrite the size.
rcParams['figure.figsize'] = 6, 6

# Colour palette (three blues followed by three oranges).
pal1 = ["lightblue", "deepskyblue", "dodgerblue", "navajowhite", "darkorange", "orangered"]

## 1. LOAD DATA

In [None]:
# Read the tab-separated count table and transpose it so that the rows of
# the file become the variables of the AnnData object.
ad = sc.read(
    "counts_withMERVL.tsv",
    delimiter='\t',
).transpose()
ad

In [None]:
# Load the per-sample metadata, indexed by the 'Sample' column.
obs = pd.read_excel("metadata_231222.xlsx", index_col='Sample')
# Assigning a DataFrame to `.obs` is positional: rows are NOT matched on
# their index. When the sheet lists exactly the samples of the count matrix,
# reorder it to the matrix order so that no sample gets another sample's
# annotation. Otherwise the original positional assignment is kept.
if obs.index.is_unique and set(obs.index) == set(ad.obs_names):
    obs = obs.loc[ad.obs_names]
ad.obs = obs
ad.obs

In [None]:
# Split the samples by experiment so each one is analysed on its own.
# Explicit copies are taken because both subsets are modified in place below
# (filtering, normalisation, scaling); a plain slice is only a view of `ad`.
ad_inh = ad[ad.obs.Experiment == 'Inhibitors'].copy()
ad_sirna = ad[ad.obs.Experiment == 'SiRNAs'].copy()

## 2. EXPLORATORY ANALYSIS

In [None]:
# Drop genes that are detected in fewer than 3 samples, in each experiment.
for subset in (ad_inh, ad_sirna):
    sc.pp.filter_genes(subset, min_cells=3)

In [None]:
# Show the 20 most highly expressed genes of each experiment.
for subset in (ad_inh, ad_sirna):
    sc.pl.highest_expr_genes(subset, n_top=20)

In [None]:
# Normalise every sample to a total of 10,000 counts (in place).
for subset in (ad_inh, ad_sirna):
    sc.pp.normalize_total(subset, target_sum=1e4, inplace=True)

In [None]:
# Log-transform the normalised counts (log1p)
for subset in (ad_inh, ad_sirna):
    sc.pp.log1p(subset)

In [None]:
# Flag highly variable genes in each experiment with the same thresholds,
# then plot the result right after it is computed.
for subset in (ad_inh, ad_sirna):
    sc.pp.highly_variable_genes(
        subset,
        min_mean=0.0125,
        max_mean=3,
        min_disp=0.5,
    )
    sc.pl.highly_variable_genes(subset)

In [None]:
# Keep the current (normalised, log-transformed) state in `.raw` before the
# data are scaled in the next step.
for subset in (ad_inh, ad_sirna):
    subset.raw = subset

In [None]:
# Scale the expression values of each experiment, capping them at 10.
for subset in (ad_inh, ad_sirna):
    sc.pp.scale(subset, max_value=10)

## 3. DIMENSIONAL REDUCTION

In [None]:
# Principal component analysis, run separately for each experiment
for subset in (ad_inh, ad_sirna):
    sc.tl.pca(subset, svd_solver='auto')

In [None]:
# Variance explained by each principal component (linear scale).
for subset in (ad_inh, ad_sirna):
    sc.pl.pca_variance_ratio(subset, log=False)

In [None]:
# PCA scatter plots coloured by the two condition annotations.
pca_colour_keys = ["Condition", 'Condition_2']
for subset in (ad_inh, ad_sirna):
    sc.pl.pca(subset, color=pca_colour_keys, size=100)

In [None]:
# Joint embedding of all samples (both experiments together).
# `ad` still holds the counts as loaded: only the per-experiment subsets were
# normalised, log-transformed, scaled and passed through PCA. The same steps
# are therefore applied to a working copy here, so the neighbourhood graph is
# not built on unnormalised counts, and only the resulting embeddings are
# written back to `ad` (its expression matrix stays untouched, which also
# keeps this cell safe to re-run).
ad_joint = ad.copy()
sc.pp.filter_genes(ad_joint, min_cells=3)
sc.pp.normalize_total(ad_joint, target_sum=1e4)
sc.pp.log1p(ad_joint)
sc.pp.scale(ad_joint, max_value=10)
sc.tl.pca(ad_joint, svd_solver='auto')

sc.pp.neighbors(ad_joint, n_neighbors=6, n_pcs=7) #6 AND 5/!5
sc.tl.umap(ad_joint)

ad.obsm['X_pca'] = ad_joint.obsm['X_pca']
ad.obsm['X_umap'] = ad_joint.obsm['X_umap']

In [None]:
# UMAP of all samples, coloured by the two condition annotations.
umap_colour_keys = ["Condition", 'Condition_2']
sc.pl.umap(ad, color=umap_colour_keys, size=100)

## 4. SCORES

Signatures included in Supplementary Table 1

In [None]:
# Gene signatures used for scoring, one list per stage / lineage.

# 2-cell stage signature.
two_cell = [
    'Zscan4c', 'Zscan4e', 'Spz1', 'Naalad2', 'Sp110', 'Pramef6', 'Fgf1', 'Bex6',
    'Pramel7', 'Kdm4d', 'Zfp352', 'Sytl2', 'Oog4', 'Hmgn3', 'Hspa1b', 'Foxa1',
]

# 8-cell stage signature.
eight_cell = [
    'Dppa3', 'Map1lc3b', 'Eloc', 'Gabarapl2', 'Timd2', 'Fbxo15', 'Gm11517', 'Calcoco2',
    'Map1lc3a', 'Alppl2', 'Sat1', 'Pemt', 'Mt1', 'Ubxn1', 'Nudt4', 'Zfp706',
    'Hprt', 'Sugt1', 'Pdxk', 'Gpd1l', 'Crxos', 'Ptma', 'Gm12617', 'Sumo2',
    'Isyna1', 'Npm1', 'Bhmt', 'Rnf7', 'Obox6', 'Mt-rnr2', 'Eif2s2', 'Dnaja1',
    'H3f3a', 'Cks2', 'Cited1', '2310040g24rik', 'Ube2c', 'Cd63', 'Pttg1ip', 'Timm17a',
    'Mpc2', 'Gcsh', 'Mkrn1', 'Srp9', 'Sfn', 'Tomm5', 'Dnajb6', 'Timm23',
    'Gulo', 'Alg13',
]

# EPI signature.
EPI = [
    'Tfcp2l1', 'Tbx3', 'Prdm14', 'Nanog', 'Esrrb', 'Klf4', 'Nr5a2', 'Pou5f1',
    'Sox2', 'Nr0b1', 'Tet2', 'Klf2', 'Fbxo15', 'Utf1', 'Upp1', 'Zfp42',
    'Tet1', 'Tdgf1', 'Tcf15', 'Dppa5a',
]

# TE signature.
TE = ['Cdx2', 'Tspan8', 'Dppa1', 'Id2', 'Krt8', 'Gata3']

# PrE signature.
PrE = ['Gata4', 'Gata6', 'Runx1', 'Pdgfra', 'Creb3l2']

In [None]:
# Display order of the groups in the plots below.

# siRNA experiment ('Condition').
order_sir = ['siControl', 'siKat6a', 'siKat6b', 'siP300', 'siComb']

# Inhibitor experiment, individual treatments ('Condition').
order_inh = ['Veh', 'A485', 'TSA', 'SAHA']

# Inhibitor experiment, treatments grouped by class ('Condition_2').
order_inh_2 = ['Veh', 'HAT_inh', 'HDAC_inh']

In [None]:
# Score every sample of both experiments for each signature; each score is
# stored under the name given as `score_name`.
signature_sets = {
    '2-cell': two_cell,
    '8-cell': eight_cell,
    'EPI': EPI,
    'TE': TE,
    'PrE': PrE,
}
for subset in (ad_inh, ad_sirna):
    for label, genes in signature_sets.items():
        sc.tl.score_genes(subset, genes, score_name=label)

In [None]:
# Dot plots of the five signature scores per group.
# Each figure gets its own file name: the generic '.png' / '2.png' suffixes
# are reused by other dot-plot cells of this notebook, so the files written
# here would be overwritten.
score_keys = ['2-cell', '8-cell', 'EPI', 'TE', 'PrE']
sc.pl.dotplot(ad_inh, score_keys, groupby="Condition_2", standard_scale='var',
              categories_order=order_inh_2, save='scores_inh_condition2.png')
sc.pl.dotplot(ad_sirna, score_keys, groupby="Condition", standard_scale='var',
              categories_order=order_sir, save='scores_sirna_condition.png')

In [None]:
# Dot plots of MERVL-int expression together with the signature scores,
# grouped by individual condition.
# Each figure gets its own file name: the generic '.png' / '2.png' suffixes
# are reused by other dot-plot cells of this notebook, so the files written
# here would be overwritten.
mervl_and_scores = ['MERVL-int', '2-cell', '8-cell', 'EPI', 'TE', 'PrE']
sc.pl.dotplot(ad_inh, mervl_and_scores, groupby="Condition", standard_scale='var',
              categories_order=order_inh, save='MERVL_scores_inh_condition.png')
sc.pl.dotplot(ad_sirna, mervl_and_scores, groupby="Condition", standard_scale='var',
              categories_order=order_sir, save='MERVL_scores_sirna_condition.png')

In [None]:
# Dot plots of MERVL-int expression together with the signature scores,
# grouped by 'Condition_2'.
# Each figure gets its own file name: the generic '.png' / '2.png' suffixes
# are reused by other dot-plot cells of this notebook, so the files written
# here would be overwritten.
mervl_and_scores = ['MERVL-int', '2-cell', '8-cell', 'EPI', 'TE', 'PrE']
sc.pl.dotplot(ad_inh, mervl_and_scores, groupby="Condition_2", standard_scale='var',
              categories_order=order_inh_2, save='MERVL_scores_inh_condition2.png')
# NOTE(review): every other siRNA plot pairs `order_sir` with
# groupby="Condition"; confirm that 'Condition_2' of the siRNA samples really
# has the `order_sir` categories, otherwise this was probably meant to group
# by 'Condition'.
sc.pl.dotplot(ad_sirna, mervl_and_scores, groupby="Condition_2", standard_scale='var',
              categories_order=order_sir, save='MERVL_scores_sirna_condition2.png')

In [None]:
# Dot plots of MERVL-int alone.
# Each figure gets its own file name: the generic '.png' / '2.png' suffixes
# are reused by other dot-plot cells of this notebook, so the files written
# here would be overwritten.
sc.pl.dotplot(ad_inh, ['MERVL-int'], groupby="Condition_2", standard_scale='var',
              categories_order=order_inh_2, save='MERVL_inh_condition2.png') #'MERVL_2A-int'
sc.pl.dotplot(ad_sirna, ['MERVL-int'], groupby="Condition", standard_scale='var',
              categories_order=order_sir, save='MERVL_sirna_condition.png')

In [None]:
# Signatures restricted to known genes (used for per-gene plots below).

# 2-cell stage signature.
two_cell = [
    'Zscan4c', 'Zscan4e', 'Spz1', 'Naalad2', 'Sp110', 'Pramef6', 'Fgf1', 'Bex6',
    'Pramel7', 'Kdm4d', 'Zfp352', 'Sytl2', 'Oog4', 'Hmgn3', 'Hspa1b', 'Foxa1',
]

# 8-cell stage signature.
eight_cell = [
    'Dppa3', 'Eloc', 'Gabarapl2', 'Timd2', 'Fbxo15', 'Gm11517', 'Calcoco2', 'Map1lc3a',
    'Alppl2', 'Sat1', 'Pemt', 'Mt1', 'Ubxn1', 'Nudt4', 'Zfp706', 'Hprt',
    'Sugt1', 'Pdxk', 'Gpd1l', 'Crxos', 'Ptma', 'Gm12617', 'Sumo2', 'Isyna1',
    'Npm1', 'Bhmt', 'Rnf7', 'Obox6', 'Eif2s2', 'Dnaja1', 'H3f3a', 'Cks2',
    'Cited1', 'Ube2c', 'Cd63', 'Pttg1ip', 'Timm17a', 'Mpc2', 'Gcsh', 'Mkrn1',
    'Srp9', 'Sfn', 'Tomm5', 'Dnajb6', 'Timm23', 'Gulo', 'Alg13',
]

# EPI signature.
EPI = [
    'Tfcp2l1', 'Tbx3', 'Prdm14', 'Nanog', 'Esrrb', 'Klf4', 'Nr5a2', 'Pou5f1',
    'Tet2', 'Klf2', 'Fbxo15', 'Utf1', 'Upp1', 'Zfp42', 'Tet1', 'Tdgf1',
    'Tcf15', 'Dppa5a',
]

# TE signature.
TE = ['Cdx2', 'Tspan8', 'Dppa1', 'Id2', 'Krt8', 'Gata3']

# PrE signature.
PrE = ['Gata4', 'Gata6', 'Runx1', 'Pdgfra', 'Creb3l2']

In [None]:
# Plot groups: label shown in the figure -> gene(s) drawn under that label.
DICT = {
    'MERVL': 'MERVL-int',
    '2-cell': two_cell,
    '8-cell': eight_cell,
    'EPI': EPI,
    'TE': TE,
    'PrE': PrE,
}

In [None]:
# Per-gene dot plots of all signatures: (data, grouping column, group order,
# file suffix) for each figure, drawn in this order.
dotplot_jobs = (
    (ad_inh, 'Condition', order_inh, '.png'),
    (ad_inh, 'Condition_2', order_inh_2, '2.png'),
    (ad_sirna, 'Condition', order_sir, '3.png'),
)
for subset, group_key, group_order, suffix in dotplot_jobs:
    sc.pl.dotplot(
        subset,
        DICT,
        groupby=group_key,
        standard_scale='var',
        categories_order=group_order,
        save=suffix,
    )

In [None]:
# Put the condition labels in the desired display order (ordered categorical).
# `.astype('category')` makes this independent of whether an earlier step
# already converted the columns to categorical; `reorder_categories` still
# raises if the labels in the data and in the order list do not match.
ad_inh.obs['Condition'] = (
    ad_inh.obs['Condition'].astype('category')
    .cat.reorder_categories(order_inh, ordered=True)
)
ad_inh.obs['Condition_2'] = (
    ad_inh.obs['Condition_2'].astype('category')
    .cat.reorder_categories(order_inh_2, ordered=True)
)
ad_sirna.obs['Condition'] = (
    ad_sirna.obs['Condition'].astype('category')
    .cat.reorder_categories(order_sir, ordered=True)
)

In [None]:
# Same plot groups as the full dictionary, but with the 8-cell signature cut
# to its first 20 genes to keep the heatmaps readable.
# The slice starts at index 0: `[1:20]` silently dropped the first gene of
# the signature and kept only 19.
DICT_2 = {
    'MERVL': 'MERVL-int',
    '2-cell': two_cell,
    '8-cell': eight_cell[:20],
    'EPI': EPI,
    'TE': TE,
    'PrE': PrE,
}

In [None]:
# Large fonts for the wide (50 x 10) signature heatmaps.
# NOTE(review): set_figure_params changes global settings, so fontsize=60
# presumably also applies to every plot drawn after this cell; confirm that
# this is intended.
sc.set_figure_params(fontsize=60, dpi_save=300)

# Each figure gets its own file name: the generic '.png' / '2.png' / '3.png'
# suffixes are reused by the next heatmap cell, which would overwrite the
# files written here.
sc.pl.heatmap(ad_inh, DICT_2, groupby='Condition', standard_scale='var',
              show_gene_labels=True, save='_signatures_inh_condition.png', figsize=(50, 10))
sc.pl.heatmap(ad_inh, DICT_2, groupby='Condition_2', standard_scale='var',
              show_gene_labels=True, save='_signatures_inh_condition2.png', figsize=(50, 10))
sc.pl.heatmap(ad_sirna, DICT_2, groupby='Condition', standard_scale='var',
              show_gene_labels=True, save='_signatures_sirna_condition.png', figsize=(50, 10))

### 2 cell only + MERVL-int

In [None]:
# Heatmaps of MERVL-int followed by the 2-cell signature genes:
# (data, grouping column, file suffix) for each figure, drawn in this order.
heatmap_jobs = (
    (ad_inh, 'Condition', '.png'),
    (ad_inh, 'Condition_2', '2.png'),
    (ad_sirna, 'Condition', '3.png'),
)
for subset, group_key, suffix in heatmap_jobs:
    sc.pl.heatmap(
        subset,
        ['MERVL-int'] + two_cell,
        groupby=group_key,
        standard_scale='var',
        save=suffix,
    )

### MERVL only

In [None]:
# Heatmaps of MERVL-int alone (displayed only, not saved):
# (data, grouping column) for each figure, drawn in this order.
mervl_jobs = (
    (ad_inh, 'Condition'),
    (ad_inh, 'Condition_2'),
    (ad_sirna, 'Condition'),
)
for subset, group_key in mervl_jobs:
    sc.pl.heatmap(subset, ['MERVL-int'], groupby=group_key, standard_scale='var')

## 5. DIFFERENTIAL EXPRESSION

In [None]:
# Rank genes of each inhibitor condition against the 'Veh' group (t-test).
sc.tl.rank_genes_groups(
    ad_inh,
    groupby='Condition',
    reference='Veh',
    method='t-test',
)

In [None]:
# Plot the ranked genes of the inhibitor experiment.
sc.pl.rank_genes_groups(adata=ad_inh)

In [None]:
# Rank genes of each siRNA condition against the 'siControl' group (t-test).
sc.tl.rank_genes_groups(
    ad_sirna,
    groupby='Condition',
    reference='siControl',
    method='t-test',
)

In [None]:
# Plot the ranked genes of the siRNA experiment.
sc.pl.rank_genes_groups(adata=ad_sirna)