##### Examples dev TFs
- Look up specific TFs involved in development, for example temporal TFs from embryonic NBs
- Also look up the pair-rule and gap genes
- These genes were manually extracted from reviews and/or the Interactive Fly, and then looked up in FlyBase 632

In [None]:
#Imports
import sys
import os
import pandas as pd
import seaborn as sns
import numpy as np

sys.path.append('../scripts')
from plot_helpers import *
from utilities import load_dataset

%load_ext autoreload
%autoreload 2

In [None]:
# Folder that receives the tables and figures written by this notebook
outdir = '../Figures/Examples'
os.makedirs(outdir, exist_ok=True)

# Rate/stability table, plus the half-life rounded to a whole number.
# NOTE: astype(int) raises if any half-life is NaN or infinite.
rate_df = load_dataset(
    '../Figures/summary_files/INSPEcT_rates.csv',
    '../Figures/summary_files/brain4sU_passed.csv',
)
rate_df['round_halflife'] = rate_df['halflife'].round().astype(int)

# Per-gene abundance summary; use it to see which genes were eliminated by
# expression filtering or INSPEcT filtering.
# NOTE(review): results_dir is not defined in this notebook -- presumably it
# comes from the star import of plot_helpers; confirm.
infile = os.path.join(results_dir, 'gene_quantification', 'summary_abundance_by_gene_filtered.csv')
df = pd.read_csv(infile)

In [None]:
# Gene category table, indexed by gene; me3 is True where the category is 'updowngene'
me_df = pd.read_csv('../Figures/Devreg/gene_cat_me3.csv', index_col='gene')
me_df['me3'] = me_df['category'].eq('updowngene')

In [None]:
# Sanity check: both tables must list the same genes in the same order.
# Index.equals returns False when the two indexes differ in length, whereas an
# element-wise == comparison raises ValueError in that case.
me_df.index.equals(rate_df.index)

In [None]:
# Curated gene lists: header-less CSV files whose first column is read into an array
indir1 = '../../resources/'
pattern_genelist, nb_genelist, segpol_genelist = (
    pd.read_csv(os.path.join(indir1, fname), header=None)[0].values
    for fname in (
        'patterning_genes_628.csv',
        'neuraldev_genes_628.csv',
        'segpol_genes_628.csv',
    )
)

In [None]:
def sort_and_write(df, genelist, outname):
    """Save gene name and rounded half-life for the listed genes as a CSV file.

    Rows of ``df`` whose index value occurs in ``genelist`` are kept and
    ordered case-insensitively by ``gene_name``. The ``gene_name`` and
    ``round_halflife`` columns (plus the index) are then written to
    ``<outname>.csv`` inside the module-level ``outdir``. Returns nothing.
    """
    wanted = df.index.isin(genelist)
    subset = df.loc[wanted].copy()
    ordered = subset.sort_values(by='gene_name', key=lambda names: names.str.lower())
    destination = os.path.join(outdir, '%s.csv' % outname)
    ordered[['gene_name', 'round_halflife']].to_csv(destination)

# Write one stability table per gene list (neural-development list first, then patterning)
for _genes, _label in ((nb_genelist, 'nb_stab'), (pattern_genelist, 'pattern_stab')):
    sort_and_write(rate_df, _genes, _label)

In [None]:
# Add boolean membership columns to me_df, one per curated gene list
for _flag, _members in (
    ('nb_factor', nb_genelist),
    ('pattern_factor', pattern_genelist),
    ('segpol_factor', segpol_genelist),
):
    me_df[_flag] = me_df.index.isin(_members)

In [None]:
# Collect the RNAs that are regulated temporally in neuroblasts or in pattern formation.
# One sub-table is built per membership flag and tagged with that flag's name in
# 'factor', so a gene that belongs to several groups appears once per group.
dfs = [
    me_df.query(flag).assign(factor=flag)
    for flag in ('nb_factor', 'pattern_factor', 'segpol_factor')
]
df2 = pd.concat(dfs)

In [None]:
# Remove every row whose gene is flagged as a segment polarity factor
# (segpol_factor is True), whichever group the row was labelled with
df3 = df2.loc[~df2['segpol_factor']].copy()

In [None]:
# Plot the developmental TFs and their stability as a swarm plot (one row per
# process, coloured by TF status), then overlay a marker on the me3 genes.
fig = plt.figure(figsize=(dfig*1.5, dfig), constrained_layout=True)
ax = fig.add_subplot(111)
order = ['nb_factor', 'pattern_factor']
order2 = ['neural fate', 'patterning']
ax = sns.swarmplot(data=df3, x='stab_percentile', y='factor', palette=['black', color_dict['grey']], hue='TF',
                   order=order, ax=ax, s=4)
# Tick labels carry the number of rows plotted in each category
gene_nums = df3['factor'].value_counts().loc[order]
new_labels = [f'{i}\n(n = {j})' for i, j in zip(order2, gene_nums)]

# Recover the drawn position of each row so the me3 markers can be overlaid.
# Each row is matched to the nearest-x point WITHIN ITS OWN category: searching
# the stacked offsets of both categories would send a gene that belongs to both
# groups (identical stab_percentile) to the first category's point every time.
# NOTE(review): assumes ax.collections holds one collection per category, in the
# same order as `order` -- confirm for the installed seaborn version.
# pt_idx is the index of the matched point inside its category's collection.
df3['pt_idx'] = -1
df3['x_pos'] = np.nan
df3['y_pos'] = np.nan
for level, collection in zip(order, ax.collections):
    in_level = (df3['factor'] == level).to_numpy()
    if not in_level.any():
        continue
    level_offsets = np.asarray(collection.get_offsets())
    x_values = df3.loc[in_level, 'stab_percentile'].to_numpy()
    # Rows = data points of this category, columns = plotted points of this category
    nearest = np.abs(level_offsets[:, 0][np.newaxis, :] - x_values[:, np.newaxis]).argmin(axis=1)
    df3.loc[in_level, 'pt_idx'] = nearest
    df3.loc[in_level, ['x_pos', 'y_pos']] = level_offsets[nearest]

me3_rows = df3.query('me3')
ax.scatter(me3_rows['x_pos'], me3_rows['y_pos'], ec=color_dict['purple'], linewidth=0.7, zorder=10, label='me3')
h, l = ax.get_legend_handles_labels()
# Use one common marker size for all legend entries
for i in h:
    i.set_sizes([30])
ax.legend(h, ['non-TF', 'TF', 'me3'])
ax.set_yticklabels(new_labels)
ax.set_xlabel('stability percentile')
ax.set_ylabel('process')
plt.savefig('%s.%s' % (os.path.join(outdir, 'development_swarm0'), out_fmt), dpi = out_dpi)

In [None]:
# Swarm plot of patterning and neural fate genes, with segment polarity excluded
fig = plt.figure(figsize=(dfig*1.5, dfig), constrained_layout=True)
ax = fig.add_subplot(111)
# Fix the category order explicitly so that the hard-coded tick labels below
# always line up with the plotted rows, whatever the row order of df3.
factor_order = ['nb_factor', 'pattern_factor']
factor_labels = ['neural fate', 'patterning']
# It doesn't really make sense to plot the background distribution because of the percentile transformation
ax = sns.violinplot(data=df3, x='stab_percentile', y='factor', order=factor_order, ax=ax,
                    palette=[color_dict['purple'], color_dict['blue']], inner=None)
ax = sns.swarmplot(data=df3, x='stab_percentile', y='factor', order=factor_order,
                   palette=['black', 'white'], hue='TF', ax=ax, s=2)
ax.set_xlabel('stability percentile')
ax.set_ylabel('RNA regulated in:')
ax.set_yticklabels(factor_labels)
# Relabel the hue legend and place it above the axes
current_handles, current_labels = ax.get_legend_handles_labels()
ax.legend(current_handles, ['non-TF', 'TF'], bbox_to_anchor=(0.5, 1.0), loc=8, ncol = 2, facecolor=color_dict['grey'])
plt.savefig('%s.%s' % (os.path.join(outdir, 'development_swarm1'), out_fmt), dpi = out_dpi)

In [None]:
# Violin + swarm overlay of all three groups (segment polarity included).
# https://stackoverflow.com/questions/49612037/combine-overlay-two-factorplots-in-matplotlib
fig = plt.figure(figsize=(dfig*1.5, dfig), constrained_layout=True)
ax = fig.add_subplot(111)
# Fix the category order explicitly so that the hard-coded tick labels below
# always line up with the plotted rows, whatever the row order of df2.
factor_order = ['nb_factor', 'pattern_factor', 'segpol_factor']
factor_labels = ['neural fate', 'patterning', 'segment polarity']
# It doesn't really make sense to plot the background distribution because of the percentile transformation
# NOTE(review): the violin palette lists two colours for three categories.
ax = sns.violinplot(data=df2, x='stab_percentile', y='factor', order=factor_order, ax=ax,
                    palette=[color_dict['purple'], color_dict['blue']], inner=None)
ax = sns.swarmplot(data=df2, x='stab_percentile', y='factor', order=factor_order,
                   palette=['black', 'white'], hue='TF', ax=ax, s=2)
ax.set_xlabel('stability percentile')
ax.set_ylabel('RNA regulated in:')
ax.set_yticklabels(factor_labels)
# Relabel the hue legend and place it above the axes
current_handles, current_labels = ax.get_legend_handles_labels()
ax.legend(current_handles, ['non-TF', 'TF'], bbox_to_anchor=(0.5, 1.0), loc=8, ncol = 2, facecolor=color_dict['grey'])
plt.savefig('%s.%s' % (os.path.join(outdir, 'development_swarm2'), out_fmt), dpi = out_dpi)

In [None]:
# Which genes are flagged both as neural fate (nb_factor) and as patterning (pattern_factor)?
_nb_index = df2.index[df2['nb_factor'].to_numpy()]
_pattern_index = df2.index[df2['pattern_factor'].to_numpy()]
reused_genes = _nb_index.intersection(_pattern_index)