<a href="https://colab.research.google.com/github/sanjaynagi/rna-seq-meta/blob/main/workflow/notebooks/plot-gene-expression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Shell escape: clone the rna-seq-meta repository into the current working directory
! git clone https://github.com/sanjaynagi/rna-seq-meta.git

import pandas as pd
import numpy as np
import plotly.express as px

def plotly_gene_fcs(gene_id, title, plot_type='strip', sort_by='agap', width=1000, height=None):
  """Plot fold changes of provided AGAP gene IDs from RNA-Seq
  meta-analysis dataset

  The fold-change table and the sample metadata are read from the
  tab-separated files named by the variables ``fc_path`` and ``meta_path``.
  Neither is a parameter of this function, so both must already be defined
  in the enclosing (notebook/module) scope when it is called. The figure is
  displayed with ``fig.show()``; nothing is returned.

  Parameters
  ----------
  gene_id : str or list
      An AGAP identifier or list of AGAP identifiers
  title : str
      Plot title
  plot_type : {"strip", "boxplot"}
      'strip' draws a strip plot; any other value draws a box plot
  sort_by : {"median", "mean", "agap"}
      order genes by the median/mean of their fold changes, or by AGAP
      identifier
  width : int
      Width in pixels of the plotly figure
  height: int, optional
      Height in pixels of the plotly figure. Defaults to automatic sizing
      (12 px per plotted row, capped at 2500 px)

  Raises
  ------
  ValueError
      If `sort_by` is not one of the accepted values, or if none of the
      requested identifiers is present in the fold-change table.
  """
  valid_sorts = ('median', 'mean', 'agap')
  if sort_by not in valid_sorts:
    raise ValueError(f"sort_by must be one of {valid_sorts}, got {sort_by!r}")

  # a lone identifier is treated as a one-element list
  gene_ids = [gene_id] if isinstance(gene_id, str) else list(gene_id)

  # load metadata
  metadata = pd.read_csv(meta_path, sep="\t")
  # load fold change data and remove gene description column (the last one)
  fc_data = pd.read_csv(fc_path, sep="\t").iloc[:, :-1]

  # keep only the requested genes
  fam_fc_data = fc_data[fc_data['GeneID'].isin(gene_ids)].copy()
  if fam_fc_data.empty:
    raise ValueError(f"None of the requested gene IDs were found in the fold change data: {gene_ids}")

  # Work out the row order. argsort is ascending, so the result is reversed.
  # Sorting the raw numpy array keeps the ordering independent of how
  # pandas' own Series.argsort treats missing values.
  if sort_by == 'agap':
    sort_keys = fam_fc_data['GeneID'].to_numpy()
  else:
    row_stat = np.nanmedian if sort_by == 'median' else np.nanmean
    sort_keys = fam_fc_data.set_index(['GeneID', 'GeneName']).apply(row_stat, axis=1).to_numpy()
  sort_idxs = np.argsort(sort_keys)[::-1]
  fam_fc_data = fam_fc_data.iloc[sort_idxs, :]

  # y-axis label: "<GeneID> | <GeneName>", or just the ID when there is no name
  fam_fc_data['Label'] = [
      id_ + " | " + name if name != "" else id_
      for id_, name in zip(fam_fc_data['GeneID'].fillna(""), fam_fc_data['GeneName'].fillna(""))
  ]

  # reshape to long format: one row per (gene label, comparison)
  fam_fc_data = fam_fc_data.drop(columns=['GeneName', 'GeneID']).melt(
      id_vars='Label', var_name='comparison', value_name='log2FC'
  )
  # strip the column suffix so the value is the bare comparison name
  fam_fc_data['comparison'] = fam_fc_data['comparison'].str.replace("_log2FoldChange", "", regex=False)
  # attach metadata; the join uses whatever columns the two tables share
  fam_fc_data = fam_fc_data.merge(metadata, how='left')
  fam_fc_data['log2FC'] *= -1 # invert the FCs (currently > 0 log2FC = overexpression in susceptible)

  if not height:
    # automatic sizing: 12 px per plotted row, capped at 2500 px
    height = min(fam_fc_data.shape[0] * 12, 2500)

  my_plot = px.strip if plot_type == 'strip' else px.box
  fig = my_plot(
      fam_fc_data,
      y='Label',
      x='log2FC',
      color='species',
      title=title,
      hover_data=['resistant', 'susceptible', 'species', 'country'],
      width=width,
      height=height,
      template='ggplot2'
  )
  # `title_font` is the supported spelling of the deprecated `titlefont` alias
  fig.update_layout(
      title_font=dict(size=20),
      xaxis_range=[-4, 6],
      xaxis_title="log2 Fold Change",
      yaxis_title="Gene",
  )
  # dashed reference line at zero fold change
  fig.add_vline(0, line_width=1, line_dash="dash", line_color="grey")
  fig.show()

In [None]:
#@title **RNA-Seq-Meta** { run: "auto" }
#@markdown This notebook produces interactive strip plots and boxplots with plotly to summarise gene expression across ~30 *Anopheles* insecticide-resistant vs. susceptible RNA-Sequencing experiments.
#@markdown Currently *An. gambiae* is not split into *gambiae* and *coluzzii* (this will change). You can toggle which species are displayed by clicking the legend. Because *An. funestus* experiments are included,
#@markdown only *gambiae* genes with *funestus* orthologs are present.  
#@markdown   
#@markdown Requesting feedback and ideas for how to explore the data.

# Form inputs: the `#@param` annotations below are Colab form markers attached
# to these two assignments, so each must stay on the same line as its variable.
GeneID = "AGAP006227" #@param {type:"string"} 
plot_type = "strip" #@param ['strip', 'boxplot']

# Plot the selected gene with an empty title and a fixed height of 300 px.
plotly_gene_fcs(gene_id=GeneID, title="", plot_type=plot_type, height=300)


We can provide a list of genes, and sort by AGAP identifier or by the median or mean fold-change.

In [None]:
# Strip plot of three genes in one figure, ordered by AGAP identifier.
plotly_gene_fcs(
    gene_id=["AGAP006222", "AGAP006227", "AGAP006228"],
    title="Coeae1f",
    plot_type='strip',
    sort_by='agap',
    height=300,
)

You can also produce a boxplot, although the hover text doesn't quite work as expected.

In [None]:
# Box plot of the same three genes, ordered by median fold change.
plotly_gene_fcs(
    gene_id=["AGAP006222", "AGAP006227", "AGAP006228"],
    title="Coeae1f",
    plot_type='boxplot',
    sort_by='median',
    height=300,
)