<a href="https://colab.research.google.com/github/sanjaynagi/AnoExpress/blob/main/workflow/notebooks/summarise-gene-expression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install anoexpress kaleido -q

![image](https://raw.githubusercontent.com/sanjaynagi/AnoExpress/main/docs/logo.png)


In [1]:
import anoexpress as xpress
import pandas as pd
import kaleido

In [2]:
def _melt_by_species(wide, species, value_name):
  """Reshape a gene x comparison table into long form with a species column.

  The table is transposed so each row is one comparison, tagged with its
  species, and the comparison labels are dropped so that rows are identified
  by position only. The result has columns `species`, `GeneID`, `value_name`.
  """
  return (
    wide.T
    .assign(species=species)
    .reset_index(drop=True)
    # name the gene column explicitly instead of relying on the index name
    .melt(id_vars='species', var_name='GeneID', value_name=value_name)
  )


def summarise_expression(analysis, gene_id, microarray):
  """Summarise fold changes and significance per species and gene.

  Parameters
  ----------
  analysis : forwarded to `xpress.data` and `xpress.metadata`.
  gene_id : forwarded to `xpress.data` to select the genes to summarise.
  microarray : forwarded to `xpress.data` and `xpress.metadata`.

  Returns
  -------
  pandas.DataFrame
    Indexed by (`species`, `GeneID`) with columns:
    `up_sig`   - number of comparisons with fold change > 0 and p-value < 0.05
    `down_sig` - number of comparisons with fold change < 0 and p-value < 0.05
    `count`    - total number of comparisons in the group
    `median`, `mean`, `sd` - median, mean and standard deviation of the fold
    change values, each back-transformed with `2**x` (the values are assumed
    to be on a log2 scale, so `mean` is a geometric mean and `sd` a geometric
    standard deviation factor).
  """
  # load fold changes, p-values and the per-comparison metadata
  fc_data = xpress.data(
    analysis=analysis,
    data_type='fcs',
    gene_id=gene_id,
    microarray=microarray,
  )
  pval_data = xpress.data(
    analysis=analysis,
    data_type='pvals',
    gene_id=gene_id,
    microarray=microarray,
  )
  metadata = xpress.metadata(analysis=analysis, microarray=microarray)
  species = metadata.species.to_list()

  # fold changes and p-values are paired by position below, so make sure both
  # tables list the genes in the same order
  if not pval_data.index.equals(fc_data.index):
    pval_data = pval_data.reindex(fc_data.index)

  # long format: one row per (comparison, gene)
  long_fc = _melt_by_species(fc_data, species, 'log2fc')
  long_pval = _melt_by_species(pval_data, species, 'pval')

  # flag significantly up- and down-regulated comparisons; missing values
  # compare as False and are therefore counted as non-significant
  significant = long_pval['pval'] < 0.05
  long_fc = long_fc.assign(
    up_sig=((long_fc['log2fc'] > 0) & significant).astype(int),
    down_sig=((long_fc['log2fc'] < 0) & significant).astype(int),
  )

  # one aggregation pass per (species, gene)
  expr_summary = long_fc.groupby(['species', 'GeneID']).agg(
    up_sig=('up_sig', 'sum'),
    down_sig=('down_sig', 'sum'),
    count=('up_sig', 'count'),
    median=('log2fc', 'median'),
    mean=('log2fc', 'mean'),
    sd=('log2fc', 'std'),
  )

  # back-transform the summary statistics from the log2 scale
  stat_cols = ['median', 'mean', 'sd']
  expr_summary[stat_cols] = 2 ** expr_summary[stat_cols]

  return expr_summary

def plot_donut_summary(expr_summary, colors=['green', 'purple', 'darkgray']):
  """Build one donut chart per (species, gene) from an expression summary.

  Parameters
  ----------
  expr_summary : pandas.DataFrame
    Summary with `species` and `GeneID` in the index and the columns `up_sig`,
    `down_sig` and `count` (total number of comparisons).
  colors : sequence of str
    Slice colours, in the order over-expressed, under-expressed,
    non-significant. The sequence is copied, never modified.

  Returns
  -------
  dict
    Maps each species to a list of plotly figures, one per gene, in the order
    the genes appear for that species in `expr_summary`.
  """
  import plotly.graph_objects as go

  # outline colour per species; species not listed here get a neutral outline
  species_line_colors = {
    'gambiae': 'royalblue',
    'coluzzii': 'darkorange',
    'arabiensis': 'limegreen',
    'funestus': 'red',
  }
  labels = ['over-expressed', 'under-expressed', 'non-significant']
  value_cols = ['GeneID', 'up_sig', 'down_sig', 'count']

  summary = expr_summary.reset_index()

  figs = {}
  # iterate over each species once, not once per summary row
  for sp in summary['species'].unique():
    linecolor = species_line_colors.get(sp, 'black')
    sp_rows = summary.loc[summary['species'] == sp].drop_duplicates('GeneID')

    sp_figs = []
    # plain tuples: the `count` column would clash with namedtuple.count
    for gene, up, down, total in sp_rows[value_cols].itertuples(index=False, name=None):
      # whatever is neither significantly up nor down is non-significant
      values = [up, down, total - (up + down)]

      # Use `hole` to create a donut-like pie chart
      fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.4)])
      fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=20,
                        marker=dict(colors=list(colors), line=dict(color=linecolor, width=6)))
      fig.update_layout(title=gene, width=400, height=400)
      sp_figs.append(fig)

    figs[sp] = sp_figs

  return figs

In [3]:
# Summarise two genes for the 'gamb_colu_arab' analysis (with microarray=True)
# and display the resulting table.
expr_summary = summarise_expression(
  analysis='gamb_colu_arab',
  gene_id=["AGAP006227", "AGAP006228"],
  microarray=True,
)
expr_summary

Unnamed: 0_level_0,Unnamed: 1_level_0,up_sig,down_sig,count,median,mean,sd
species,GeneID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
arabiensis,AGAP006227,4,0,15,1.079228,1.259421,1.560894
arabiensis,AGAP006228,5,0,15,1.146847,1.16298,1.245126
coluzzii,AGAP006227,8,7,31,1.035265,1.448078,2.300018
coluzzii,AGAP006228,16,6,31,1.320809,1.317824,1.64738
gambiae,AGAP006227,7,0,8,1.654045,2.252022,1.849406
gambiae,AGAP006228,3,0,8,1.185054,1.154745,1.232608


In [13]:
# Donut charts with a custom palette, given in the order
# over-expressed, under-expressed, non-significant.
figs = plot_donut_summary(
  expr_summary,
  colors=['lime', 'magenta', 'lightgrey'],
)

In [14]:
# Write the first two donut charts of each species to "<species>_<index>.png"
# in the working directory (static image export goes through kaleido).
for species_name in ('gambiae', 'coluzzii', 'arabiensis'):
  for fig_idx in range(2):
    figs[species_name][fig_idx].write_image(f"{species_name}_{fig_idx}.png")