In [None]:
import numpy as np
from tqdm import tqdm
import pandas as pd
import csv
import sys
import logging
import os
from matplotlib import pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Mount Google Drive if necessary (uncomment the two lines below when running on Colab)
# from google.colab import drive
# drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Locations of the two input tables; both are created via the matrix_screen_DE.ipynb notebook.
stim_de_file = "/path/to/stim/de/file"
genotype_de_file = "path/to/mutant/de_file"

# Both tables are read as tab-separated text.
stim_df = pd.read_csv(stim_de_file, delimiter='\t')
geno_df = pd.read_csv(genotype_de_file, delimiter='\t')

In [None]:
# Axis limits per stimulation as (x-range, y-range). Stimulations that are not
# listed here keep plotly's automatic axis ranges.
_QUADRANT_AXIS_RANGES = {
  'LPS': ([-6, 6], [-6, 6]),
  'IL1b': ([-6, 6], [-6, 6]),
  'IFNG': ([-6, 6], [-6, 6]),
  'IL10': ([-4, 4], [-4, 4]),
  'TGFb': ([-3, 3], [-4, 4]),
  'Pam3CSK4': ([-8, 8], [-4, 4]),
}


def _require_columns(frame, columns, frame_label):
  """Raise a KeyError naming every entry of `columns` that `frame` lacks."""
  missing = [col for col in columns if col not in frame.columns]
  if missing:
    raise KeyError(f"{frame_label} is missing column(s): {missing}")


def _unit_scale(values):
  """Min-max scale `values` to [0, 1] without ever producing NaN.

  Non-finite entries are first replaced by the nearest finite extreme
  (+inf -> largest finite value, -inf / NaN -> smallest finite value).
  If every finite value is identical (or there is none), all zeros are returned.
  """
  scaled = np.asarray(values, dtype=float)
  finite = np.isfinite(scaled)
  if not finite.any():
    return np.zeros_like(scaled)
  low = scaled[finite].min()
  high = scaled[finite].max()
  if high == low:
    return np.zeros_like(scaled)
  scaled = np.nan_to_num(scaled, nan=low, posinf=high, neginf=low)
  return (scaled - low) / (high - low)


def quadrant_plot(stop_codon_id, geno_df, stim_df):
  """Show a scatter plot of genotype logFC (x) against stimulation logFC (y).

  Only genes present in both tables are plotted. A gene is drawn in red when
  both its genotype p-value and its stimulation p-value are below 0.05, and in
  grey otherwise. Red markers grow with -log10 of the mean of the two p-values;
  grey markers share one small fixed size. A dashed least-squares line is fitted
  through the red points when there are at least two of them.

  Parameters
  ----------
  stop_codon_id : str
      Identifier of the form '<stimulation>_<rest>'. The text before the first
      underscore selects the stimulation columns; the full string selects the
      genotype columns.
  geno_df : pandas.DataFrame
      Must contain 'gene_name', '<stop_codon_id>_pval' and '<stop_codon_id>_logfc'.
  stim_df : pandas.DataFrame
      Must contain 'gene_names', '<stimulation>_pval_adj' and '<stimulation>_logfc'.

  Returns
  -------
  None
      The figure is displayed with ``fig.show()``.

  Raises
  ------
  KeyError
      If one of the required columns is missing from either table.
  """
  stim_variant = stop_codon_id
  stimulation = stop_codon_id.split('_')[0]
  alpha = 0.05  # cut-off applied to both p-value columns

  # NOTE(review): the genotype table is read from a '_pval' column while the
  # stimulation table is read from '_pval_adj' - confirm this asymmetry is intended.
  geno_pval_col = stim_variant + '_pval'
  geno_logfc_col = stim_variant + '_logfc'
  stim_pval_col = stimulation + '_pval_adj'
  stim_logfc_col = stimulation + '_logfc'
  _require_columns(geno_df, ['gene_name', geno_pval_col, geno_logfc_col], 'geno_df')
  _require_columns(stim_df, ['gene_names', stim_pval_col, stim_logfc_col], 'stim_df')

  # Align both tables on the genes they share (first occurrence of each name).
  common_names, idx_geno, idx_stim = np.intersect1d(
      geno_df['gene_name'].to_list(), stim_df['gene_names'].to_list(), return_indices=True)

  df = pd.DataFrame({
      'logFC Genotype': geno_df[geno_logfc_col].to_numpy()[idx_geno],
      'logFC Stimulation': stim_df[stim_logfc_col].to_numpy()[idx_stim],
      'Gene Name': common_names,
      'pval Genotype': geno_df[geno_pval_col].to_numpy()[idx_geno],
      'pval Stimulation': stim_df[stim_pval_col].to_numpy()[idx_stim],
  })

  df['is_significant'] = (df['pval Genotype'] < alpha) & (df['pval Stimulation'] < alpha)
  df['significance'] = np.where(df['is_significant'], 'red', 'grey')
  # Grey rows first, so the grey trace is created before the red one and the
  # significant points end up drawn on top.
  df = df.sort_values(by='is_significant', kind='stable')
  significant = df['is_significant'].to_numpy()

  # Marker sizes are expressed in units of `default_size`; the pixel diameter
  # is size / sizeref (see update_traces below).
  scaling_factor = 0.005
  insignificant_size = 2
  max_size_sig = 5
  min_size_sig = 2
  default_size = insignificant_size * scaling_factor

  mean_pval = (df.loc[significant, 'pval Genotype'] + df.loc[significant, 'pval Stimulation']) / 2
  with np.errstate(divide='ignore', invalid='ignore'):
    # A p-value of exactly 0 gives +inf here; _unit_scale maps it to the top of the scale.
    neg_log10 = -np.log10(mean_pval.to_numpy(dtype=float))

  sizes = np.full(len(df), default_size, dtype=float)
  sizes[significant] = (min_size_sig + _unit_scale(neg_log10) * (max_size_sig - min_size_sig)) * default_size
  df['size'] = sizes

  fig = px.scatter(df, x='logFC Genotype', y='logFC Stimulation',
                   hover_data=['Gene Name', 'pval Genotype', 'pval Stimulation'],
                   color='significance',
                   size='size',
                   color_discrete_map={'red': 'red', 'grey': 'grey'})

  # Quadrant axes through the origin. The spanning dimension is given in paper
  # coordinates (0..1) so each line always covers the full plot area, whatever
  # the data extent or the axis range chosen below.
  axis_line = dict(color="black", width=2)
  fig.add_shape(type="line", xref="x", yref="paper",
                x0=0, x1=0, y0=0, y1=1, line=axis_line)
  fig.add_shape(type="line", xref="paper", yref="y",
                x0=0, x1=1, y0=0, y1=0, line=axis_line)

  # Regression over significant points only; rows with a missing logFC are
  # excluded so they cannot turn the fit into NaN.
  sig_points = df.loc[significant, ['logFC Genotype', 'logFC Stimulation']].dropna()
  if len(sig_points) > 1:
    coefficients = np.polyfit(sig_points['logFC Genotype'], sig_points['logFC Stimulation'], 1)
    # Evaluate on a fixed span that covers the widest x-axis range used below (-8..8).
    x_values = np.linspace(-8, 8, 100)
    y_values = np.poly1d(coefficients)(x_values)
    fig.add_traces(go.Scatter(x=x_values, y=y_values, mode='lines',
                              line=dict(color='grey', dash='dash'),
                              name='reg'))
  else:
    print("Not enough significant points to fit a regression line.")

  fig.update_layout(
      title_text=stim_variant,
      xaxis_title="LogFC Premature Stop-Codon Mutant vs. AAVS",
      yaxis_title="LogFC Stimulated vs. Control Condition",
      plot_bgcolor='white',
      width=685,   # figure width in pixels
      height=600,  # figure height in pixels
      margin=dict(l=50, r=50, b=50, t=50),
      autosize=False,  # keep the fixed pixel size
  )

  # sizemode/sizeref control how the 'size' column is converted to pixels.
  fig.update_traces(marker=dict(sizemode='diameter', sizeref=0.003,
                                line=dict(width=1, color='black')))

  axis_ranges = _QUADRANT_AXIS_RANGES.get(stimulation)
  if axis_ranges is not None:
    x_range, y_range = axis_ranges
    fig.update_xaxes(range=list(x_range))
    fig.update_yaxes(range=list(y_range))

  fig.show()

In [None]:
# Plot one stimulation/variant pair from the tables loaded above (`geno_df`, `stim_df`).
# To plot another donor, point `stim_de_file` / `genotype_de_file` at that donor's files and re-run.
quadrant_plot('LPS_chr3:38139029:G/A_hom', geno_df, stim_df)