In [None]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import chain
import scipy.stats as stats

Load the read counts for both replicates (count files 1–5 are replicate 1, files 6–10 are replicate 2) and try plotting them with kdeplot.

In [None]:
sample_names = ['liquid', 'semisolid', 'small spread', 'bead spread', 'large spread']


def load_replicate(first_file_index, names=sample_names,
                   data_dir='../../data/20220901_final_counts'):
  """Load one replicate's count arrays and build a long-format DataFrame.

  The count files are named ``counts.<k>_counts.npy``; the k-th file of a
  replicate is paired with the k-th entry of ``names``.

  Args:
    first_file_index: File number of the first sample of the replicate
      (1 for replicate 1, 6 for replicate 2 in this notebook).
    names: Sample names, in the same order as the numbered files.
    data_dir: Directory that holds the ``.npy`` count files.

  Returns:
    A ``(samples, frame)`` tuple: ``samples`` is the list of loaded arrays
    (one per sample name) and ``frame`` is a DataFrame with one row per
    array element and the columns ``'sample'`` and ``'sequence reads'``.
  """
  samples = [
    np.load(f'{data_dir}/counts.{first_file_index + offset}_counts.npy')
    for offset in range(len(names))
  ]
  # Repeat each sample name once per element of its array so that names and
  # counts line up row by row when zipped together.
  names_repeated = [[name] * len(counts) for name, counts in zip(names, samples)]
  frame = pd.DataFrame(
    zip(chain.from_iterable(names_repeated), chain.from_iterable(samples)),
    columns=['sample', 'sequence reads'],
  )
  return samples, frame


# Files 1-5 belong to replicate 1 and files 6-10 to replicate 2.
rep1_samples, df1 = load_replicate(1)
rep2_samples, df2 = load_replicate(6)

In [None]:
# Overlaid per-sample density curves for replicate 1, restricted to 1-100 reads.
kde_rep1_ax = sns.kdeplot(
  data=df1,
  x='sequence reads',
  hue='sample',
  clip=[1, 100],
)
kde_rep1_ax.set_xlim([0, 100])
kde_rep1_ax.figure.savefig("../../images/kde_rep1.svg")

In [None]:
# Overlaid per-sample density curves for replicate 2, restricted to 1-200 reads.
kde_rep2_ax = sns.kdeplot(
  data=df2,
  x='sequence reads',
  hue='sample',
  clip=[1, 200],
)
kde_rep2_ax.set_xlim([0, 200])
kde_rep2_ax.figure.savefig("../../images/kde_rep2.svg")

In [None]:
# One narrow histogram panel per sample for replicate 1, with unit-width bins
# whose edges run from 0 to 99.
sns.displot(
  data=df1,
  y='sequence reads',
  col="sample",
  aspect=.4,
  bins=range(100),
)

In [None]:
# One narrow histogram panel per sample for replicate 2, with unit-width bins
# whose edges run from 0 to 99.
sns.displot(
  data=df2,
  y='sequence reads',
  col="sample",
  aspect=.4,
  bins=range(100),
)

In [None]:
# Violin plot of replicate 1 read counts per sample, y-axis limited to 0-100.
# NOTE(review): newer seaborn releases appear to deprecate `bw=` in favour of
# `bw_method` / `bw_adjust` -- confirm against the installed version.
violin_ax = sns.violinplot(
  data=df1,
  x='sample',
  y='sequence reads',
  bw=0.5,
)
violin_ax.set_ylim([0, 100])

In [None]:
# Overlaid per-sample histograms for replicate 1 with unit-width bins whose
# edges run from 1 to 99.
sns.histplot(
  data=df1,
  x='sequence reads',
  hue='sample',
  bins=range(1, 100),
)

In [None]:
# Each replicate gets its own figure/axes. Without an explicit `ax`, both
# boxplot calls draw onto the same current axes, so the replicate 2 file would
# also contain the replicate 1 boxes (and inherit its y-limits).
fig_rep1, ax_rep1 = plt.subplots()
sns.boxplot(data=df1, y='sequence reads', x='sample', ax=ax_rep1)
ax_rep1.set_ylim([0, 2200])
fig_rep1.savefig("../../images/box_rep1.svg")

# No y-limit is specified for replicate 2, so its axis is left to autoscale.
fig_rep2, ax_rep2 = plt.subplots()
sns.boxplot(data=df2, y='sequence reads', x='sample', ax=ax_rep2)
fig_rep2.savefig("../../images/box_rep2.svg")

In [None]:
def label(x, color, label):
  """Write ``label`` in bold inside the current axes.

  Intended to be passed to ``FacetGrid.map`` (see ``plot_ridge``).

  Args:
    x: Data passed positionally by the caller; not used here.
    color: Text colour.
    label: Text to draw.
  """
  axes = plt.gca()
  text_style = dict(fontweight="bold", color=color, ha="left", va="center")
  # Position is given in axes-fraction coordinates (0.3 across, 0.4 up).
  axes.text(0.3, 0.4, label, transform=axes.transAxes, **text_style)

def plot_ridge(df, max, name):
  """Draw a ridge plot (stacked, overlapping KDEs) per sample and save it as SVG.

  Args:
    df: DataFrame with a ``'sample'`` column (one facet row per value) and a
      ``'sequence reads'`` column (the variable whose density is drawn).
    max: Upper bound used both for the KDE ``clip`` range ``[1, max]`` and for
      the x-axis limit ``[0, max]``. The name shadows the builtin ``max``
      inside this function.
    name: File stem; the figure is written to ``../../images/<name>.svg``.

  Calls ``sns.set_theme``, so the seaborn theme is changed as a side effect.
  """
  # Transparent axes background so overlapping facets do not hide each other.
  sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

  # One facet row per sample, coloured from a cubehelix palette.
  palette = sns.cubehelix_palette(10, rot=-.25, light=.7)
  grid = sns.FacetGrid(df, row="sample", hue="sample", aspect=5, height=1, palette=palette)

  # KDE settings shared by the filled curve and its white outline.
  kde_options = dict(bw_adjust=.5, clip=[1, max], cut=1)

  # First pass: filled density curves.
  grid.map(sns.kdeplot, "sequence reads",
    fill=True, alpha=1, linewidth=1.5, **kde_options)
  plt.xlim([0, max])
  # Second pass: white outline drawn over each filled curve.
  grid.map(sns.kdeplot, "sequence reads",
    clip_on=False, color="w", lw=2, **kde_options)

  # passing color=None to refline() uses the hue mapping
  grid.refline(y=0, linewidth=2, linestyle="-", color=None, clip_on=False)

  # Write each facet's label inside its axes.
  grid.map(label, "sequence reads")

  # Negative spacing makes the facet rows overlap.
  grid.figure.subplots_adjust(hspace=-.4)

  # Strip titles, y ticks/labels and spines, which clash with the overlap.
  grid.set_titles("")
  grid.set(yticks=[], ylabel="")
  grid.despine(bottom=True, left=True)

  output_path = f"../../images/{name}.svg"
  plt.savefig(output_path)

In [None]:
# Ridge plots for both replicates; replicate 2 uses a wider read-count range.
plot_ridge(df=df1, max=100, name='kde_stack_rep1')
plot_ridge(df=df2, max=200, name='kde_stack_rep2')