In [8]:
import os
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
import pandas as pd
from tqdm import tqdm
from ceres_infer.data import stats_Crispr

In [10]:
# Input: pickled data object that the loading cell reads with pickle.load.
dm_data_pkl_file = '../out/20.0817 proc_data/gene_effect/dm_data.pkl'
# Output directory for the figures and CSV written by this notebook.
outdir = '../out/20.0817 proc_data_baseline/gene_effect/'

# exist_ok=True creates the directory tree only when it is missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(outdir, exist_ok=True)

# Turn off matplotlib's interactive mode; the plotting cell saves each figure
# to disk and closes it.
plt.interactive(False)

# Plots

In [12]:
# Register tqdm's progress_apply / progress_map helpers on pandas objects.
tqdm.pandas()

#----------------------
#load data
# Use a context manager so the file handle is closed as soon as the pickle has
# been read, instead of being left open until garbage collection.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load pickles produced by a trusted pipeline.
with open(dm_data_pkl_file, 'rb') as pkl_fh:
    dm_data = pickle.load(pkl_fh)

# Summary table returned by stats_Crispr; the plotting code reads its
# 'avg', 'std' and 'diff' columns.
df_crispr_stats = stats_Crispr(dm_data)

#----------------------
#plot stats
def _save_figure(fig, filename):
    """Write ``fig`` to ``filename`` inside ``outdir`` and close the figure."""
    fig.savefig(os.path.join(outdir, filename))
    # Close explicitly so figures do not accumulate in memory.
    plt.close(fig)


# One entry per distribution plot: (stats column, x-axis label, output file).
dist_specs = [
    ('avg', 'CERES [mean]', 'dist_ceres_mean.pdf'),
    ('std', 'CERES [SD]', 'dist_ceres_sd.pdf'),
]

# One entry per scatter plot: (x column, y column, x label, y label, output file).
# The '.' in 'scatter_range.sd.png' is kept as-is because it is an existing
# output path.
scatter_specs = [
    ('diff', 'std', 'CERES range', 'CERES sd', 'scatter_range.sd.png'),
    ('avg', 'std', 'CERES mean', 'CERES sd', 'scatter_mean_sd.png'),
    ('avg', 'diff', 'CERES mean', 'CERES range', 'scatter_mean_range.png'),
]

# NOTE(review): sns.distplot is deprecated as of seaborn 0.11 (histplot /
# displot replace it); it is kept here so the saved figures do not change.
for stat_col, x_label, filename in dist_specs:
    fig, ax = plt.subplots()
    sns.distplot(df_crispr_stats[stat_col], ax=ax)
    ax.set(xlabel=x_label, ylabel='Freq')
    _save_figure(fig, filename)

for x_col, y_col, x_label, y_label, filename in scatter_specs:
    fig, ax = plt.subplots()
    sns.scatterplot(x=x_col, y=y_col, data=df_crispr_stats, s=90, ax=ax)
    ax.set(xlabel=x_label, ylabel=y_label)
    _save_figure(fig, filename)

  from pandas import Panel


# Classifications

In [13]:
# get gene dependency classifications (selective essential, common essentials, common non-essential)
# NOTE: df_genedep is the same object as dm_data.df_crispr (no copy is made),
# so the relabelling below is also visible through dm_data.
df_genedep = dm_data.df_crispr

# Strip the trailing bracketed tag from each column label, e.g.
# "A1BG (1) [CERES]" -> "A1BG (1)".
# A raw string is used so the regex escapes are not treated as (invalid) string
# escapes. Only a bracketed suffix is removed, so re-running this cell on
# already-stripped labels is a no-op; the earlier "capture everything before
# the last whitespace" pattern removed one more token on every re-run.
df_genedep.columns = df_genedep.columns.str.replace(r'\s\[[^\]]*\]$', '', regex=True)

def classifyDep(x, threshold=0.5):
    """Label one vector of scores by comparing every value with a cut-off.

    Parameters
    ----------
    x : array-like of numbers
        The values to classify together (the caller passes one DataFrame
        column at a time).
    threshold : float, default 0.5
        Cut-off used for both comparisons.

    Returns
    -------
    str
        'common_essential' if every value is strictly greater than
        ``threshold``; 'common_nonessential' if every value is strictly less
        than ``threshold``; otherwise 'selective_essential'.

    Notes
    -----
    * NaN values and values exactly equal to ``threshold`` fail both strict
      comparisons, so any such entry yields 'selective_essential'.
    * An empty input yields 'common_essential' (both conditions are vacuously
      true and the first branch wins).
    * NOTE(review): a 0.5 cut-off with "greater than" meaning essential looks
      like a dependency-probability convention -- confirm it is the intended
      rule for the values actually passed in.
    """
    # Vectorised comparison; np.asarray also lets plain lists be passed.
    scores = np.asarray(x)
    if (scores > threshold).all():
        return 'common_essential'
    if (scores < threshold).all():
        return 'common_nonessential'
    return 'selective_essential'

# Classify every column of df_genedep, then write the labels as a header-less
# CSV whose first field is the column label and second is its class.
gene_class_csv = "%s/gene_essential_classification.csv" % outdir
dep_class = df_genedep.apply(classifyDep, axis=0)
dep_class.to_csv(gene_class_csv, header=False, index=True)

# Stats

In [7]:
# Display the CRISPR score frame held by dm_data.
# NOTE(review): the "Classifications" cell reassigns this frame's column labels
# in place (df_genedep is the same object), so a top-to-bottom run shows the
# relabelled columns here.
dm_data.df_crispr

Unnamed: 0_level_0,A1BG (1) [CERES],A1CF (29974) [CERES],A2M (2) [CERES],A2ML1 (144568) [CERES],A3GALT2 (127550) [CERES],A4GALT (53947) [CERES],A4GNT (51146) [CERES],AAAS (8086) [CERES],AACS (65985) [CERES],AADAC (13) [CERES],...,ZWILCH (55055) [CERES],ZWINT (11130) [CERES],ZXDA (7789) [CERES],ZXDB (158586) [CERES],ZXDC (79364) [CERES],ZYG11A (440590) [CERES],ZYG11B (79699) [CERES],ZYX (7791) [CERES],ZZEF1 (23140) [CERES],ZZZ3 (26009) [CERES]
DepMap_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ACH-000005,-0.068759,0.218792,0.178252,0.158390,-0.193862,-0.324566,0.246220,-0.576495,-0.081217,0.016182,...,-0.176432,-0.391199,-0.182117,-0.108978,0.186545,-0.075884,-0.095781,0.029269,0.000945,-0.242038
ACH-000007,0.053893,0.081444,-0.060170,0.153435,0.087362,0.150684,0.061146,-0.470462,-0.012210,0.277616,...,-0.101852,-0.276755,-0.030821,0.121126,0.214875,-0.019300,-0.342632,0.083610,-0.392722,-0.443380
ACH-000009,0.059874,-0.011153,-0.054367,0.060886,0.039767,0.043527,0.011845,-0.630290,0.161797,0.033587,...,-0.355880,-0.290047,-0.031825,0.115886,0.116784,0.035294,-0.575523,0.228940,-0.114559,-0.549906
ACH-000011,0.277165,0.085354,0.007972,0.445843,-0.036717,-0.261409,0.111173,-0.430867,0.138193,0.120785,...,-0.418769,-0.518908,-0.128187,-0.126336,0.269698,0.148516,-0.227106,0.120656,-0.252444,-0.401821
ACH-000012,0.008073,0.167177,0.088705,0.307599,0.015440,-0.137284,0.264126,-0.491951,0.073552,0.308495,...,-0.393597,-0.834722,0.062316,-0.045060,0.133700,0.122965,-0.132978,0.050321,-0.175689,-0.402526
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ACH-001745,0.208843,0.153637,-0.134906,0.167583,-0.130067,-0.082006,0.108194,-0.511436,0.096255,0.091139,...,-0.006292,-0.598863,0.191322,0.145424,0.056416,0.125346,-0.089675,0.063647,0.123191,-0.459169
ACH-001750,0.044674,0.126563,-0.082100,0.132673,-0.172350,-0.294151,0.113662,-0.673395,0.049989,-0.065654,...,-0.119853,-0.554706,-0.161670,-0.046699,0.111985,-0.007653,-0.087820,0.009488,-0.302427,-0.259728
ACH-001765,0.136364,0.021261,-0.107147,0.076223,-0.116583,0.071279,-0.007449,-0.458998,-0.069348,0.156374,...,-0.159882,-0.440949,0.001038,-0.065006,0.053587,0.206996,-0.247742,0.234458,-0.228322,-0.247546
ACH-001814,0.216507,-0.172366,-0.265359,0.045942,0.123916,0.087353,-0.182381,-0.489776,0.005241,0.111024,...,-0.128753,-0.243608,-0.017122,0.271896,-0.081350,0.281161,-0.088890,-0.040170,-0.377166,-0.395694
