In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import plotly.express as px
import plotly.graph_objs as go

## Load data for interesting compounds

In [None]:
# Score-based bootstrap hit tables: per cell line (E57, E31) one
# "higher_compounds" table (called "targetmol" further down) and one LOPAC
# table. Each table gets a 'cellline' column so its rows remain identifiable
# once the tables are concatenated.
E57 = pd.read_csv(
    './E57_drug_discovery/E57_bootstrap_higher_compounds.csv'
).assign(cellline='E57')
E31 = pd.read_csv(
    './E31_drug_discovery/E31_bootstrap_higher_compounds.csv'
).assign(cellline='E31')
E57_LOPAC = pd.read_csv(
    './E57_drug_discovery/E57_bootstrap_LOPAC_compounds.csv'
).assign(cellline='E57')
E31_LOPAC = pd.read_csv(
    './E31_drug_discovery/E31_bootstrap_LOPAC_compounds.csv'
).assign(cellline='E31')

In [None]:
# "_fraction" variants of the same four hit tables, read from the
# *_fraction.csv files and tagged with their cell line in the same way.
E57_fraction = pd.read_csv(
    './E57_drug_discovery/E57_bootstrap_higher_compounds_fraction.csv'
).assign(cellline='E57')
E31_fraction = pd.read_csv(
    './E31_drug_discovery/E31_bootstrap_higher_compounds_fraction.csv'
).assign(cellline='E31')
E57_LOPAC_fraction = pd.read_csv(
    './E57_drug_discovery/E57_bootstrap_LOPAC_compounds_fraction.csv'
).assign(cellline='E57')
E31_LOPAC_fraction = pd.read_csv(
    './E31_drug_discovery/E31_bootstrap_LOPAC_compounds_fraction.csv'
).assign(cellline='E31')

## Find compounds in all 4 lists

In [None]:
# In the four non-LOPAC tables, replace each compound label with its second
# "_"-separated field. The LOPAC tables are left as they are.
# NOTE(review): this overwrites the column in place, so running the cell a
# second time splits the already-shortened names again -- restart and re-run
# from the loading cells instead.
for hits in (E57, E31, E57_fraction, E31_fraction):
    hits["compound"] = hits["compound"].str.split("_", expand=True)[1]

# Stack the score-based tables of both cell lines into one long frame and
# discard the 'Unnamed: 0' column carried over from the CSV files.
Data = pd.concat([E57, E57_LOPAC, E31, E31_LOPAC]).drop(columns='Unnamed: 0')

In [None]:
# Unique compound names per hit table, as sets, so the tables can be
# intersected in the next cell.
E31_targetmol_interesting, E57_targetmol_interesting = (
    set(hits["compound"]) for hits in (E31, E57)
)
E31_LOPAC_interesting, E57_LOPAC_interesting = (
    set(hits["compound"]) for hits in (E31_LOPAC, E57_LOPAC)
)
E31_targetmol_interesting_fraction, E57_targetmol_interesting_fraction = (
    set(hits["compound"]) for hits in (E31_fraction, E57_fraction)
)
E31_LOPAC_interesting_fraction, E57_LOPAC_interesting_fraction = (
    set(hits["compound"]) for hits in (E31_LOPAC_fraction, E57_LOPAC_fraction)
)

In [None]:
# A compound is kept only if it appears in all four lists of its library:
# both cell lines, in both the score-based and the fraction-based tables.
targetmol_interesting = (
    E31_targetmol_interesting
    & E57_targetmol_interesting
    & E31_targetmol_interesting_fraction
    & E57_targetmol_interesting_fraction
)
LOPAC_interesting = (
    E31_LOPAC_interesting
    & E57_LOPAC_interesting
    & E31_LOPAC_interesting_fraction
    & E57_LOPAC_interesting_fraction
)

In [None]:
# Show the compounds common to all four non-LOPAC ("targetmol") hit lists.
targetmol_interesting

In [None]:
# Shortlist from both libraries, targetmol entries first, then LOPAC.
interesting_compound_list = [*targetmol_interesting, *LOPAC_interesting]

# Keep every row of the combined table whose compound is on the shortlist.
is_interesting = Data['compound'].isin(interesting_compound_list)
InterestingCompounds = Data[is_interesting]

In [None]:
# Interactive overview: sen_score against concentration (log x-axis), one
# colour per compound and one marker symbol per cell line. The value of the
# 'higher' column is printed as a text label above each point.
fig = px.line(
    InterestingCompounds,
    x='concentration',
    y='sen_score',
    color='compound',
    symbol='cellline',
    log_x=True,
    text='higher',
    hover_data=['cellline'],
)
fig.update_traces(textposition='top center')
fig.show()

In [None]:
# One panel per shortlisted compound: sen_score against concentration for
# both cell lines, with the rows whose 'higher' flag is True overlaid in red.
# NOTE(review): the earlier comment on this cell said the red points are
# "greater than 2 stds above the mean" while the legend label reads
# "above 4std" -- confirm which threshold was used to compute 'higher'.

n_cols = 4
per_compound = list(InterestingCompounds.groupby('compound'))

# Size the grid to the number of compounds instead of a fixed 40 x 4 layout,
# which raised ValueError beyond 160 compounds and otherwise left most of a
# 200-inch-tall figure empty. 5 in per row matches the old 200 in / 40 rows.
n_rows = max(1, (len(per_compound) + n_cols - 1) // n_cols)

fig = plt.figure(figsize=(35, 5 * n_rows))
fig.subplots_adjust(hspace=0.5)

ax = None  # stays None when there is nothing to plot
for n, (compound_name, rows) in enumerate(per_compound):
    ax = fig.add_subplot(n_rows, n_cols, n + 1)

    e31_rows = rows[rows['cellline'] == 'E31']
    e57_rows = rows[rows['cellline'] == 'E57']
    # `== True` (rather than using the column as a mask) also tolerates
    # missing values in 'higher'.
    flagged = rows[rows['higher'] == True]

    ax.plot(e31_rows['concentration'], e31_rows['sen_score'], 'g', marker='o', zorder=1, label='E31')
    ax.plot(e57_rows['concentration'], e57_rows['sen_score'], marker='o', zorder=1, label='E57')
    ax.scatter(flagged['concentration'], flagged['sen_score'], color='r', zorder=2, label='above 4std')
    ax.set_title(compound_name, fontsize=25)
    ax.set_xlabel("concentration", fontsize=20)
    ax.set_ylabel("sen_score", fontsize=20)
    ax.set_ylim((-1.0, 0.0))
    ax.tick_params(axis='both', which='major', labelsize=20)

# A single shared legend, attached below the last panel drawn. Skipped when no
# panel exists; previously this line failed with NameError (or silently reused
# an `ax` left over from another cell).
if ax is not None:
    ax.legend(loc='upper center', bbox_to_anchor=(0, -0.5), fancybox=True, shadow=True, ncol=5, fontsize=20)

In [None]:
# Save the shortlisted rows next to the notebook. The row index is written as
# an unnamed first column because no index= argument is given.
InterestingCompounds.to_csv("InterestingCompounds.csv")

In [None]:
# Compound names selected for the "tested" plots in the next cell.
tested = [
    "Palbociclib hydrochloride",
    "Talazoparib",
    "Dexamethasone",
    "PD153035 hydrochloride",
    "Budesonide",
    "Temozolomide",
    "Etoposide",
]

In [None]:
# Same per-compound panels as for the shortlist, but restricted to the
# compounds named in `tested`, drawn from the full combined table `Data`.

n_cols = 4

# Filter first so only the tested compounds are grouped, instead of looping
# over every compound in `Data` and skipping most of them.
tested_rows = Data[Data['compound'].isin(tested)]
per_compound = list(tested_rows.groupby('compound'))

# Size the grid to the number of panels actually drawn rather than a fixed
# 40 x 4 layout on a 200-inch-tall figure. 5 in per row matches the old
# 200 in / 40 rows.
n_rows = max(1, (len(per_compound) + n_cols - 1) // n_cols)

fig = plt.figure(figsize=(35, 5 * n_rows))
fig.subplots_adjust(hspace=0.5)

ax = None  # stays None when none of the tested compounds occur in Data
for n, (compound_name, rows) in enumerate(per_compound):
    ax = fig.add_subplot(n_rows, n_cols, n + 1)

    e31_rows = rows[rows['cellline'] == 'E31']
    e57_rows = rows[rows['cellline'] == 'E57']
    # `== True` (rather than using the column as a mask) also tolerates
    # missing values in 'higher'.
    flagged = rows[rows['higher'] == True]

    ax.plot(e31_rows['concentration'], e31_rows['sen_score'], 'g', marker='o', zorder=1, label='E31')
    ax.plot(e57_rows['concentration'], e57_rows['sen_score'], marker='o', zorder=1, label='E57')
    # NOTE(review): label text kept as-is; confirm the threshold behind 'higher'.
    ax.scatter(flagged['concentration'], flagged['sen_score'], color='r', zorder=2, label='above 4std')
    ax.set_title(compound_name, fontsize=25)
    ax.set_xlabel("concentration", fontsize=20)
    ax.set_ylabel("sen_score", fontsize=20)
    ax.set_ylim((-1.0, 0.0))
    ax.tick_params(axis='both', which='major', labelsize=20)

# A single shared legend, attached below the last panel drawn. Skipped when no
# panel exists; previously this line failed with NameError (or silently reused
# an `ax` left over from another cell).
if ax is not None:
    ax.legend(loc='upper center', bbox_to_anchor=(0, -0.5), fancybox=True, shadow=True, ncol=5, fontsize=20)

In [None]:
# Inspect the compound names from row position 350 to the end of the
# combined table (positional slice, independent of the index labels).
Data["compound"].iloc[350:]