In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import plotly.express as px

In [None]:
# "Data" contains the E31 data including the mean std from the bootstrapping
# Read the E31 LOPAC table and expose its 'conc' column as 'concentration'
# in a single chained step.
Data = pd.read_csv('E31_LOPAC_with_bootstrap.csv').rename(
    columns={'conc': 'concentration'}
)

In [None]:
# Show the loaded table as this cell's output for a quick visual check.
Data

## senescence score

In [None]:
# Hit calling on the senescence score.
#
# For every well the threshold is
#     mean sen_score of the DMSO wells on the same plate
#     + n_std * the well's own bootstrapped standard deviation.
# A well is a hit when its sen_score exceeds that threshold AND it contains more
# than `interesting_no_of_cells` cells.

n_std = 4                      # number of bootstrapped stds above the DMSO mean
interesting_no_of_cells = 500  # wells must contain more cells than this

# Per-plate grouping, kept as a notebook-level variable in case later cells reuse it.
grouped = Data.groupby(['Metadata_platename'], as_index=False)

# Mean DMSO sen_score per plate, mapped back onto every well of that plate.
# Wells on a plate without DMSO controls (or without a plate name) get NaN, so
# their threshold is NaN and the comparisons below evaluate to False for them.
is_dmso = Data['compound'] == 'DMSO'
dmso_mean_by_plate = Data.loc[is_dmso].groupby('Metadata_platename')['sen_score'].mean()
plate_dmso_mean = Data['Metadata_platename'].map(dmso_mean_by_plate)

# The column name keeps its historical "std4" suffix because the exported CSV
# and the plots refer to it; it corresponds to n_std = 4.
Data["DMSOsen_score+bootstrap_std4"] = (
    plate_dmso_mean + n_std * Data['boostrapped_senscore_mean_std']
)

# Wells scoring above their threshold ...
higher = Data.loc[Data['sen_score'] > Data['DMSOsen_score+bootstrap_std4']]

# ... restricted to wells with enough cells.
higher_500 = higher.loc[higher['cell_no'] > interesting_no_of_cells]

# Compounds with at least one qualifying well (a name repeats once per qualifying well).
higher_list = higher_500.compound.tolist()

# All wells (every concentration) of those compounds. The explicit copy makes this
# an independent frame, so the column added below never touches `Data`.
Allhigherdata = Data[Data['compound'].isin(higher_list)].copy()

# 'higher' marks which wells of the selected compounds pass both criteria.
Allhigherdata['higher'] = (
    (Allhigherdata['sen_score'] > Allhigherdata['DMSOsen_score+bootstrap_std4'])
    & (Allhigherdata['cell_no'] > interesting_no_of_cells)
)

Allhigherdata = Allhigherdata.sort_values('concentration')

In [None]:
# Write the senescence-score selection (all wells of the selected compounds,
# including the 'higher' flag) to CSV.
file_name = 'E31_bootstrap_LOPAC_compounds.csv'
Allhigherdata.to_csv(path_or_buf=file_name)

## fraction senescent

In [None]:
# Hit calling on the number of senescent cells.
#
# For every well the threshold is
#     (mean fraction_sen of the DMSO wells on the same plate) * the well's cell_no
#     + n_std * the well's bootstrapped standard deviation of the cell count.
# A well is a hit when its number of senescent cells exceeds that threshold AND
# it contains more than `interesting_no_of_cells` cells.

n_std = 4                      # number of bootstrapped stds above the DMSO expectation
interesting_no_of_cells = 500  # wells must contain more cells than this
stricter_no_of_cells = 1500    # stricter cut-off used for the table shown at the end

# Number of senescent cells per well.
Data['number_sen'] = Data['fraction_sen'] * Data['cell_no']

# Mean DMSO senescent fraction per plate, mapped back onto every well of that plate.
# Computed directly from `Data` so this cell does not rely on a groupby object
# created in an earlier cell. Wells on a plate without DMSO controls (or without
# a plate name) get NaN, so the comparisons below evaluate to False for them.
is_dmso = Data['compound'] == 'DMSO'
dmso_fraction_by_plate = Data.loc[is_dmso].groupby('Metadata_platename')['fraction_sen'].mean()
plate_dmso_fraction = Data['Metadata_platename'].map(dmso_fraction_by_plate)

# The column name keeps its historical "std4" suffix because the exported CSV
# refers to it; it corresponds to n_std = 4.
Data["DMSOnumber_sen+bootstrap_std4"] = (
    plate_dmso_fraction * Data['cell_no']
    + n_std * Data['boostrapped_numcells_mean_std']
)

# Wells with more senescent cells than their threshold ...
higher_number_sen = Data.loc[Data['number_sen'] > Data['DMSOnumber_sen+bootstrap_std4']]

# ... restricted to wells with enough cells (standard and stricter cut-offs).
higher_500_number_sen = higher_number_sen.loc[higher_number_sen['cell_no'] > interesting_no_of_cells]
higher_1500_number_sen = higher_number_sen.loc[higher_number_sen['cell_no'] > stricter_no_of_cells]

# Compounds with at least one qualifying well (a name repeats once per qualifying well).
higher_list_number_sen = higher_500_number_sen.compound.tolist()

# All wells (every concentration) of those compounds. The explicit copy makes this
# an independent frame, so the column added below never touches `Data`.
FractionAllhigherdata = Data[Data['compound'].isin(higher_list_number_sen)].copy()

# 'higher' marks which wells of the selected compounds pass both criteria.
FractionAllhigherdata['higher'] = (
    (FractionAllhigherdata['number_sen'] > FractionAllhigherdata['DMSOnumber_sen+bootstrap_std4'])
    & (FractionAllhigherdata['cell_no'] > interesting_no_of_cells)
)

FractionAllhigherdata = FractionAllhigherdata.sort_values('concentration')


In [None]:
# Write the senescent-count selection (all wells of the selected compounds,
# including the 'higher' flag) to CSV.
file_name = 'E31_bootstrap_LOPAC_compounds_fraction.csv'
FractionAllhigherdata.to_csv(path_or_buf=file_name)

## data visualisation

In [None]:
# Rows of the DMSO negative control, indexed by compound name.
negative_control = Data.set_index(['compound']).loc[['DMSO']]

fig, axes = plt.subplots(1, figsize=(15, 10))

# Layer 1: every well in the screen.
sb.scatterplot(
    data=Data, x='sen_score', y='cell_no', size='concentration',
    s=20, alpha=0.5, label='all data points', legend=False, ax=axes,
)
# Layer 2: wells that passed the senescence-score threshold and the cell-count cut-off.
sb.scatterplot(
    data=higher_500, x='sen_score', y='cell_no', size='concentration',
    s=20, label='greater than 4 std above the DMSO mean', ax=axes,
)
# Layer 3: the DMSO control wells, drawn in magenta.
sb.scatterplot(
    data=negative_control, x='sen_score', y='cell_no', size='concentration',
    color='m', alpha=0.7, label='negative control - DMSO', legend=False, ax=axes,
)

axes.legend(fontsize=16)
axes.set_xlabel('Senescence Score', fontsize=20)
axes.set_ylabel('Total number of cells', fontsize=20)
axes.set_title('E31 LOPAC', fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)

In [None]:
fig, axes = plt.subplots(1, figsize=(15, 10))

# Layer 1: every well in the screen.
sb.scatterplot(
    data=Data, x='number_sen', y='cell_no',
    alpha=0.5, label='all data', legend=False, ax=axes,
)
# Layer 2: wells that passed the senescent-count threshold and the cell-count cut-off.
sb.scatterplot(
    data=higher_500_number_sen, x='number_sen', y='cell_no', size='concentration',
    label='greater than 4 std above mean', legend=False, ax=axes,
)
# Layer 3: the DMSO control wells, drawn in magenta.
sb.scatterplot(
    data=negative_control, x='number_sen', y='cell_no', size='concentration',
    color='m', label='negative control - DMSO', ax=axes,
)

axes.legend(fontsize=16)
axes.set_xlabel('Number of senescent cells', fontsize=20)
axes.set_ylabel('Total number of cells', fontsize=20)
axes.set_title('E31 LOPAC', fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)

In [None]:
# Show the wells whose senescent-cell count exceeds the threshold and that
# contain more than 1500 cells.
higher_1500_number_sen