***
# <font color=blue>**Pymaceuticals, Inc**</font> 
***

## Squamous Cell Carcinoma Drug Treatment Analysis

### *Observed Trends:* 

1. *Capomulin was the only treatment that reduced tumor volume over time, and it also had the 
    highest survival rate for lab mice over time*
1. *The survival rates of the test mice declined rapidly over time*
1. *The number of cancer-spreading sites increased over time for each treatment*

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.legend import Legend
from scipy.stats import sem

# Treatments compared throughout the notebook, in the order they are plotted.
target_drugs = [
    'Capomulin',
    'Infubinol',
    'Ketapril',
    'Placebo',
]
# Number of treatments under comparison (derived, so the list above is the single source of truth).
num_of_target_drugs = len(target_drugs)

In [None]:
#Disable autoscrolling for images or graphs
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
# Load the clinical-trial measurements and the mouse -> drug assignments.
ct_data_df = pd.read_csv("raw_data/clinicaltrial_data.csv", low_memory=False)
md_data_df = pd.read_csv("raw_data/mouse_drug_data.csv", low_memory=False)

# Outer join on the mouse identifier so rows present in only one file are kept.
combined_data_df = ct_data_df.merge(md_data_df, on='Mouse ID', how='outer')
combined_data_df.head()

<br>
<br>
### Tumor Response to Treatment

In [None]:
# Mean tumor volume for every (Drug, Timepoint) group.
new_mean_df_grouped = (
    combined_data_df
    .groupby(["Drug", "Timepoint"])[['Tumor Volume (mm3)']]
    .mean()
)
new_mean_df_grouped

In [None]:
# Standard error of the mean tumor volume for every (Drug, Timepoint) group.
new_sem_df_grouped = (
    combined_data_df
    .groupby(["Drug", "Timepoint"])[['Tumor Volume (mm3)']]
    .sem()
)
new_sem_df_grouped.head(15)

In [None]:
# Reshape the grouped means: one row per timepoint, one column per drug.
tumor_vol_mean_df = new_mean_df_grouped.pivot_table(
    values=["Tumor Volume (mm3)"],
    index=["Timepoint"],
    columns=["Drug"],
)
tumor_vol_mean_df.head()

In [None]:
# Reshape the grouped standard errors: one row per timepoint, one column per drug.
tumor_vol_sem_df = new_sem_df_grouped.pivot_table(
    values=["Tumor Volume (mm3)"],
    index=["Timepoint"],
    columns=["Drug"],
)
tumor_vol_sem_df.head()

In [None]:
# Restrict the mean tumor volumes to the drugs under comparison.
target_drugs_tumor_vol_mean_df = tumor_vol_mean_df.loc[:, ('Tumor Volume (mm3)', target_drugs)]
print(target_drugs_tumor_vol_mean_df)

# 2-D array with one row per drug and one column per timepoint, ready to be
# zipped against `target_drugs` when plotting.
# `DataFrame.as_matrix` was removed in pandas 1.0; `.values` works on every
# pandas version, and transposing the frame replaces the column-by-column
# vstack loop (which also produced a 1-D list when only one drug was targeted).
drug_group_mean_vol = target_drugs_tumor_vol_mean_df.values.T

In [None]:
# Restrict the tumor-volume standard errors to the drugs under comparison.
target_drugs_tumor_vol_sem_df = tumor_vol_sem_df.loc[:, ('Tumor Volume (mm3)', target_drugs)]
print(target_drugs_tumor_vol_sem_df)

# 2-D array with one row per drug and one column per timepoint.
# `DataFrame.as_matrix` was removed in pandas 1.0; `.values` works on every
# pandas version, and transposing the frame replaces the column-by-column
# vstack loop.
drug_group_sem_vol = target_drugs_tumor_vol_sem_df.values.T

In [None]:
# Timepoint labels (row index of the pivoted frame), used for the x-axis ticks.
timepoint_index = target_drugs_tumor_vol_mean_df.index.get_level_values('Timepoint')
time_values = timepoint_index.tolist()
print(time_values)

In [None]:
# Mean tumor volume over time for each target drug, with SEM error bars.
fig, ax = plt.subplots(figsize=(12,8))
fig.suptitle("Tumor Response To Treatment", fontsize=14)

# Timepoints are plotted at evenly spaced positions and labelled with the day.
x_axis = np.arange(len(time_values))
ax.grid(ls='dashed')
ax.set_xlim(-0.5, len(time_values))
ax.set_ylim(25, 75)
ax.set_xticks(x_axis)
ax.set_xticklabels(time_values)
ax.set_ylabel("Tumor Volume (mm3)")
ax.set_xlabel("Time (Days)")

for drug_idx, (name, means, sems) in enumerate(zip(target_drugs, drug_group_mean_vol, drug_group_sem_vol)):
    # Colours come from matplotlib's default cycle so the figure is identical on
    # every run (random RGB triples were not reproducible and could be unreadable).
    plot_color = 'C{}'.format(drug_idx)
    # `color=` rather than `c=`: a bare RGB triple passed as `c` is ambiguous
    # with an array of values to colour-map.
    ax.scatter(x_axis, means, s=90, marker='s', label=name, color=plot_color)
    ax.plot(x_axis, means, '--', ms=12, color=plot_color)
    ax.errorbar(x_axis, means, yerr=sems, fmt='o', color='#000000')

ax.legend(loc="best", fontsize="medium", fancybox=True, shadow=True, borderpad=1)

plt.show()


<br>
<br>
### Metastatic Response to Treatment 

In [None]:
# Mean number of metastatic sites for every (Drug, Timepoint) group.
meta_df_grouped = (
    combined_data_df
    .groupby(["Drug", "Timepoint"])[['Metastatic Sites']]
    .mean()
)
meta_df_grouped.head()

In [None]:
# Standard error of the metastatic-site count for every (Drug, Timepoint) group.
meta_sem_df_grouped = (
    combined_data_df
    .groupby(["Drug", "Timepoint"])[['Metastatic Sites']]
    .sem()
)
meta_sem_df_grouped.head()

In [None]:
# Reshape the grouped means: one row per timepoint, one column per drug.
meta_df2 = meta_df_grouped.pivot_table(
    values=["Metastatic Sites"],
    index=["Timepoint"],
    columns=["Drug"],
)
meta_df2.head()

In [None]:
# Reshape the grouped standard errors: one row per timepoint, one column per drug.
meta_sem_df2 = meta_sem_df_grouped.pivot_table(
    values=["Metastatic Sites"],
    index=["Timepoint"],
    columns=["Drug"],
)
meta_sem_df2.head()

In [None]:
# Restrict the metastatic-site means to the drugs under comparison.
target_drugs_meta_mean_df = meta_df2.loc[:, ('Metastatic Sites', target_drugs)]
print(target_drugs_meta_mean_df)
print('---------------------------------------------------------------------')

# 2-D array with one row per drug and one column per timepoint.
# `DataFrame.as_matrix` was removed in pandas 1.0; `.values` works on every
# pandas version, and transposing replaces the column-by-column vstack loop.
drug_group_mean_meta = target_drugs_meta_mean_df.values.T

# Same selection and layout for the standard errors.
target_drugs_meta_sem_df = meta_sem_df2.loc[:, ('Metastatic Sites', target_drugs)]
print(target_drugs_meta_sem_df)
print('---------------------------------------------------------------------')

drug_group_sem_meta = target_drugs_meta_sem_df.values.T

# Timepoint labels for the x-axis ticks of the plots that follow.
time_values = list(target_drugs_meta_mean_df.index.get_level_values('Timepoint'))

In [None]:
# Mean metastatic-site count over time for each target drug, with SEM error bars.
fig2, ax2 = plt.subplots(figsize=(12,8))
fig2.suptitle("Metastatic Spread During Treatment", fontsize=14)

# Timepoints are plotted at evenly spaced positions and labelled with the day.
x_axis = np.arange(len(time_values))
ax2.grid(ls='dashed')
ax2.set_xlim(-0.5, len(time_values))
ax2.set_ylim(0.0, 4.0)
ax2.set_xticks(x_axis)
ax2.set_xticklabels(time_values)
ax2.set_ylabel("Metastatic Sites")
ax2.set_xlabel("Time (Days)")

for drug_idx, (name, means, sems) in enumerate(zip(target_drugs, drug_group_mean_meta, drug_group_sem_meta)):
    # Colours come from matplotlib's default cycle so the figure is identical on
    # every run (random RGB triples were not reproducible and could be unreadable).
    plot_color = 'C{}'.format(drug_idx)
    # `color=` rather than `c=`: a bare RGB triple passed as `c` is ambiguous
    # with an array of values to colour-map.
    ax2.scatter(x_axis, means, s=90, marker='s', label=name, color=plot_color)
    ax2.plot(x_axis, means, '--', ms=12, color=plot_color)
    ax2.errorbar(x_axis, means, yerr=sems, fmt='o', color='#000000')

ax2.legend(loc="best", fontsize="medium", fancybox=True, shadow=True, borderpad=1)

plt.show()

<br>
<br>
### Survival Rates

In [None]:
# Number of mice with a recorded measurement in every (Drug, Timepoint) group.
survived_df_grouped = (
    combined_data_df
    .groupby(["Drug", "Timepoint"])[['Mouse ID']]
    .count()
    .rename(columns={"Mouse ID":"Mouse Count"})
)
survived_df_grouped.head()

In [None]:
# Reshape the mouse counts: one row per timepoint, one column per drug.
survived_df2 = survived_df_grouped.pivot_table(
    values=["Mouse Count"],
    index=["Timepoint"],
    columns=["Drug"],
)
survived_df2.head()

In [None]:
# Restrict the mouse counts to the drugs under comparison.
target_drugs_survived_df = survived_df2.loc[:, ('Mouse Count', target_drugs)]
print(target_drugs_survived_df)

# Survival rate of each drug relative to ITS OWN head-count at the first
# timepoint. Dividing every column by `iloc[0, 0]` (the first drug's starting
# count) is only correct when all groups happen to start with the same number
# of mice.
initial_mouse_counts = target_drugs_survived_df.iloc[0]
target_percent_survived_df = target_drugs_survived_df.div(initial_mouse_counts, axis='columns').mul(100)

# 2-D array with one row per drug and one column per timepoint.
# `DataFrame.as_matrix` was removed in pandas 1.0; `.values` works on every
# pandas version, and transposing replaces the column-by-column vstack loop.
percent_survived = target_percent_survived_df.values.T

In [None]:
# Percentage of mice still alive over time for each target drug.
fig3, ax3 = plt.subplots(figsize=(12,8))
fig3.suptitle("Survival During Treatment", fontsize=14)

# Timepoints are plotted at evenly spaced positions and labelled with the day.
x_axis = np.arange(len(time_values))
ax3.grid(ls='dashed')
ax3.set_xlim(-0.5, len(time_values))
ax3.set_ylim(30, 100)
ax3.set_xticks(x_axis)
ax3.set_xticklabels(time_values)
ax3.set_ylabel("Survival Rate (%)")
ax3.set_xlabel("Time (Days)")

for drug_idx, (name, count) in enumerate(zip(target_drugs, percent_survived)):
    # Colours come from matplotlib's default cycle so the figure is identical on
    # every run (random RGB triples were not reproducible and could be unreadable).
    plot_color = 'C{}'.format(drug_idx)
    # `color=` rather than `c=`: a bare RGB triple passed as `c` is ambiguous
    # with an array of values to colour-map.
    ax3.scatter(x_axis, count, s=90, marker='s', label=name, color=plot_color)
    ax3.plot(x_axis, count, '--', ms=12, color=plot_color)
    # No error bars: a survival percentage is a single count per timepoint and
    # has no SEM. The previous `yerr=sems` reused a loop variable left over from
    # the metastatic-sites plot, so the bars it drew were meaningless.

ax3.legend(loc="best", fontsize="medium", fancybox=True, shadow=True, borderpad=1)

plt.show()


<br>
<br>
### Summary Bar Graph

In [None]:
# First and last day of the study, used as the endpoints of the comparison.
first_timepoint, last_timepoint = 0, 45

# Mean tumor volume laid out as one row per drug and one column per timepoint.
tumor_vol_by_time = new_mean_df_grouped['Tumor Volume (mm3)'].unstack(level='Timepoint')

# Single-column frames (indexed by drug) holding the starting and final mean volume.
tumor_vol_start = tumor_vol_by_time[[first_timepoint]]
tumor_vol_end = tumor_vol_by_time[[last_timepoint]]

tumor_vol_chng = pd.concat([tumor_vol_start, tumor_vol_end], axis=1)
tumor_vol_chng.columns.name = None

# Percent change is measured relative to the STARTING volume:
#     (end - start) / start * 100
# The previous expression, (1 - start / end) * 100, divided by the final volume
# instead and therefore overstated shrinkage and understated growth.
start_vol = tumor_vol_chng.iloc[:, 0]
end_vol = tumor_vol_chng.iloc[:, 1]
tumor_vol_perc_chng = end_vol.sub(start_vol).div(start_vol).mul(100)
print(tumor_vol_perc_chng)

In [None]:
# Percent tumor-volume change for the target drugs, in plotting order.
target_drugs_perc_chng = tumor_vol_perc_chng.loc[target_drugs].tolist()

# Red bars for tumors that grew (or did not change), green for tumors that shrank.
tumor_vol_chng_plot = pd.DataFrame({'perc' : target_drugs_perc_chng})
tumor_vol_chng_plot['colors'] = np.where(tumor_vol_chng_plot.perc >= 0, 'r', 'g')

fig4, ax4 = plt.subplots(figsize=(12, 8))
fig4.suptitle("Tumor Change Over 45-day Treatment", fontsize=14)

# One bar per drug at integer positions 0..n-1.
x_axis = np.arange(len(target_drugs))
ax4.bar(x_axis, tumor_vol_chng_plot.perc, color=tumor_vol_chng_plot.colors.tolist(), align='center', edgecolor='black')
ax4.axhline(0, color='black', lw=1)
ax4.grid(ls='dashed')
ax4.set_xlim(-0.5, len(x_axis) - 0.5)

# Derive the y-range from the data (with a little padding) so that no bar is
# clipped; the lower bound never rises above the original -20.
y_pad = 5
ax4.set_ylim(min(-20, min(target_drugs_perc_chng) - y_pad), max(0, max(target_drugs_perc_chng)) + y_pad)
ax4.set_ylabel("% Tumor Volume Change")

# Put each tick under its bar and label it with the drug name. The previous
# code offset the ticks by 1.5 and relied on a dummy '0' label to line things up.
ax4.set_xticks(x_axis)
ax4.set_xticklabels(target_drugs)

plt.show()
