# Pymaceuticals

## Discussion of findings contained in last cell

In [1]:
%matplotlib notebook

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings

warnings.filterwarnings('ignore')

mouse_file = "data/mouse_drug_data.csv"
clinical_file = "data/clinicaltrial_data.csv"

mouse_data = pd.read_csv(mouse_file,low_memory=False)
clinical_data = pd.read_csv(clinical_file,low_memory=False)

trial_df = pd.merge(clinical_data, mouse_data, how='outer', on='Mouse ID')

trial_df.head()


Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug
0,b128,0,45.0,0,Capomulin
1,b128,5,45.651331,0,Capomulin
2,b128,10,43.270852,0,Capomulin
3,b128,15,43.784893,0,Capomulin
4,b128,20,42.731552,0,Capomulin


## Tumor Response to Treatment

In [2]:
tvolmean = trial_df.groupby(['Drug','Timepoint'])['Tumor Volume (mm3)'].mean()

tvol_mean_df = pd.DataFrame(tvolmean)
tvol_mean_df.reset_index(inplace=True)
tvol_mean_df.head()

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,45.0
1,Capomulin,5,44.266086
2,Capomulin,10,43.084291
3,Capomulin,15,42.064317
4,Capomulin,20,40.716325


In [3]:
tvol_err = (trial_df.groupby(['Drug','Timepoint'])['Tumor Volume (mm3)']).sem()

tvol_sems_df = pd.DataFrame(tvol_err)
tvol_sems_df.reset_index(inplace=True)

tvol_sems_df.head()


Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,0.0
1,Capomulin,5,0.448593
2,Capomulin,10,0.702684
3,Capomulin,15,0.838617
4,Capomulin,20,0.909731


In [4]:
tvol_mean_dfp = tvol_mean_df.pivot(index='Timepoint',columns='Drug',values='Tumor Volume (mm3)')
tvol_sems_dfp = tvol_sems_df.pivot(index='Timepoint',columns='Drug',values='Tumor Volume (mm3)')

tvol_mean_dfp.reset_index(inplace=True)
tvol_sems_dfp.reset_index(inplace=True)

tvol_mean_dfp


Drug,Timepoint,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
0,0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
1,5,44.266086,46.503051,47.062001,47.389175,46.796098,47.125589,47.248967,43.944859,47.527452,46.851818
2,10,43.084291,48.285125,49.403909,49.582269,48.69421,49.423329,49.101541,42.531957,49.463844,48.689881
3,15,42.064317,50.094055,51.296397,52.399974,50.933018,51.359742,51.067318,41.495061,51.529409,50.779059
4,20,40.716325,52.157049,53.197691,54.920935,53.644087,54.364417,53.346737,40.238325,54.067395,53.170334
5,25,39.939528,54.287674,55.715252,57.678982,56.731968,57.482574,55.504138,38.9743,56.166123,55.432935
6,30,38.769339,56.769517,58.299397,60.994507,59.559509,59.809063,58.196374,38.703137,59.826738,57.713531
7,35,37.816839,58.827548,60.742461,63.371686,62.685087,62.420615,60.350199,37.451996,62.440699,60.089372
8,40,36.958001,61.467895,63.162824,66.06858,65.600754,65.052675,63.045537,36.574081,65.356386,62.916692
9,45,36.236114,64.132421,65.755562,70.662958,69.265506,68.084082,66.258529,34.955595,68.43831,65.960888


In [6]:
fig = plt.figure()

tvol_mean_scatter = plt.plot(tvol_mean_dfp['Timepoint'],
                             tvol_mean_dfp['Ketapril'],'rD--',
                             mec='k',lw=1.0,label='Ketapril')
tvol_mean_scatter = plt.plot(tvol_mean_dfp['Timepoint'],
                             tvol_mean_dfp['Placebo'],'g^--',
                             mec='k',lw=1.0,label='Placebo')
tvol_mean_scatter = plt.plot(tvol_mean_dfp['Timepoint'],
                             tvol_mean_dfp['Infubinol'],'bo--',
                             mec='k',lw=1.0,label='Infubinol')
tvol_mean_scatter = plt.plot(tvol_mean_dfp['Timepoint'],
                             tvol_mean_dfp['Capomulin'],'ms--',
                             mec='k',lw=1.0,label='Capomulin')

plt.errorbar(tvol_mean_dfp['Timepoint'],
             tvol_mean_dfp['Ketapril'],
             yerr=tvol_sems_dfp['Ketapril'],
             xerr=None, fmt='none',ecolor='r')

plt.errorbar(tvol_mean_dfp['Timepoint'],
             tvol_mean_dfp['Placebo'],
             yerr=tvol_sems_dfp['Placebo'],
             xerr=None, fmt='none',ecolor='g')

plt.errorbar(tvol_mean_dfp['Timepoint'],
             tvol_mean_dfp['Infubinol'],
             yerr=tvol_sems_dfp['Infubinol'],
             xerr=None, fmt='none',ecolor='b')

plt.errorbar(tvol_mean_dfp['Timepoint'],
             tvol_mean_dfp['Capomulin'],
             yerr=tvol_sems_dfp['Capomulin'],
             xerr=None, fmt='none',ecolor='m')

columnsmin=(tvol_mean_dfp[['Capomulin','Infubinol','Ketapril','Placebo']].min())
columnsmax=(tvol_mean_dfp[['Capomulin','Infubinol','Ketapril','Placebo']].max())
ymin = columnsmin.min()
ymax = columnsmax.max()

title='Tumor Reponse to Treatment'

plt.title(title)
plt.xlabel('Time (days)')
plt.ylabel('Tumor Vol (mm$^3$)')
plt.legend(loc='best')
plt.xlim((tvol_mean_dfp['Timepoint'].min())-1,tvol_mean_dfp['Timepoint'].max()+2)
plt.ylim(ymin-5,ymax+5)
plt.grid(axis='y')
plt.tight_layout()

fig_label='Figure_1'
plt.savefig(f"../Images/output_figures/{fig_label}_ZM.png")

fig.show()


<IPython.core.display.Javascript object>

## Metastatic Response to Treatment

In [7]:
metsite_mean_df = pd.DataFrame(trial_df.groupby(['Drug','Timepoint'])['Metastatic Sites'].mean())
metsite_mean_df.reset_index(inplace=True)
metsite_mean_df.head()

Unnamed: 0,Drug,Timepoint,Metastatic Sites
0,Capomulin,0,0.0
1,Capomulin,5,0.16
2,Capomulin,10,0.32
3,Capomulin,15,0.375
4,Capomulin,20,0.652174


In [8]:
metsite_sems_df = pd.DataFrame((trial_df.groupby(['Drug','Timepoint'])['Metastatic Sites']).sem())
metsite_sems_df.reset_index(inplace=True)
metsite_sems_df.head()

Unnamed: 0,Drug,Timepoint,Metastatic Sites
0,Capomulin,0,0.0
1,Capomulin,5,0.074833
2,Capomulin,10,0.125433
3,Capomulin,15,0.132048
4,Capomulin,20,0.161621


In [9]:
metsite_mean_dfp = metsite_mean_df.pivot(index='Timepoint',columns='Drug',values='Metastatic Sites')
metsite_mean_dfp.reset_index(inplace=True)
metsite_sems_dfp = metsite_sems_df.pivot(index='Timepoint',columns='Drug',values='Metastatic Sites')
metsite_sems_dfp.reset_index(inplace=True)
metsite_sems_dfp.head()

Drug,Timepoint,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,5,0.074833,0.108588,0.091652,0.0981,0.093618,0.100947,0.095219,0.066332,0.087178,0.077709
2,10,0.125433,0.152177,0.159364,0.142018,0.163577,0.115261,0.10569,0.090289,0.123672,0.109109
3,15,0.132048,0.180625,0.194015,0.191381,0.158651,0.190221,0.136377,0.115261,0.153439,0.111677
4,20,0.161621,0.241034,0.234801,0.23668,0.181731,0.234064,0.171499,0.11943,0.200905,0.166378


In [10]:
fig = plt.figure()

metsite_mean_scatter = plt.plot(metsite_mean_dfp['Timepoint'],
                                metsite_mean_dfp['Ketapril'],'rD--',
                                mec='k',lw=1.0,label='Ketapril')

metsite_mean_scatter = plt.plot(metsite_mean_dfp['Timepoint'],
                                metsite_mean_dfp['Placebo'],'g^--',
                                mec='k',lw=1.0,label='Placebo')

metsite_mean_scatter = plt.plot(metsite_mean_dfp['Timepoint'],
                                metsite_mean_dfp['Infubinol'],'bo--',
                                mec='k',lw=1.0,label='Infubinol')

metsite_mean_scatter = plt.plot(metsite_mean_dfp['Timepoint'],
                                metsite_mean_dfp['Capomulin'],'ms--',
                                mec='k',lw=1.0,label='Capomulin')

plt.errorbar(metsite_mean_dfp['Timepoint'],
             metsite_mean_dfp['Ketapril'],
             yerr=metsite_sems_dfp['Ketapril'],
             xerr=None, fmt='none',ecolor='r')

plt.errorbar(metsite_mean_dfp['Timepoint'],
             metsite_mean_dfp['Placebo'],
             yerr=metsite_sems_dfp['Placebo'],
             xerr=None, fmt='none',ecolor='g')

plt.errorbar(metsite_mean_dfp['Timepoint'],
             metsite_mean_dfp['Infubinol'],
             yerr=metsite_sems_dfp['Infubinol'],
             xerr=None, fmt='none',ecolor='b')

plt.errorbar(metsite_mean_dfp['Timepoint'],
             metsite_mean_dfp['Capomulin'],
             yerr=metsite_sems_dfp['Capomulin'],
             xerr=None, fmt='none',ecolor='m')

columnsmin=(metsite_mean_dfp[['Capomulin','Infubinol','Ketapril','Placebo']].min())
columnsmax=(metsite_mean_dfp[['Capomulin','Infubinol','Ketapril','Placebo']].max())
ymin = columnsmin.min()
ymax = columnsmax.max()

title='Metastatic Spread During Treatment'

plt.title(title)
plt.xlabel('Treatment Duration (days)')
plt.ylabel('Metastatic Sites')
plt.legend(loc='best')
plt.xlim((metsite_mean_dfp['Timepoint'].min())-1,metsite_mean_dfp['Timepoint'].max()+2)
plt.ylim(ymin-0.25,ymax+1)
plt.grid(axis='y')
plt.tight_layout()

fig_label='Figure_2'
plt.savefig(f"../Images/output_figures/{fig_label}_ZM.png")

fig.show()

<IPython.core.display.Javascript object>

## Survival Rates

In [11]:
count_mice_df = pd.DataFrame(trial_df.groupby(['Drug','Timepoint'])['Mouse ID'].count())
count_mice_df.reset_index(inplace=True)
count_mice_df.head()

Unnamed: 0,Drug,Timepoint,Mouse ID
0,Capomulin,0,25
1,Capomulin,5,25
2,Capomulin,10,25
3,Capomulin,15,24
4,Capomulin,20,23


In [12]:
count_mice_dfp = count_mice_df.pivot(index='Timepoint',columns='Drug',values='Mouse ID')
count_mice_dfp

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,25,25,25,25,25,25,26,25,26,25
5,25,21,25,23,23,24,25,25,25,24
10,25,20,21,22,21,24,23,24,23,22
15,24,19,21,19,21,20,17,24,23,21
20,23,18,20,19,20,19,17,23,21,17
25,22,18,18,19,18,17,14,23,19,16
30,22,16,17,18,15,15,13,23,18,15
35,22,14,12,17,15,14,10,21,16,14
40,21,14,10,15,15,12,9,20,12,14
45,21,13,9,11,13,11,7,20,11,14


In [13]:
new_df = pd.DataFrame(count_mice_dfp.transform(lambda x: x/count_mice_dfp.iloc[0]*100,axis=1))
new_df.reset_index(inplace=True)

In [14]:
new_df

Drug,Timepoint,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
0,0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
1,5,100.0,84.0,100.0,92.0,92.0,96.0,96.153846,100.0,96.153846,96.0
2,10,100.0,80.0,84.0,88.0,84.0,96.0,88.461538,96.0,88.461538,88.0
3,15,96.0,76.0,84.0,76.0,84.0,80.0,65.384615,96.0,88.461538,84.0
4,20,92.0,72.0,80.0,76.0,80.0,76.0,65.384615,92.0,80.769231,68.0
5,25,88.0,72.0,72.0,76.0,72.0,68.0,53.846154,92.0,73.076923,64.0
6,30,88.0,64.0,68.0,72.0,60.0,60.0,50.0,92.0,69.230769,60.0
7,35,88.0,56.0,48.0,68.0,60.0,56.0,38.461538,84.0,61.538462,56.0
8,40,84.0,56.0,40.0,60.0,60.0,48.0,34.615385,80.0,46.153846,56.0
9,45,84.0,52.0,36.0,44.0,52.0,44.0,26.923077,80.0,42.307692,56.0


In [15]:
fig = plt.figure()

surv_scatter = plt.plot(new_df['Timepoint'],
                        new_df['Ketapril'],'rD--',
                        mec='k',lw=1.0,label='Ketapril')

surv_scatter = plt.plot(new_df['Timepoint'],
                        new_df['Placebo'],'g^--',
                        mec='k',lw=1.0,label='Placebo')

surv_scatter = plt.plot(new_df['Timepoint'],
                        new_df['Infubinol'],'bo--',
                        mec='k',lw=1.0,label='Infubinol')

surv_scatter = plt.plot(new_df['Timepoint'],
                        new_df['Capomulin'],'ms--',
                        mec='k',lw=1.0,label='Capomulin')


columnsmin=(new_df[['Capomulin','Infubinol','Ketapril','Placebo']].min())
columnsmax=(new_df[['Capomulin','Infubinol','Ketapril','Placebo']].max())
ymin = columnsmin.min()
ymax = columnsmax.max()

title='Survival During Treatment'

plt.title(title)
plt.xlabel('Time (days)')
plt.ylabel('Survival Rate (%)')
plt.legend(loc='best')
plt.xlim((new_df['Timepoint'].min())-1,new_df['Timepoint'].max()+2)
plt.ylim(ymin-2.5,ymax+2.5)
plt.grid(axis='y')
plt.tight_layout()

fig_label='Figure_3'
plt.savefig(f"../Images/output_figures/{fig_label}_ZM.png")

fig.show()

<IPython.core.display.Javascript object>

\*\*\*\*\*STOP!!\*\*\*\*\*

Please ensure that the succeeding cell actually runs before moving past it. For some reason the kernel tended to not run the cell despite me having input Shift+Enter.

Thank you; you may proceed.

In [16]:
tvol_mean_dfp.set_index('Timepoint',inplace=True)
tvol_mean_dfp

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
5,44.266086,46.503051,47.062001,47.389175,46.796098,47.125589,47.248967,43.944859,47.527452,46.851818
10,43.084291,48.285125,49.403909,49.582269,48.69421,49.423329,49.101541,42.531957,49.463844,48.689881
15,42.064317,50.094055,51.296397,52.399974,50.933018,51.359742,51.067318,41.495061,51.529409,50.779059
20,40.716325,52.157049,53.197691,54.920935,53.644087,54.364417,53.346737,40.238325,54.067395,53.170334
25,39.939528,54.287674,55.715252,57.678982,56.731968,57.482574,55.504138,38.9743,56.166123,55.432935
30,38.769339,56.769517,58.299397,60.994507,59.559509,59.809063,58.196374,38.703137,59.826738,57.713531
35,37.816839,58.827548,60.742461,63.371686,62.685087,62.420615,60.350199,37.451996,62.440699,60.089372
40,36.958001,61.467895,63.162824,66.06858,65.600754,65.052675,63.045537,36.574081,65.356386,62.916692
45,36.236114,64.132421,65.755562,70.662958,69.265506,68.084082,66.258529,34.955595,68.43831,65.960888


## Summary Bar Graph

In [17]:
treatments=['Capomulin','Infubinol','Ketapril','Placebo']

col1=[tvol_mean_dfp.at[0,'Capomulin'],
    tvol_mean_dfp.at[0,'Infubinol'],
    tvol_mean_dfp.at[0,'Ketapril'],
    tvol_mean_dfp.at[0,'Placebo']
     ]

col2=[tvol_mean_dfp.at[45,'Capomulin'],
    tvol_mean_dfp.at[45,'Infubinol'],
    tvol_mean_dfp.at[45,'Ketapril'],
    tvol_mean_dfp.at[45,'Placebo']
     ]

col3=[((col2[0]-col1[0])/col1[0]*100),
     ((col2[1]-col1[1])/col1[1]*100),
     ((col2[2]-col1[2])/col1[2]*100),
     ((col2[3]-col1[3])/col1[3]*100)
     ]

data_dict = {'Treatment':treatments,
             'Initial Mean Tumor Vol':col1,
             'Final Mean Tumor Vol':col2,
             'Percent Change':col3}

change_df = pd.DataFrame(data=data_dict)


In [18]:
change_df

Unnamed: 0,Treatment,Initial Mean Tumor Vol,Final Mean Tumor Vol,Percent Change
0,Capomulin,45.0,36.236114,-19.475303
1,Infubinol,45.0,65.755562,46.123472
2,Ketapril,45.0,70.662958,57.028795
3,Placebo,45.0,68.084082,51.29796


In [19]:
relevant_pc=(change_df.loc[:,'Percent Change'])
relevant_treatment=(change_df.loc[:,'Treatment'])

x_axis = np.arange(len(relevant_pc))
xtick_locations = [value for value in x_axis]
width=0.95
colors=[]
for value in relevant_pc:
    if value<0:
        colors.append('lime')
    else:
        colors.append('red')

fig, ax = plt.subplots()
bar_plot = plt.bar(x_axis,relevant_pc,width=width,color=colors,linewidth=5,align='center')
ax.set_title('%-Change of Mean Tumor Volumes \n Over 45-day Treatment Period')
plt.ylabel('%-Change of Mean Tumor Volume')
plt.xlabel('Treatment Group')
plt.xticks(xtick_locations,relevant_treatment)
plt.hlines(0,x_axis.min()-width-0.25,x_axis.max()+width+0.25,color='k',linewidth=0.75 )
plt.grid(axis='y',color='k',linewidth=0.5,linestyle='--')
plt.xlim((x_axis.min()-width-0.25),(x_axis.max()+width+0.25))
plt.ylim((4/3*(relevant_pc.min())),(4/3*(relevant_pc.max())))
label_height=-relevant_pc.min()/2

heights=[]
for value in relevant_pc:
    if value<0:
        heights.append(-(label_height))
    else:
        heights.append(label_height)
print(heights)

def autolabel(rects):
    for idx,rect in enumerate(bar_plot):   
        ax.text(rect.get_x() + rect.get_width()/2.,
            heights[idx],
            str(relevant_pc[idx].round(1))+'%',
            ha='center',
            va='bottom',
            rotation=0,
            backgroundcolor='white')
        

autolabel(bar_plot)
plt.tight_layout()
fig_label='Figure_4'
plt.savefig(f"../Images/output_figures/{fig_label}_ZM.png")
fig.show()


<IPython.core.display.Javascript object>

[-9.737651333947078, 9.737651333947078, 9.737651333947078, 9.737651333947078]


![Metastatic Spread During Treatment](../Images/change.png)

## Findings

### Figure 1. Tumor Response to Treatment
Based on Figure 1, which shows the mean tumor volume over time when grouped by treatment, it appears that only one of the four treatment groups of interest demonstrated a negative linear relationship between time and treatment, which was the Capomulin group. While inhibiting the growth of cancerous tumors is important, so too is stopping cancer cell metastasis to prevent spread to vital organs. Further investigation should be performed before deciding if Capomulin would be a good treatment, however. Additional considerations include: efficacy in limiting metastsis, dosage quantity, cost, and treatmnt side effects. 

### Figure 2. Metastatic Spread During Treatment
Based on Figure 2, which depicts the average number of metastatic sites over time for each treatment group, it appears that the Capomulin group experienced the least amount of metastatic spread over the course of the trial. The Capomulin data may contain less error than the other groups' data, as shown by the smaller error bars. Not only is the error of the Capomulin group lower relative to the others, it is also changes less rapidly, which points to the sample population for that group decreasing less than the others.

### Figure 3. Survival During Treatment
Figure 3 shows the percentage of the original population that has survived at each timpoint over the course of the trial. The figure depicts that the Capomulin group demonstrated the slowest decline in population. The Capomulin group population appears to have a lower amount of variance in the group's rate of decline, while the other groups' trends have more pronounced inflection.

### Figure 4. %-Change of Mean Tumor Volumes.
Figure 4 depicts the average change in tumor size between the first and last timpoints for each group. The green (good) values represent a reduction in mean tumor volume and the red (bad) values represent an increase in mean tumor volume. The Capomulin group demonstrated an average tumor volume reduction of about 20% over 45 days, whereas the other groups' mean tumor volumes increased by about half of their original volumes. Further investigation should be of the trend displayed by Capomulin on a longer time scale. It would be interesting to see if Capomulin could decrease tumor volumes to the point of elimination prior to the population %-survival becoming unacceptably low. With Capomulin decreasing the tumor volume by 19.5% per 45-day period and the population decreasing by 16% per 45-day period, if both trends continued, about 18% of the treated population would survive by the time the mean tumor volume was near 0% of it's initial volume.