In [1]:
# Dependencies and Setup
%matplotlib notebook

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# set path to csv files
mouse_file = "data/mouse_drug_data.csv"
clinical_file = "data/clinicaltrial_data.csv"

# Read the Mouse and Drug Data and the Clinical Trial Data
mouse_data = pd.read_csv(mouse_file,low_memory=False)
clinical_data = pd.read_csv(clinical_file,low_memory=False)


# Combine the data into a single dataset
trial_df = pd.merge(clinical_data, mouse_data, how='outer', on='Mouse ID')
trial_df.tail()

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug
1901,m601,25,33.118756,1,Capomulin
1902,m601,30,31.758275,1,Capomulin
1903,m601,35,30.834357,1,Capomulin
1904,m601,40,31.378045,1,Capomulin
1905,m601,45,28.430964,1,Capomulin


## Tumor Response to Treatment

In [15]:
# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint
tvolmean = trial_df.groupby(['Drug','Timepoint'])['Tumor Volume (mm3)'].mean()

# Convert to DataFrame
tvol_mean_df = pd.DataFrame(tvolmean)
tvol_mean_df.reset_index(inplace=True)

# Preview DataFrame
tvol_mean_df.head()

# aggfunc = {('Tumor Volume (mm3)'
# tvolmean_agg = trial_df.groupby(['Drug','Timepoint'])['Tumor Volume (mm3)'].agg()

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,45.0
1,Capomulin,5,44.266086
2,Capomulin,10,43.084291
3,Capomulin,15,42.064317
4,Capomulin,20,40.716325


In [83]:
# Store the Standard Error of Tumor Volumes Grouped by Drug and Timepoint
tvol_err = (trial_df.groupby(['Drug','Timepoint'])['Tumor Volume (mm3)']).sem()
tvol_sems_df = pd.DataFrame(tvol_err)
tvol_sems_df.reset_index(inplace=True)
# Convert to DataFrame

# Preview DataFrame
tvol_sems_df.head()

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,0.0
1,Capomulin,5,0.448593
2,Capomulin,10,0.702684
3,Capomulin,15,0.838617
4,Capomulin,20,0.909731


In [91]:
# Minor Data Munging to Re-Format the Data Frames
tvol_mean_dfp = tvol_mean_df.pivot(index='Timepoint',columns='Drug',values='Tumor Volume (mm3)')
tvol_mean_dfp.reset_index(inplace=True)
tvol_sems_dfp = tvol_sems_df.pivot(index='Timepoint',columns='Drug',values='Tumor Volume (mm3)')
tvol_sems_dfp.reset_index(inplace=True)
# Preview that Reformatting worked
tvol_mean_dfp

Drug,Timepoint,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
0,0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
1,5,44.266086,46.503051,47.062001,47.389175,46.796098,47.125589,47.248967,43.944859,47.527452,46.851818
2,10,43.084291,48.285125,49.403909,49.582269,48.69421,49.423329,49.101541,42.531957,49.463844,48.689881
3,15,42.064317,50.094055,51.296397,52.399974,50.933018,51.359742,51.067318,41.495061,51.529409,50.779059
4,20,40.716325,52.157049,53.197691,54.920935,53.644087,54.364417,53.346737,40.238325,54.067395,53.170334
5,25,39.939528,54.287674,55.715252,57.678982,56.731968,57.482574,55.504138,38.9743,56.166123,55.432935
6,30,38.769339,56.769517,58.299397,60.994507,59.559509,59.809063,58.196374,38.703137,59.826738,57.713531
7,35,37.816839,58.827548,60.742461,63.371686,62.685087,62.420615,60.350199,37.451996,62.440699,60.089372
8,40,36.958001,61.467895,63.162824,66.06858,65.600754,65.052675,63.045537,36.574081,65.356386,62.916692
9,45,36.236114,64.132421,65.755562,70.662958,69.265506,68.084082,66.258529,34.955595,68.43831,65.960888


In [188]:
# Generate the Plot (with Error Bars)
# Capomulin, Infubinol, Ketapril, and Placebo
# plot(x, y, color='green', marker='o', linestyle='dashed',linewidth=2, markersize=12)
# tvol_mean_scatter = plt.plot(tvol_mean_dfp)
# plot(x1, y1, 'g^', x2, y2, 'g-')
plt.figure()
tvol_mean_scatter = plt.plot(tvol_mean_dfp['Timepoint'],tvol_mean_dfp['Ketapril'],'rD--',mec='k',lw=1.0,label='Ketapril')
tvol_mean_scatter = plt.plot(tvol_mean_dfp['Timepoint'],tvol_mean_dfp['Placebo'],'g^--',mec='k',lw=1.0,label='Placebo')
tvol_mean_scatter = plt.plot(tvol_mean_dfp['Timepoint'],tvol_mean_dfp['Infubinol'],'bo--',mec='k',lw=1.0,label='Infubinol')
tvol_mean_scatter = plt.plot(tvol_mean_dfp['Timepoint'],tvol_mean_dfp['Capomulin'],'ms--',mec='k',lw=1.0,label='Capomulin')

plt.errorbar(tvol_mean_dfp['Timepoint'],tvol_mean_dfp['Ketapril'],
             yerr=tvol_sems_dfp['Ketapril'], xerr=None, fmt='none',ecolor='r')

plt.errorbar(tvol_mean_dfp['Timepoint'],tvol_mean_dfp['Placebo'],
             yerr=tvol_sems_dfp['Placebo'], xerr=None, fmt='none',ecolor='g')

plt.errorbar(tvol_mean_dfp['Timepoint'],tvol_mean_dfp['Infubinol'],
             yerr=tvol_sems_dfp['Infubinol'], xerr=None, fmt='none',ecolor='b')

plt.errorbar(tvol_mean_dfp['Timepoint'],tvol_mean_dfp['Capomulin'],
             yerr=tvol_sems_dfp['Capomulin'], xerr=None, fmt='none',ecolor='m')

title='Tumor Reponse to Treatment'
plt.legend(loc='best')
plt.title(title)
plt.xlabel('Time (days)')
plt.ylabel('Tumor Vol (mm$^3$)')
plt.xlim((tvol_mean_dfp['Timepoint'].min())-1,tvol_mean_dfp['Timepoint'].max()+2)
columnsmin=(tvol_mean_dfp[['Capomulin','Infubinol','Ketapril','Placebo']].min())
ymin = columnsmin.min()
columnsmax=(tvol_mean_dfp[['Capomulin','Infubinol','Ketapril','Placebo']].max())
ymax = columnsmax.max()
plt.ylim(ymin-5,ymax+5)
plt.grid(axis='y')
plt.tight_layout()
# Save the Figure
plt.savefig(f"../Images/{title}_ZM.png")

# tvol_mean_scatter = plt.scatter(tvol_mean_dfp['Timepoint'],tvol_mean_dfp['Capomulin'],marker="o",color="black")
# plot(a[0], a[1:])
# tvol_mean_scatter = plt.plot(tvol_mean_dfp['Timepoint'],tvol_mean_dfp['Placebo'])
# tvol_mean_scatter = tvol_mean_dfp.plot(x='Timepoint',y=['Capomulin','Infubinol','Ketapril','Placebo'],)
# drugs=['Capomulin','Infubinol','Ketapril','Placebo']
# tvol_mean_scatter = tvol_mean_dfp.plot(y=drugs[:])
# ax1 = tvol_mean_scatter.add_subplot(111)
# ax1.scatter('Timepoint','Capomulin',marker='$C$',label='Capomulin')
# ax1.scatter('Timepoint','Infubinol',marker='$I$',label='Infubinol')
# ax1.scatter('Timepoint','Ketapril',marker='$K$',label='Ketapril')
# ax1.scatter('Timepoint','Placebo',marker='$P$',label='Placebo')
# for drug in tvol_mean_df.columns:
#     ax1.scatter('Timepoint','Drug', marker='+',label='Drug')

<IPython.core.display.Javascript object>

In [168]:
# Show the Figure
plt.show()

![Tumor Response to Treatment](../Images/treatment.png)

## Metastatic Response to Treatment

In [169]:
# Store the Mean Met. Site Data Grouped by Drug and Timepoint 

# Convert to DataFrame

# Preview DataFrame
metsite_mean_df = pd.DataFrame(trial_df.groupby(['Drug','Timepoint'])['Metastatic Sites'].mean())
metsite_mean_df.reset_index(inplace=True)
metsite_mean_df.head()

Unnamed: 0,Drug,Timepoint,Metastatic Sites
0,Capomulin,0,0.0
1,Capomulin,5,0.16
2,Capomulin,10,0.32
3,Capomulin,15,0.375
4,Capomulin,20,0.652174


In [170]:
# Store the Standard Error associated with Met. Sites Grouped by Drug and Timepoint 

# Convert to DataFrame

# Preview DataFrame
metsite_sems_df = pd.DataFrame((trial_df.groupby(['Drug','Timepoint'])['Metastatic Sites']).sem())
metsite_sems_df.reset_index(inplace=True)
metsite_sems_df.head()

Unnamed: 0,Drug,Timepoint,Metastatic Sites
0,Capomulin,0,0.0
1,Capomulin,5,0.074833
2,Capomulin,10,0.125433
3,Capomulin,15,0.132048
4,Capomulin,20,0.161621


In [175]:
# Minor Data Munging to Re-Format the Data Frames
metsite_mean_dfp = metsite_mean_df.pivot(index='Timepoint',columns='Drug',values='Metastatic Sites')
metsite_mean_dfp.reset_index(inplace=True)
metsite_sems_dfp = metsite_sems_df.pivot(index='Timepoint',columns='Drug',values='Metastatic Sites')
metsite_sems_dfp.reset_index(inplace=True)
# Preview that Reformatting worked
metsite_sems_dfp.head()

Drug,Timepoint,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,5,0.074833,0.108588,0.091652,0.0981,0.093618,0.100947,0.095219,0.066332,0.087178,0.077709
2,10,0.125433,0.152177,0.159364,0.142018,0.163577,0.115261,0.10569,0.090289,0.123672,0.109109
3,15,0.132048,0.180625,0.194015,0.191381,0.158651,0.190221,0.136377,0.115261,0.153439,0.111677
4,20,0.161621,0.241034,0.234801,0.23668,0.181731,0.234064,0.171499,0.11943,0.200905,0.166378


In [189]:
# Generate the Plot (with Error Bars)

# Save the Figure

# Show the Figure
# Minor Data Munging to Re-Format the Data Frames
plt.figure()
metsite_mean_scatter = plt.plot(metsite_mean_dfp['Timepoint'],metsite_mean_dfp['Ketapril'],'rD--',mec='k',lw=1.0,label='Ketapril')
metsite_mean_scatter = plt.plot(metsite_mean_dfp['Timepoint'],metsite_mean_dfp['Placebo'],'g^--',mec='k',lw=1.0,label='Placebo')
metsite_mean_scatter = plt.plot(metsite_mean_dfp['Timepoint'],metsite_mean_dfp['Infubinol'],'bo--',mec='k',lw=1.0,label='Infubinol')
metsite_mean_scatter = plt.plot(metsite_mean_dfp['Timepoint'],metsite_mean_dfp['Capomulin'],'ms--',mec='k',lw=1.0,label='Capomulin')

plt.errorbar(metsite_mean_dfp['Timepoint'],metsite_mean_dfp['Ketapril'],
             yerr=metsite_sems_dfp['Ketapril'], xerr=None, fmt='none',ecolor='r')

plt.errorbar(metsite_mean_dfp['Timepoint'],metsite_mean_dfp['Placebo'],
             yerr=metsite_sems_dfp['Placebo'], xerr=None, fmt='none',ecolor='g')

plt.errorbar(metsite_mean_dfp['Timepoint'],metsite_mean_dfp['Infubinol'],
             yerr=metsite_sems_dfp['Infubinol'], xerr=None, fmt='none',ecolor='b')

plt.errorbar(metsite_mean_dfp['Timepoint'],metsite_mean_dfp['Capomulin'],
             yerr=metsite_sems_dfp['Capomulin'], xerr=None, fmt='none',ecolor='m')

title='Metastatic Spread During Treatment'
plt.legend(loc='best')
plt.title(title)
plt.xlabel('Treatment Duration (days)')
plt.ylabel('Metastatic Sites')
plt.xlim((metsite_mean_dfp['Timepoint'].min())-1,metsite_mean_dfp['Timepoint'].max()+2)
columnsmin=(metsite_mean_dfp[['Capomulin','Infubinol','Ketapril','Placebo']].min())
ymin = columnsmin.min()
columnsmax=(metsite_mean_dfp[['Capomulin','Infubinol','Ketapril','Placebo']].max())
ymax = columnsmax.max()
plt.ylim(ymin-0.25,ymax+1)
plt.grid(axis='y')
plt.tight_layout()
# Save the Figure
plt.savefig(f"../Images/{title}_ZM.png")

<IPython.core.display.Javascript object>

![Metastatic Spread During Treatment](../Images/spread.png)

## Survival Rates

In [195]:
# Store the Count of Mice Grouped by Drug and Timepoint (W can pass any metric)

# Convert to DataFrame

# Preview DataFrame
# count_mice_df = pd.DataFrame(trial_df.groupby(['Timepoint','Drug'])['Mouse ID'].count())
# count_mice_df.reset_index(inplace=True)
# count_mice_df.head()
p100func = (trial_df['Mouse ID'].count()) / (trial_df.iloc[0]) * 100

count_mice = trial_df.groupby(['Timepoint','Drug'])['Mouse ID'].agg(p100func)
count_mice.head()

TypeError: cannot convert the series to <class 'int'>

In [192]:
count_mice_dfp = count_mice_df.pivot(index='Timepoint',columns='Drug',values='Mouse ID')
count_mice_dfp.reset_index(inplace=True)
# p100_surv_mice = count_mice_dfp.iloc[:,:] / count_mice_dfp.iloc[0,:] * 100
# surv_mice_df = pd.DataFrame(index='Timepoint',columns='Drug',values=p100_surv_mice)
# count_mice_dfp['% Surviving'] = count_mice_dfp['Capomulin']
# new_df = pd.DataFrame(data=(count_mice_dfp.),index='Timepoint',columns='Drug')
surv_mice_df = count_mice_dfp.groupby()


# aggfunc = {('Tumor Volume (mm3)'
# tvolmean_agg = trial_df.groupby(['Drug','Timepoint'])['Tumor Volume (mm3)'].agg()

Drug,Timepoint,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
0,0,25,25,25,25,25,25,26,25,26,25
1,5,25,21,25,23,23,24,25,25,25,24
2,10,25,20,21,22,21,24,23,24,23,22
3,15,24,19,21,19,21,20,17,24,23,21
4,20,23,18,20,19,20,19,17,23,21,17
5,25,22,18,18,19,18,17,14,23,19,16
6,30,22,16,17,18,15,15,13,23,18,15
7,35,22,14,12,17,15,14,10,21,16,14
8,40,21,14,10,15,15,12,9,20,12,14
9,45,21,13,9,11,13,11,7,20,11,14


In [None]:
# Minor Data Munging to Re-Format the Data Frames

# Preview the Data Frame


In [None]:
# Generate the Plot (Accounting for percentages)

# Save the Figure

# Show the Figure
plt.show()

![Metastatic Spread During Treatment](../Images/survival.png)

## Summary Bar Graph

In [None]:
# Calculate the percent changes for each drug

# Display the data to confirm


In [None]:
# Store all Relevant Percent Changes into a Tuple


# Splice the data between passing and failing drugs


# Orient widths. Add labels, tick marks, etc. 


# Use functions to label the percentages of changes


# Call functions to implement the function calls


# Save the Figure


# Show the Figure
fig.show()

![Metastatic Spread During Treatment](../Images/change.png)