In [1]:
# Render figures with matplotlib's interactive "notebook" backend.
%matplotlib notebook

In [2]:
# Dependencies and Setup
import itertools
import warnings

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Hide warning messages in notebook.
# NOTE(review): this silences every warning category for the whole session,
# including deprecation notices from pandas/matplotlib -- consider narrowing it.
warnings.filterwarnings('ignore')

# Files to load (paths are relative to the notebook's working directory)
mouse_drug_data_to_load = "data/mouse_drug_data.csv"
clinical_trial_data_to_load = "data/clinicaltrial_data.csv"

# Read the Mouse and Drug Data and the Clinical Trial Data
mouse_drug_df = pd.read_csv(mouse_drug_data_to_load)
clinical_trial_df = pd.read_csv(clinical_trial_data_to_load)

# Combine the data into a single dataset: every trial measurement gains the
# drug assigned to its mouse (pd.merge defaults to an inner join on the key).
# NOTE(review): the survival table further down reports 26 mice at timepoint 0
# for Propriva and Stelasyn but 25 for every other drug. Possibly a Mouse ID
# is listed under more than one drug, which this merge would duplicate --
# verify against the source CSVs.
complete_data = pd.merge(mouse_drug_df, clinical_trial_df, on="Mouse ID")

# Display the data table for preview
complete_data.head()


Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,f234,Stelasyn,0,45.0,0
1,f234,Stelasyn,5,47.313491,0
2,f234,Stelasyn,10,47.904324,0
3,f234,Stelasyn,15,48.735197,1
4,f234,Stelasyn,20,51.112713,2


## Tumor Response to Treatment

In [3]:
# Average tumor volume for every (Drug, Timepoint) pair, kept as a Series
# indexed by those two keys
mean_tumor_volume = (
    complete_data
    .groupby(['Drug', 'Timepoint'])['Tumor Volume (mm3)']
    .mean()
)

# Turn the group keys back into ordinary columns next to the averaged value
mean_tumor_volume_df = mean_tumor_volume.reset_index(name='Tumor Volume (mm3)')

# Preview DataFrame
mean_tumor_volume_df

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,45.000000
1,Capomulin,5,44.266086
2,Capomulin,10,43.084291
3,Capomulin,15,42.064317
4,Capomulin,20,40.716325
...,...,...,...
95,Zoniferol,25,55.432935
96,Zoniferol,30,57.713531
97,Zoniferol,35,60.089372
98,Zoniferol,40,62.916692


In [4]:
# Standard error of the mean tumor volume for every (Drug, Timepoint) pair,
# kept as a Series indexed by those two keys
sem_tumor_volume = (
    complete_data
    .groupby(['Drug', 'Timepoint'])['Tumor Volume (mm3)']
    .sem()
)

# Turn the group keys back into ordinary columns next to the standard error
sem_tumor_volume_df = sem_tumor_volume.reset_index(name='Tumor Volume (mm3)')

# Preview DataFrame
sem_tumor_volume_df

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,0.000000
1,Capomulin,5,0.448593
2,Capomulin,10,0.702684
3,Capomulin,15,0.838617
4,Capomulin,20,0.909731
...,...,...,...
95,Zoniferol,25,0.602513
96,Zoniferol,30,0.800043
97,Zoniferol,35,0.881426
98,Zoniferol,40,0.998515


In [5]:
# Reshape the long table to wide form: one row per Timepoint, one column per
# Drug, cells holding the mean tumor volume
reformatted_tumor_volume_mean = (
    mean_tumor_volume_df
    .set_index(['Timepoint', 'Drug'])['Tumor Volume (mm3)']
    .unstack('Drug')
)
# Preview the reshaped table
reformatted_tumor_volume_mean

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
5,44.266086,46.503051,47.062001,47.389175,46.796098,47.125589,47.248967,43.944859,47.527452,46.851818
10,43.084291,48.285125,49.403909,49.582269,48.69421,49.423329,49.101541,42.531957,49.463844,48.689881
15,42.064317,50.094055,51.296397,52.399974,50.933018,51.359742,51.067318,41.495061,51.529409,50.779059
20,40.716325,52.157049,53.197691,54.920935,53.644087,54.364417,53.346737,40.238325,54.067395,53.170334
25,39.939528,54.287674,55.715252,57.678982,56.731968,57.482574,55.504138,38.9743,56.166123,55.432935
30,38.769339,56.769517,58.299397,60.994507,59.559509,59.809063,58.196374,38.703137,59.826738,57.713531
35,37.816839,58.827548,60.742461,63.371686,62.685087,62.420615,60.350199,37.451996,62.440699,60.089372
40,36.958001,61.467895,63.162824,66.06858,65.600754,65.052675,63.045537,36.574081,65.356386,62.916692
45,36.236114,64.132421,65.755562,70.662958,69.265506,68.084082,66.258529,34.955595,68.43831,65.960888


In [6]:
# Reshape the long table to wide form: one row per Timepoint, one column per
# Drug, cells holding the standard error of the tumor volume
reformatted_sem_tumor_volume = (
    sem_tumor_volume_df
    .set_index(['Timepoint', 'Drug'])['Tumor Volume (mm3)']
    .unstack('Drug')
)
# Preview the reshaped table
reformatted_sem_tumor_volume

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.448593,0.164505,0.235102,0.264819,0.202385,0.218091,0.231708,0.482955,0.239862,0.18895
10,0.702684,0.236144,0.282346,0.357421,0.319415,0.402064,0.376195,0.720225,0.433678,0.263949
15,0.838617,0.332053,0.357705,0.580268,0.444378,0.614461,0.466109,0.770432,0.493261,0.370544
20,0.909731,0.359482,0.47621,0.726484,0.59526,0.839609,0.555181,0.786199,0.621889,0.533182
25,0.881642,0.439356,0.550315,0.755413,0.813706,1.034872,0.577401,0.746991,0.741922,0.602513
30,0.93446,0.49062,0.631061,0.934121,0.975496,1.218231,0.746045,0.864906,0.899548,0.800043
35,1.052241,0.692248,0.984155,1.127867,1.013769,1.287481,1.084929,0.967433,1.003186,0.881426
40,1.223608,0.708505,1.05522,1.158449,1.118567,1.370634,1.564779,1.128445,1.410435,0.998515
45,1.223977,0.902358,1.144427,1.453186,1.416363,1.351726,1.888586,1.226805,1.576556,1.003576


In [7]:
# Distinct drug names, in order of first appearance; the plotting cells loop
# over this array to draw one line per drug
drug = pd.unique(mean_tumor_volume_df['Drug'])
drug

array(['Capomulin', 'Ceftamin', 'Infubinol', 'Ketapril', 'Naftisol',
       'Placebo', 'Propriva', 'Ramicane', 'Stelasyn', 'Zoniferol'],
      dtype=object)

In [8]:
# Put each drug/timepoint mean next to its standard error. Both inputs call
# their value column 'Tumor Volume (mm3)', so the merge suffixes them with
# _x (mean, left frame) and _y (sem, right frame); rename those for plotting.
mean_sem_tumor_volume_df = (
    mean_tumor_volume_df
    .merge(sem_tumor_volume_df, on=['Drug', 'Timepoint'])
    .rename(columns={
        "Tumor Volume (mm3)_x": "Tumor Volume",
        "Tumor Volume (mm3)_y": "sem",
    })
)

In [9]:
# Generate the Plot (with Error Bars)
# Select figure 1 first and only then take its axes. Asking for the current
# axes before selecting the figure would, on a re-run, draw the lines on
# whichever figure happened to be active last while the title, labels and
# saved file belong to figure 1.
fig = plt.figure(1)
ax = fig.gca()
marker = itertools.cycle(('+', '.', 'o', '*'))

# One dotted line per drug with sem error bars; markers repeat every 4 drugs
for x in drug:
    drug_rows = mean_sem_tumor_volume_df.loc[mean_sem_tumor_volume_df['Drug'] == x]
    drug_rows.plot(
        kind='line',
        x='Timepoint',
        y='Tumor Volume',
        yerr='sem',
        marker=next(marker),
        linestyle=':',
        ax=ax,
        label=x,
    )

# Set labels after plotting: pandas sets the x label to the column name on
# every .plot() call, so these calls must come last to take effect.
ax.set_title('Tumor Response to Treatment')
ax.set_xlabel('Time (Days)')
ax.set_ylabel('Tumor Volume (mm3)')
ax.legend(loc=2)
ax.grid(axis='y')
ax.set_xlim(-5, 50)

# Save the Figure
fig.savefig("images/tumor_response.png")

# Show the Figure
plt.show()

<IPython.core.display.Javascript object>

## Metastatic Response to Treatment

In [10]:
# Average number of metastatic sites for every (Drug, Timepoint) pair, kept
# as a Series indexed by those two keys
mean_met_site = (
    complete_data
    .groupby(['Drug', 'Timepoint'])['Metastatic Sites']
    .mean()
)

# Turn the group keys back into ordinary columns next to the averaged value
mean_met_site_df = mean_met_site.reset_index(name='Metastatic Sites')

# Preview DataFrame
mean_met_site_df

Unnamed: 0,Drug,Timepoint,Metastatic Sites
0,Capomulin,0,0.000000
1,Capomulin,5,0.160000
2,Capomulin,10,0.320000
3,Capomulin,15,0.375000
4,Capomulin,20,0.652174
...,...,...,...
95,Zoniferol,25,1.687500
96,Zoniferol,30,1.933333
97,Zoniferol,35,2.285714
98,Zoniferol,40,2.785714


In [11]:
# Standard error of the metastatic-site count for every (Drug, Timepoint)
# pair, kept as a Series indexed by those two keys
sem_met_site = (
    complete_data
    .groupby(['Drug', 'Timepoint'])['Metastatic Sites']
    .sem()
)

# Turn the group keys back into ordinary columns next to the standard error
sem_met_site_df = sem_met_site.reset_index(name='Metastatic Sites')

# Preview DataFrame
sem_met_site_df

Unnamed: 0,Drug,Timepoint,Metastatic Sites
0,Capomulin,0,0.000000
1,Capomulin,5,0.074833
2,Capomulin,10,0.125433
3,Capomulin,15,0.132048
4,Capomulin,20,0.161621
...,...,...,...
95,Zoniferol,25,0.236621
96,Zoniferol,30,0.248168
97,Zoniferol,35,0.285714
98,Zoniferol,40,0.299791


In [12]:
# Reshape the long table to wide form: one row per Timepoint, one column per
# Drug, cells holding the mean metastatic-site count
reformatted_mean_met_site = (
    mean_met_site_df
    .set_index(['Timepoint', 'Drug'])['Metastatic Sites']
    .unstack('Drug')
)
# Preview the reshaped table
reformatted_mean_met_site

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.16,0.380952,0.28,0.304348,0.26087,0.375,0.32,0.12,0.24,0.166667
10,0.32,0.6,0.666667,0.590909,0.52381,0.833333,0.565217,0.25,0.478261,0.5
15,0.375,0.789474,0.904762,0.842105,0.857143,1.25,0.764706,0.333333,0.782609,0.809524
20,0.652174,1.111111,1.05,1.210526,1.15,1.526316,1.0,0.347826,0.952381,1.294118
25,0.818182,1.5,1.277778,1.631579,1.5,1.941176,1.357143,0.652174,1.157895,1.6875
30,1.090909,1.9375,1.588235,2.055556,2.066667,2.266667,1.615385,0.782609,1.388889,1.933333
35,1.181818,2.071429,1.666667,2.294118,2.266667,2.642857,2.3,0.952381,1.5625,2.285714
40,1.380952,2.357143,2.1,2.733333,2.466667,3.166667,2.777778,1.1,1.583333,2.785714
45,1.47619,2.692308,2.111111,3.363636,2.538462,3.272727,2.571429,1.25,1.727273,3.071429


In [13]:
# Reshape the long table to wide form: one row per Timepoint, one column per
# Drug, cells holding the standard error of the metastatic-site count
reformatted_sem_met_site = (
    sem_met_site_df
    .set_index(['Timepoint', 'Drug'])['Metastatic Sites']
    .unstack('Drug')
)
# Preview the reshaped table
reformatted_sem_met_site

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.074833,0.108588,0.091652,0.0981,0.093618,0.100947,0.095219,0.066332,0.087178,0.077709
10,0.125433,0.152177,0.159364,0.142018,0.163577,0.115261,0.10569,0.090289,0.123672,0.109109
15,0.132048,0.180625,0.194015,0.191381,0.158651,0.190221,0.136377,0.115261,0.153439,0.111677
20,0.161621,0.241034,0.234801,0.23668,0.181731,0.234064,0.171499,0.11943,0.200905,0.166378
25,0.181818,0.258831,0.265753,0.288275,0.18524,0.263888,0.199095,0.11943,0.219824,0.236621
30,0.172944,0.249479,0.227823,0.347467,0.266667,0.300264,0.266469,0.139968,0.230641,0.248168
35,0.169496,0.266526,0.224733,0.361418,0.330464,0.341412,0.366667,0.145997,0.240983,0.285714
40,0.17561,0.289128,0.314466,0.315725,0.321702,0.297294,0.433903,0.160591,0.312815,0.299791
45,0.202591,0.286101,0.30932,0.278722,0.351104,0.30424,0.428571,0.190221,0.359062,0.2864


In [14]:
# Put each drug/timepoint mean next to its standard error. Both inputs call
# their value column 'Metastatic Sites', so the merge suffixes them with
# _x (mean, left frame) and _y (sem, right frame); rename those for plotting.
mean_sem_met_site_df = (
    mean_met_site_df
    .merge(sem_met_site_df, on=['Drug', 'Timepoint'])
    .rename(columns={
        "Metastatic Sites_x": "Metastatic Sites",
        "Metastatic Sites_y": "sem",
    })
)
mean_sem_met_site_df

Unnamed: 0,Drug,Timepoint,Metastatic Sites,sem
0,Capomulin,0,0.000000,0.000000
1,Capomulin,5,0.160000,0.074833
2,Capomulin,10,0.320000,0.125433
3,Capomulin,15,0.375000,0.132048
4,Capomulin,20,0.652174,0.161621
...,...,...,...,...
95,Zoniferol,25,1.687500,0.236621
96,Zoniferol,30,1.933333,0.248168
97,Zoniferol,35,2.285714,0.285714
98,Zoniferol,40,2.785714,0.299791


In [15]:
# Generate the Plot (with Error Bars)
# Figure 2 holds the metastatic-spread chart; everything is drawn on its axes
ax = plt.figure(2).gca()
marker = itertools.cycle(('+', '.', 'o', '*'))

# One dotted line per drug with sem error bars, each taking the next marker
# from the repeating cycle
for drug_name, symbol in zip(drug, marker):
    drug_rows = mean_sem_met_site_df[mean_sem_met_site_df['Drug'] == drug_name]
    drug_rows.plot(
        kind='line',
        x='Timepoint',
        y='Metastatic Sites',
        yerr='sem',
        marker=symbol,
        linestyle=':',
        ax=ax,
        label=drug_name,
    )

# Labels go on after the loop so they replace the ones pandas sets per call
ax.set_title('Metastatic Spread During Treatment')
ax.set_xlabel('Time (Days)')
ax.set_ylabel('Met. Sites')
ax.legend(loc=2)
ax.grid(axis='y')
ax.set_xlim(-5, 50)

# Save the Figure
ax.figure.savefig("images/met_spread.png")

# Show the Figure
plt.show()

<IPython.core.display.Javascript object>

## Survival Rates

In [16]:
# Number of mice recorded for every (Drug, Timepoint) pair; count() tallies
# the non-null 'Mouse ID' entries in each group
mice_count = (
    complete_data
    .groupby(['Drug', 'Timepoint'])['Mouse ID']
    .count()
)
# Flatten the group keys into columns and label the tally 'Mice Count'
mice_count_df = mice_count.reset_index(name='Mice Count')

# Preview DataFrame
mice_count_df

Unnamed: 0,Drug,Timepoint,Mice Count
0,Capomulin,0,25
1,Capomulin,5,25
2,Capomulin,10,25
3,Capomulin,15,24
4,Capomulin,20,23
...,...,...,...
95,Zoniferol,25,16
96,Zoniferol,30,15
97,Zoniferol,35,14
98,Zoniferol,40,14


In [17]:
# Reshape the long table to wide form: one row per Timepoint, one column per
# Drug, cells holding the number of mice counted
reformatted_mice_count_df = (
    mice_count_df
    .set_index(['Timepoint', 'Drug'])['Mice Count']
    .unstack('Drug')
)
# Preview the Data Frame
reformatted_mice_count_df

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,25,25,25,25,25,25,26,25,26,25
5,25,21,25,23,23,24,25,25,25,24
10,25,20,21,22,21,24,23,24,23,22
15,24,19,21,19,21,20,17,24,23,21
20,23,18,20,19,20,19,17,23,21,17
25,22,18,18,19,18,17,14,23,19,16
30,22,16,17,18,15,15,13,23,18,15
35,22,14,12,17,15,14,10,21,16,14
40,21,14,10,15,15,12,9,20,12,14
45,21,13,9,11,13,11,7,20,11,14


In [18]:
# Order rows by drug, then by timepoint (ascending is the sort_values default)
mice_count_df = mice_count_df.sort_values(['Drug', 'Timepoint'])

In [19]:
# Starting head-count per drug: the first 'Mice Count' row of each drug.
# mice_count_df is sorted by Drug then Timepoint beforehand, so that first row
# is the earliest timepoint. (The misspelled name is kept unchanged.)
beggining_count = mice_count_df.groupby(['Drug'])['Mice Count'].first()

# Attach the starting count to every row by looking each drug up in the
# per-drug Series. Unlike merging and then renaming the _x/_y suffixed
# columns, this gives the same columns no matter how often the cell is run;
# a second merge would leave two columns named 'Beginning Count'.
mice_count_df = (
    mice_count_df
    .assign(**{'Beginning Count': mice_count_df['Drug'].map(beggining_count)})
    .reset_index(drop=True)
)
mice_count_df

Unnamed: 0,Drug,Timepoint,Mice Count,Beginning Count
0,Capomulin,0,25,25
1,Capomulin,5,25,25
2,Capomulin,10,25,25
3,Capomulin,15,24,25
4,Capomulin,20,23,25
...,...,...,...,...
95,Zoniferol,25,16,25
96,Zoniferol,30,15,25
97,Zoniferol,35,14,25
98,Zoniferol,40,14,25


In [20]:
# Mice still counted at each timepoint as a percentage of the drug's
# starting count
mice_count_df['Survival Rate'] = (
    mice_count_df['Mice Count']
    .div(mice_count_df['Beginning Count'])
    .mul(100)
)
mice_count_df

Unnamed: 0,Drug,Timepoint,Mice Count,Beginning Count,Survival Rate
0,Capomulin,0,25,25,100.0
1,Capomulin,5,25,25,100.0
2,Capomulin,10,25,25,100.0
3,Capomulin,15,24,25,96.0
4,Capomulin,20,23,25,92.0
...,...,...,...,...,...
95,Zoniferol,25,16,25,64.0
96,Zoniferol,30,15,25,60.0
97,Zoniferol,35,14,25,56.0
98,Zoniferol,40,14,25,56.0


In [21]:
# Generate the Plot (Accounting for percentages)
# Figure 3 holds the survival chart; everything is drawn on its axes
ax = plt.figure(3).gca()
marker = itertools.cycle(('+', '.', 'o', '*'))

# One dotted line per drug, each taking the next marker from the repeating
# cycle
for drug_name, symbol in zip(drug, marker):
    drug_rows = mice_count_df[mice_count_df['Drug'] == drug_name]
    drug_rows.plot(
        kind='line',
        x='Timepoint',
        y='Survival Rate',
        marker=symbol,
        linestyle=':',
        ax=ax,
        label=drug_name,
    )

# Labels go on after the loop so they replace the ones pandas sets per call
ax.set_title('Survival During Treatment')
ax.set_xlabel('Time (Days)')
ax.set_ylabel('Survival Rate (%)')
ax.legend(loc=3)
ax.grid(axis='y')
ax.set_xlim(-5, 50)

# Save the Figure
ax.figure.savefig("images/survival_rate.png")

# Show the Figure
plt.show()


<IPython.core.display.Javascript object>

## Summary Bar Graph

In [22]:
# Calculate the percent changes for each drug
# Sort so that, within each drug, the first row is the earliest timepoint and
# the last row is the latest one
mean_tumor_volume_df = mean_tumor_volume_df.sort_values(['Drug', 'Timepoint'])

# Per-drug summary, indexed by Drug and also carrying Drug as a column
volume_by_drug = mean_tumor_volume_df.groupby(['Drug'])['Tumor Volume (mm3)']
startend_tumor_volume = pd.DataFrame({
    'Drug': mean_tumor_volume_df.groupby(['Drug'])['Drug'].first(),
    'Starting Volume': volume_by_drug.first(),
    'Ending Volume': volume_by_drug.last(),
})

# Relative growth from the first to the last mean volume, in percent
startend_tumor_volume['Percent Change'] = (
    startend_tumor_volume['Ending Volume']
    .sub(startend_tumor_volume['Starting Volume'])
    .div(startend_tumor_volume['Starting Volume'])
    .mul(100)
)

# Display the data to confirm
startend_tumor_volume

Unnamed: 0_level_0,Drug,Starting Volume,Ending Volume,Percent Change
Drug,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Capomulin,Capomulin,45.0,36.236114,-19.475303
Ceftamin,Ceftamin,45.0,64.132421,42.516492
Infubinol,Infubinol,45.0,65.755562,46.123472
Ketapril,Ketapril,45.0,70.662958,57.028795
Naftisol,Naftisol,45.0,69.265506,53.923347
Placebo,Placebo,45.0,68.084082,51.29796
Propriva,Propriva,45.0,66.258529,47.241175
Ramicane,Ramicane,45.0,34.955595,-22.3209
Stelasyn,Stelasyn,45.0,68.43831,52.085134
Zoniferol,Zoniferol,45.0,65.960888,46.579751


In [23]:
# Show the summary with a plain positional index instead of the Drug index.
# The result is only displayed, not assigned, so startend_tumor_volume itself
# keeps its Drug index.
startend_tumor_volume.reset_index(drop=True)

Unnamed: 0,Drug,Starting Volume,Ending Volume,Percent Change
0,Capomulin,45.0,36.236114,-19.475303
1,Ceftamin,45.0,64.132421,42.516492
2,Infubinol,45.0,65.755562,46.123472
3,Ketapril,45.0,70.662958,57.028795
4,Naftisol,45.0,69.265506,53.923347
5,Placebo,45.0,68.084082,51.29796
6,Propriva,45.0,66.258529,47.241175
7,Ramicane,45.0,34.955595,-22.3209
8,Stelasyn,45.0,68.43831,52.085134
9,Zoniferol,45.0,65.960888,46.579751


In [24]:
# Wide table of percent changes: one column per drug, one row per distinct
# starting volume. reset_index() then moves 'Starting Volume' out of the index
# into the first column.
new = (
    startend_tumor_volume
    .pivot(index='Starting Volume', columns='Drug', values='Percent Change')
    .reset_index()
)

In [32]:
# Integer x positions 0..k-1, one per column of `new`. The column count also
# includes the leading 'Starting Volume' column, so this yields one more
# position than there are drug columns.
xs = np.arange(len(new.columns))

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [44]:
# Percent change per drug as a tuple of plain floats: take the first row of
# `new` and drop its leading 'Starting Volume' entry
ys = tuple(new.values[0].tolist()[1:])

(-19.475302667894173,
 42.51649185589744,
 46.12347172785187,
 57.028794686606076,
 53.92334713476923,
 51.29796048315153,
 47.24117486320637,
 -22.320900462766673,
 52.085134287899024,
 46.57975086509525)

In [50]:
# Summary bar chart: percent tumor volume change per drug on figure 4
fig = plt.figure(4)
ax = fig.gca()

# Flag growth vs. shrinkage so the bars can be coloured red (grew) or
# green (shrank)
startend_tumor_volume['positive'] = startend_tumor_volume['Percent Change'] > 0
startend_tumor_volume.plot(kind='bar', x='Drug', y='Percent Change', width=.9, color=startend_tumor_volume.positive.map({True: 'r', False: 'g'}),legend=None, ax=ax)

# Label every bar with its rounded percentage. Positions and values are read
# straight from the plotted frame (bar i sits at x == i), so the labels do not
# depend on helper variables computed in other cells.
for x, y in enumerate(startend_tumor_volume['Percent Change']):
    label = "{:.0f}%".format(y)

    # Anchor at the zero line and nudge the text into the bar: upwards for
    # growth, downwards for shrinkage
    vertical_offset = 10 if y > 0 else -15
    plt.annotate(label,  # this is the text
                 (x, 0),  # this is the point to label
                 textcoords="offset points",  # xytext is an offset in points
                 xytext=(0, vertical_offset),  # distance from the point to the text
                 ha='center',  # horizontal alignment
                 color='white')

plt.title('Tumor Change Over 45 Day Treatment')
plt.ylabel("% Tumor Change")
plt.grid(True)
plt.tight_layout()

# Save the Figure before showing it: with non-interactive backends the figure
# is released once plt.show() returns, and a later savefig writes a blank image
plt.savefig("images/tumor_change.png")

# Show the Figure
plt.show()

<IPython.core.display.Javascript object>

In [None]:



# Splice the data between passing and failing drugs


# Orient widths. Add labels, tick marks, etc. 













## Observations