## Observations
* The mice treated with Capomulin and Ramicane developed the fewest new metastatic sites by the end of the treatment: $1.47\pm 0.40$ new metastatic sites under Capomulin, and $1.25\pm 0.19$ new metastatic sites under Ramicane (results stated with 95% confidence).
* The mice treated with Capomulin and Ramicane had the highest survival rates. By the end of the treatment, 84% of the mice treated with Capomulin survived, and 80% of the mice treated with Ramicane survived.
* The only treatments which produced a decrease in mean tumor volume were Capomulin (19% decrease) and Ramicane (22% decrease). The rest of the drugs produced increases in mean tumor volume with little to no significant difference from the placebo.

In [None]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Hide warning messages in notebook
# NOTE(review): 'ignore' with no category or module argument suppresses every
# warning raised after this point — consider narrowing the filter.
import warnings
warnings.filterwarnings('ignore')

# Locations of the two input CSV files
mouse_file = "data/mouse_drug_data.csv"
clinicaltrial_file = "data/clinicaltrial_data.csv"

# Load each CSV into its own DataFrame
mouse_df = pd.read_csv(mouse_file)
trial_df = pd.read_csv(clinicaltrial_file)

# Join the trial rows to the mouse/drug rows on the shared 'Mouse ID' column,
# then order the combined rows by 'Timepoint'
merged_df = trial_df.merge(mouse_df, on='Mouse ID')
df = merged_df.sort_values('Timepoint')

# Show the first rows of the combined table
df.head()


## Tumor Response to Treatment

In [None]:
# Mean of 'Tumor Volume (mm3)' for every (Drug, Timepoint) pair
mean_tumor_volume_s = (
    df.groupby(['Drug', 'Timepoint'])['Tumor Volume (mm3)']
    .agg('mean')
)

# Wrap the Series in a single-column DataFrame
mtv_df = mean_tumor_volume_s.to_frame()

# Show the first rows
mtv_df.head()

In [None]:
# Standard error of the mean of 'Tumor Volume (mm3)' for every
# (Drug, Timepoint) pair
tv_err_s = (
    df.groupby(['Drug', 'Timepoint'])['Tumor Volume (mm3)']
    .agg('sem')
)

# Wrap the Series in a single-column DataFrame
tv_err_df = tv_err_s.to_frame()

# Show the first rows
tv_err_df.head()


In [None]:
# Pivot both tables so that each drug becomes a column and each timepoint a
# row: take the single value column, then move the 'Drug' index level
# (level 0 of the group index) into the columns
mtv_df = mtv_df['Tumor Volume (mm3)'].unstack('Drug')
tv_err_df = tv_err_df['Tumor Volume (mm3)'].unstack('Drug')

# Display the reshaped means to confirm the layout
mtv_df

In [None]:
# Display the reshaped standard-error table (rows: timepoints, columns: drugs)
tv_err_df

In [None]:
# Half-width of the 95% confidence interval at the last timepoint:
# the final row of standard errors scaled by 1.96
tv_err_df.iloc[-1].mul(1.96)

In [None]:
# Plot the mean tumor volume over time for every drug, with standard-error
# bars, then save and display the figure.

# Timepoints (the index of the reshaped table) go on the x axis
x_axis = list(mtv_df.index.values)

# One plotted series per drug column
drug_names = list(mtv_df)

plt.figure(figsize=(20,10))

# One marker style per drug. The index wraps with modulo so that having more
# drugs than marker characters reuses markers instead of raising IndexError.
mark = 'osv^<>XPhd'
for i, d in enumerate(drug_names):
    plt.errorbar(x_axis, mtv_df[d], yerr=tv_err_df[d], marker=mark[i % len(mark)],
                 markersize=8, linewidth=1.6, label=d)

plt.xlabel('Time (Days)')
plt.ylabel('Tumor Volume (mm3)')
plt.title('Tumor Response to Treatment')
plt.grid()
plt.legend()
tumor_volume_fig = plt.gcf()

# Save the Figure (before showing it, while the figure is still current)
tumor_volume_fig.savefig('tumor_response.png')

# Show the Figure. plt.show() is used rather than Figure.show(): the latter
# emits a warning on non-GUI backends such as the inline backend this
# notebook selects.
plt.show()

## Metastatic Response to Treatment

In [None]:
# Mean of 'Metastatic Sites' for every (Drug, Timepoint) pair
mean_metastatic_sites_s = (
    df.groupby(['Drug', 'Timepoint'])['Metastatic Sites']
    .agg('mean')
)

# Wrap the Series in a single-column DataFrame
mms_df = mean_metastatic_sites_s.to_frame()

# Show the first rows
mms_df.head()

In [None]:
# Standard error of the mean of 'Metastatic Sites' for every
# (Drug, Timepoint) pair
err_metastatic_sites_s = (
    df.groupby(['Drug', 'Timepoint'])['Metastatic Sites']
    .agg('sem')
)

# Wrap the Series in a single-column DataFrame
ems_df = err_metastatic_sites_s.to_frame()

# Show the first rows
ems_df.head()

In [None]:
# Pivot both tables so that each drug becomes a column and each timepoint a
# row: take the single value column, then move the 'Drug' index level
# (level 0 of the group index) into the columns
mms_df = mms_df['Metastatic Sites'].unstack('Drug')
ems_df = ems_df['Metastatic Sites'].unstack('Drug')

# Display the reshaped means to confirm the layout
mms_df

In [None]:
# Display the reshaped standard-error table (rows: timepoints, columns: drugs)
ems_df

In [None]:
# Half-width of the 95% confidence interval at the last timepoint:
# the final row of standard errors scaled by 1.96
ems_df.iloc[-1].mul(1.96)

In [None]:
# Plot the mean number of metastatic sites over time for every drug, with
# standard-error bars, then save and display the figure.
plt.figure(figsize=(20,10))

# The marker index wraps with modulo so that having more drugs than marker
# characters reuses markers instead of raising IndexError.
for i, d in enumerate(drug_names):
    plt.errorbar(x_axis, mms_df[d], yerr=ems_df[d], marker=mark[i % len(mark)],
                 markersize=8, linewidth=1.6, label=d)

plt.xlabel('Time (Days)')
plt.ylabel('Metastatic Sites')
plt.title('Metastatic Spread During Treatment')
plt.grid()
plt.legend()
metastatic_fig = plt.gcf()

# Save the Figure (before showing it, while the figure is still current)
metastatic_fig.savefig('metastatic_spread.png')

# Show the Figure. plt.show() is used rather than Figure.show(): the latter
# emits a warning on non-GUI backends such as the inline backend this
# notebook selects.
plt.show()

## Survival Rates

In [None]:
# Number of distinct 'Mouse ID' values for every (Drug, Timepoint) pair
nmice_s = (
    df.groupby(['Drug', 'Timepoint'])['Mouse ID']
    .agg('nunique')
)

# Wrap the Series in a single-column DataFrame
nmice_df = nmice_s.to_frame()

# Show the first rows
nmice_df.head()

In [None]:
# Pivot the counts so that each drug becomes a column and each timepoint a
# row: take the single value column, then move the 'Drug' index level
# (level 0 of the group index) into the columns
nmice_df = nmice_df['Mouse ID'].unstack('Drug')

# Display the reshaped counts
nmice_df

In [None]:
# Express every count as a percentage of the same drug's count in the first
# row (the earliest timepoint), then display the table
perc_mice_df = nmice_df.mul(100).div(nmice_df.iloc[0])
perc_mice_df

In [None]:
# Plot the percentage of surviving mice over time for every drug, then save
# and display the figure.
plt.figure(figsize=(20,10))

# The marker index wraps with modulo so that having more drugs than marker
# characters reuses markers instead of raising IndexError.
for i, d in enumerate(drug_names):
    plt.plot(x_axis, perc_mice_df[d], marker=mark[i % len(mark)],
             markersize=8, linewidth=1.6, label=d)

plt.xlabel('Time (Days)')
plt.ylabel('Survival Rate (%)')
plt.title('Survival During Treatment')
plt.grid()
plt.legend()
survival_fig = plt.gcf()

# Save the Figure (before showing it, while the figure is still current)
survival_fig.savefig('survival.png')

# Show the Figure. plt.show() is used rather than Figure.show(): the latter
# emits a warning on non-GUI backends such as the inline backend this
# notebook selects.
plt.show()

## Summary Bar Graph

In [None]:
# Percent change in mean tumor volume per drug, from the first row of the
# reshaped table to the last
first_volumes = mtv_df.iloc[0]
last_volumes = mtv_df.iloc[-1]
perc_changes_s = last_volumes.div(first_volumes).sub(1).mul(100)

# Display the result to confirm
perc_changes_s

In [None]:
# Draw one bar per drug showing its percent change in mean tumor volume.

# Store all Relevant Percent Changes into a Tuple
perc_changes = tuple(perc_changes_s)

# Color each bar by outcome: green where the volume decreased, red otherwise
bar_colors = ['r' if p >= 0 else 'g' for p in perc_changes]

# Bar geometry: bar i spans [bar_axis[i], bar_axis[i] + box_width_scale]
plt.figure(figsize=(15, 10))
box_width_scale = 1
bar_axis = np.arange(len(perc_changes)) * box_width_scale
bar_centers = bar_axis + box_width_scale/2

# Major x ticks stay on the bar boundaries so the vertical grid lines keep
# separating adjacent bars, but their labels are hidden. The drug names go on
# minor ticks at the bar centers; labelling the boundaries put every name on
# the right-hand edge of its bar.
ax = plt.gca()
ax.set_xticks(bar_axis + box_width_scale)
ax.tick_params(axis='x', which='major', labelbottom=False)
ax.set_xticks(bar_centers, minor=True)
ax.set_xticklabels(drug_names, minor=True)
plt.yticks(np.arange(-20,80,20))
plt.grid()

# align='center' is spelled out because the tick placement above relies on it
plt.bar(bar_centers, perc_changes, color=bar_colors, width=box_width_scale, align='center')
plt.ylabel('% Tumor Volume Change')
plt.title('Tumor Change Over 45 Day Treatment')

# Vertical position for a bar's percentage label
def label_y(v):
    """Return the y coordinate for a label: 2 when v >= 0, otherwise -4."""
    return 2 if v >= 0 else -4

# Write each bar's percent change (truncated to an integer) inside the bar,
# at the height chosen by label_y
for i, v in enumerate(perc_changes):
    plt.text(bar_axis[i] + box_width_scale/3, label_y(v), '{:d}%'.format(int(v)), color='w', fontsize='x-large')

# Save the Figure (before showing it, while the figure is still current)
change_fig = plt.gcf()
change_fig.savefig('percent_changes.png')

# Show the Figure. plt.show() is used rather than Figure.show(): the latter
# emits a warning on non-GUI backends such as the inline backend this
# notebook selects.
plt.show()