In [1]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the two CSV inputs: the mouse/drug table and the clinical-trial table
mouse_data = pd.read_csv("Resources/mouse_drug_data.csv")
trial_data = pd.read_csv("Resources/clinicaltrial_data.csv")

# Outer-join on 'Mouse ID' so rows that appear in only one file are still kept
combined_data = trial_data.merge(mouse_data, how="outer", on="Mouse ID")

# Preview the first rows of the merged table
combined_data.head()

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug
0,b128,0,45.0,0,Capomulin
1,b128,5,45.651331,0,Capomulin
2,b128,10,43.270852,0,Capomulin
3,b128,15,43.784893,0,Capomulin
4,b128,20,42.731552,0,Capomulin


## Tumor Response to Treatment

In [5]:
# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint.
# numeric_only=True limits the average to numeric columns: combined_data also
# carries the string 'Mouse ID' column, which cannot be averaged (pandas >= 2.0
# raises TypeError instead of silently dropping it).
mean_grouped_data = combined_data.groupby(['Drug', 'Timepoint']).mean(numeric_only=True)
mean_grouped_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Tumor Volume (mm3),Metastatic Sites
Drug,Timepoint,Unnamed: 2_level_1,Unnamed: 3_level_1
Capomulin,0,45.000000,0.000000
Capomulin,5,44.266086,0.160000
Capomulin,10,43.084291,0.320000
Capomulin,15,42.064317,0.375000
Capomulin,20,40.716325,0.652174
Capomulin,25,39.939528,0.818182
Capomulin,30,38.769339,1.090909
Capomulin,35,37.816839,1.181818
Capomulin,40,36.958001,1.380952
Capomulin,45,36.236114,1.476190


In [12]:
# Turn the (Drug, Timepoint) index back into ordinary columns and keep only
# the tumor-volume mean by discarding the metastatic-site column
mean_tumor_vol = (
    mean_grouped_data
    .reset_index()
    .drop(columns=["Metastatic Sites"])
)
mean_tumor_vol.head()

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,45.0
1,Capomulin,5,44.266086
2,Capomulin,10,43.084291
3,Capomulin,15,42.064317
4,Capomulin,20,40.716325


In [None]:
# Tidy frame of mean tumor volume: one row per (Drug, Timepoint)
mean_tumor_vol = mean_grouped_data.drop(columns='Metastatic Sites')
mean_tumor_vol_reset_index = mean_tumor_vol.reset_index()

# Keep the three columns in a fixed order and give the value column its "Mean ..." name
mean_tumor_df = (
    mean_tumor_vol_reset_index[['Drug', 'Timepoint', 'Tumor Volume (mm3)']]
    .rename(columns={'Tumor Volume (mm3)': 'Mean Tumor Volume (mm3)'})
)

# Show the resulting frame
mean_tumor_df

In [None]:
# Store the Standard Error of Tumor Volumes Grouped by Drug and Timepoint
# NOTE: the scipy imports are kept for backward compatibility only; the SEM
# below is computed by the pandas GroupBy `.sem()` method, not by scipy.
import scipy.stats
from scipy.stats import sem

# `grouped_data` was used here but is not assigned anywhere in the visible
# notebook (NameError on a fresh kernel), so build the grouping explicitly.
grouped_data = combined_data.groupby(['Drug', 'Timepoint'])
standard_error = grouped_data['Tumor Volume (mm3)'].sem()

# Convert to DataFrame
standard_error_reset_index = standard_error.reset_index()

stderr_tumor_df = pd.DataFrame({'Drug':standard_error_reset_index['Drug'],
                         'Timepoint':standard_error_reset_index['Timepoint'],
                         'SEM Tumor Volume (mm3)': standard_error_reset_index['Tumor Volume (mm3)']})


# Preview DataFrame
stderr_tumor_df.head(10)

In [None]:
# Reshape to wide form: rows are timepoints, columns are (value name, drug).
# Indexing by both keys and unstacking 'Drug' is what pivot does when no
# `values` argument is given.
mean_tumor_df_reformat = (
    mean_tumor_df
    .set_index(["Timepoint", "Drug"])
    .unstack("Drug")
)

# Show the reshaped frame
mean_tumor_df_reformat

In [None]:
# x values: taken from the pivoted means so they always match the data being
# plotted (0, 5, ..., 45 for the dataset previewed above)
timepoints = mean_tumor_df_reformat.index.values

# SEM in the same wide layout as the means (rows = Timepoint, columns = Drug),
# so each drug is selected by label instead of by hard-coded row ranges
tumor_sem_by_drug = stderr_tumor_df.pivot(index="Timepoint", columns="Drug",
                                          values="SEM Tumor Volume (mm3)")

# (drug, marker, color) for each series on the chart
tumor_plot_styles = [
    ("Capomulin", "o", "r"),
    ("Infubinol", "^", "b"),
    ("Ketapril", "s", "g"),
    ("Placebo", "d", "k"),
]

# Plot
for drug, marker, color in tumor_plot_styles:
    # y: mean tumor volume for this drug, looked up by column label
    mean_volume = mean_tumor_df_reformat[("Mean Tumor Volume (mm3)", drug)]
    # y-error: SEM aligned to the same timepoints as the means
    sem_volume = tumor_sem_by_drug[drug].reindex(mean_volume.index)
    # Plain arrays are passed so matplotlib never indexes a Series by label
    plt.errorbar(timepoints, mean_volume.values, yerr=sem_volume.values,
                 fmt=marker, color=color, ecolor=color, label=drug,
                 linestyle='--', linewidth=0.5)

#Format
plt.title("Tumor Response to Treatment")
plt.xlabel("Time (Days)")
plt.ylabel("Average Tumor Volume (mm3)")
plt.legend()
plt.grid()
plt.show()

## Metastatic Response to Treatment

In [None]:
# Mean metastatic-site count per (Drug, Timepoint): drop the tumor-volume mean
mean_met_site = mean_grouped_data.drop(columns='Tumor Volume (mm3)')

# Flatten the index into columns
mean_met_site_reset_index = mean_met_site.reset_index()

# Keep the three columns in a fixed order and give the value column its "Mean ..." name
mean_met_df = (
    mean_met_site_reset_index[['Drug', 'Timepoint', 'Metastatic Sites']]
    .rename(columns={'Metastatic Sites': 'Mean Metastatic Sites'})
)

# Show the first rows
mean_met_df.head()

In [None]:
# Store the Standard Error associated with Metastatic Sites Grouped by Drug and Timepoint
# `grouped_data` is not assigned anywhere in the visible notebook, so the
# grouping is built here explicitly instead of relying on leftover kernel state.
grouped_data = combined_data.groupby(['Drug', 'Timepoint'])
standard_error = grouped_data['Metastatic Sites'].sem()

# Convert to DataFrame
standard_error_reset_index = standard_error.reset_index()

stderr_met_df = pd.DataFrame({'Drug':standard_error_reset_index['Drug'],
                         'Timepoint':standard_error_reset_index['Timepoint'],
                         'SEM Metastatic Sites': standard_error_reset_index['Metastatic Sites']})

# Preview DataFrame
stderr_met_df.head()

In [None]:
# Reshape to wide form: rows are timepoints, columns are (value name, drug).
# Indexing by both keys and unstacking 'Drug' is what pivot does when no
# `values` argument is given.
mean_met_df_reformat = (
    mean_met_df
    .set_index(["Timepoint", "Drug"])
    .unstack("Drug")
)

# Show the reshaped frame
mean_met_df_reformat

In [None]:
# SEM in the same wide layout as the means (rows = Timepoint, columns = Drug),
# so each drug is selected by label instead of by hard-coded row ranges
met_sem_by_drug = stderr_met_df.pivot(index="Timepoint", columns="Drug",
                                      values="SEM Metastatic Sites")

# (drug, marker, color) for each series on the chart
met_plot_styles = [
    ("Capomulin", "o", "r"),
    ("Infubinol", "^", "b"),
    ("Ketapril", "s", "g"),
    ("Placebo", "d", "k"),
]

# Plot
for drug, marker, color in met_plot_styles:
    # y: mean metastatic sites for this drug, looked up by column label
    mean_sites = mean_met_df_reformat[("Mean Metastatic Sites", drug)]
    # y-error: SEM aligned to the same timepoints as the means
    sem_sites = met_sem_by_drug[drug].reindex(mean_sites.index)
    # x comes from the series' own Timepoint index, so this cell does not
    # depend on a variable left behind by an earlier plotting cell
    plt.errorbar(mean_sites.index.values, mean_sites.values, yerr=sem_sites.values,
                 fmt=marker, color=color, ecolor=color, label=drug,
                 linestyle='--', linewidth=0.5)

#Format
plt.title("Metastatic Spread During Treatment")
plt.xlabel("Time (Days)")
plt.ylabel("Metastatic Sites")
plt.legend()
plt.grid()
plt.show()

## Survival Rates

In [None]:
# Count the rows recorded for each (Drug, Timepoint) pair; 'Mouse ID' is just
# the column being counted (we could pass any column here)
grouped_mice = combined_data.groupby(['Drug', 'Timepoint'])
mouse_ct = grouped_mice['Mouse ID'].count()

# Flatten to a DataFrame, naming the count column "Mouse Count"
mouse_ct_df = mouse_ct.reset_index(name="Mouse Count")

# Show the first rows
mouse_ct_df.head()

In [None]:
# Reshape to wide form: rows are timepoints, columns are (value name, drug).
# Indexing by both keys and unstacking 'Drug' is what pivot does when no
# `values` argument is given.
mouse_ct_df_reformat = (
    mouse_ct_df
    .set_index(["Timepoint", "Drug"])
    .unstack("Drug")
)

# Show the reshaped frame
mouse_ct_df_reformat

In [None]:
# Counts per timepoint with one column per drug (drop the outer "Mouse Count" level)
mouse_counts_by_drug = mouse_ct_df_reformat["Mouse Count"]

# (drug, line/marker format, color) for each series on the chart
survival_plot_styles = [
    ("Capomulin", "--o", "r"),
    ("Infubinol", "--^", "b"),
    ("Ketapril", "--s", "g"),
    ("Placebo", "--d", "k"),
]

# Plot
for drug, line_format, color in survival_plot_styles:
    drug_counts = mouse_counts_by_drug[drug]
    # y-values: percent of this drug's OWN first-timepoint count. The previous
    # version divided every drug by the first drug column's initial count,
    # which is only right when all groups start with the same number of mice.
    survival_rate = drug_counts / drug_counts.iloc[0] * 100
    plt.plot(drug_counts.index.values, survival_rate.values, line_format,
             c=color, label=drug, linewidth=0.5)

plt.title('Survival During Treatment')
plt.xlabel('Time (Days)')
plt.ylabel('Survival Rate (%)')
plt.legend()
plt.grid()
plt.show()

## Summary Bar Graph

In [None]:
# Percent change in mean tumor volume for every drug, comparing the first and
# last rows (timepoints) of the pivoted means
starting_volume = mean_tumor_df_reformat.iloc[0]
ending_volume = mean_tumor_df_reformat.iloc[-1]

pct_change = (ending_volume - starting_volume) / starting_volume * 100
pct_change

In [None]:
# Store all Relevant Percent Changes into a Tuple
# pct_change is keyed by (value-column name, drug) because it is derived from
# the pivoted mean-tumor-volume frame.
summary_drugs = ("Capomulin", "Infubinol", "Ketapril", "Placebo")
pct_changes = tuple(pct_change[("Mean Tumor Volume (mm3)", drug)] for drug in summary_drugs)

# Splice the data between passing and failing drugs
# A drug "passes" (green) when mean tumor volume shrank, "fails" (red) otherwise.
bar_colors = ["g" if change < 0 else "r" for change in pct_changes]

# Orient widths. Add labels, tick marks, etc. 
# The figure is created here; previously `fig` was never defined, so the
# final show call raised NameError.
fig, ax = plt.subplots()
x_positions = np.arange(len(summary_drugs))
bars = ax.bar(x_positions, pct_changes, width=1, color=bar_colors, edgecolor="k")
ax.set_xticks(x_positions)
ax.set_xticklabels(summary_drugs)
ax.axhline(0, color="k", linewidth=0.5)
ax.set_title("Tumor Volume Change Over Treatment")
ax.set_xlabel("Drug")
ax.set_ylabel("% Tumor Volume Change")
ax.grid()


# Use functions to label the percentages of changes
def label_bars(axis, rectangles):
    """Write each bar's height, formatted as a whole-number percent, at the bar's end.

    axis: the matplotlib Axes that owns the bars.
    rectangles: the bar container returned by ``axis.bar``.
    """
    for rect in rectangles:
        height = rect.get_height()
        # Put the text just above positive bars and just below negative ones
        offset_points = 3 if height >= 0 else -12
        axis.annotate("{:.0f}%".format(height),
                      xy=(rect.get_x() + rect.get_width() / 2, height),
                      xytext=(0, offset_points),
                      textcoords="offset points",
                      ha="center")


# Call functions to implement the function calls
label_bars(ax, bars)

# Save the Figure
# TODO: no output path is defined anywhere in the visible notebook; add
# fig.savefig(<path>) here once a location is chosen.

# Show the Figure
plt.show()