In [None]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# Input CSV locations, relative to the notebook's working directory.
mouse_drug_data_to_load, clinical_trial_data_to_load = (
    "data/mouse_drug_data.csv",
    "data/clinicaltrial_data.csv",
)

# Load both files. Note: despite its name, `drug_data` holds the rows read
# from the clinical-trial CSV; `mouse_data` holds the mouse/drug CSV.
mouse_data, drug_data = (
    pd.read_csv(csv_path)
    for csv_path in (mouse_drug_data_to_load, clinical_trial_data_to_load)
)

# Outer-join on "Mouse ID" so rows that appear in only one of the files are kept.
combined_df = pd.merge(
    mouse_data,
    drug_data,
    how="outer",
    on="Mouse ID",
)

# Preview the first rows of the merged table.
combined_df.head()

In [None]:
# Group the merged data by (Drug, Timepoint). The grouped object is kept under
# its own name because the standard-error cell reuses it.
drug_treatment_df = combined_df.groupby(["Drug", "Timepoint"])

# Mean tumor volume per group, flattened so "Drug" and "Timepoint" become
# ordinary columns again.
means_tumor = (
    drug_treatment_df["Tumor Volume (mm3)"]
    .mean()
    .reset_index()
)

# Expose the result under the name the reshaping cell expects.
mean_tumor_response = pd.DataFrame(means_tumor)

# Preview the first rows.
mean_tumor_response.head()

In [None]:
# Standard error of the mean of tumor volume for every (Drug, Timepoint) group,
# flattened so the group keys become ordinary columns.
sem_drug_treatment_df = (
    drug_treatment_df["Tumor Volume (mm3)"]
    .sem()
    .reset_index()
)

# Expose the result under the name the reshaping cell expects.
sem_tumor_response = pd.DataFrame(sem_drug_treatment_df)

# Preview the first rows.
sem_tumor_response.head()

In [None]:
# Reshape the long mean table to wide form: one row per Timepoint and one
# column per Drug.
reorganize_tumor_mean_df = mean_tumor_response.pivot_table(
    index=["Timepoint"],
    columns=["Drug"],
)

# Keep only the "Tumor Volume (mm3)" part of the pivoted columns so the
# remaining column labels are the drug names.
mean_response_per_drug = pd.DataFrame(
    reorganize_tumor_mean_df["Tumor Volume (mm3)"]
)

# Preview the reshaped table.
mean_response_per_drug.head()

In [None]:
# Reshape the long standard-error table to wide form: one row per Timepoint
# and one column per Drug.
sem_reorganize_tumor_df = sem_tumor_response.pivot_table(
    index=["Timepoint"],
    columns=["Drug"],
)

# Keep only the "Tumor Volume (mm3)" part of the pivoted columns so the
# remaining column labels are the drug names.
sem_response_per_drug = pd.DataFrame(
    sem_reorganize_tumor_df["Tumor Volume (mm3)"]
)

# Preview the reshaped table.
sem_response_per_drug.head()


In [None]:
# Plot mean tumor volume over time, with standard-error bars, for the four
# treatments being compared.
fig, ax = plt.subplots()

x_axis = mean_response_per_drug.index

# (marker, color) for each plotted treatment. Keeping the styling in one
# mapping replaces four copy-pasted errorbar calls.
treatment_styles = {
    "Capomulin": ("s", "blue"),
    "Infubinol": ("^", "tomato"),
    "Ketapril": ("v", "indigo"),
    "Placebo": ("8", "forestgreen"),
}

for drug, (marker, color) in treatment_styles.items():
    ax.errorbar(
        x_axis,
        mean_response_per_drug[drug],
        yerr=sem_response_per_drug[drug],
        marker=marker,
        color=color,
        linewidth=0.5,
        # Explicit label: without it the legend has no entries to show unless
        # the plotting library happens to infer one from the data.
        label=drug,
    )

# Leave a little horizontal room on both sides of the data.
ax.set_xlim(-2, max(x_axis) + 3)

# Labels, legend and grid.
ax.set_title("Tumor Response to Treatment")
ax.set_ylabel("Tumor Volume (mm3)")
ax.set_xlabel("Time (Days)")
ax.legend(loc="best", fontsize="small", fancybox=True)
ax.grid()

# Save before showing: the figure may be cleared once it has been displayed.
fig.savefig("Images/tumor_response.png")

# Show the figure.
plt.show()

In [None]:
# Mean number of metastatic sites for every (Drug, Timepoint) group.
# The result stays indexed by the two group keys.
metastatic_response = (
    combined_df
    .groupby(["Drug", "Timepoint"])["Metastatic Sites"]
    .mean()
)

# Wrap the grouped means in a one-column DataFrame.
metastatic_df = pd.DataFrame(metastatic_response)

# Preview the first rows.
metastatic_df.head()

In [None]:
# Standard error of the mean of metastatic-site counts for every
# (Drug, Timepoint) group. The result stays indexed by the two group keys.
sem_metastatic_response = (
    combined_df
    .groupby(["Drug", "Timepoint"])["Metastatic Sites"]
    .sem()
)

# Wrap the grouped standard errors in a one-column DataFrame.
metastatic_sem_df = pd.DataFrame(sem_metastatic_response)

# Preview the first rows.
metastatic_sem_df.head()

In [None]:
# Reshape the grouped means to wide form: one row per Timepoint and one column
# per Drug. "Timepoint" and "Drug" are index levels of `metastatic_df` here,
# referenced by name.
reorganize_metastatic_df = metastatic_df.pivot_table(
    index=["Timepoint"],
    columns=["Drug"],
)

# Keep only the "Metastatic Sites" part of the pivoted columns so the
# remaining column labels are the drug names.
metastatic_mean_df = pd.DataFrame(
    reorganize_metastatic_df["Metastatic Sites"]
)

# Show the reshaped table.
metastatic_mean_df

In [None]:
# Reshape the grouped standard errors to wide form: one row per Timepoint and
# one column per Drug.
reorganize_sem_metastatic_df = metastatic_sem_df.pivot_table(
    index=["Timepoint"],
    columns=["Drug"],
)

# Keep only the "Metastatic Sites" part of the pivoted columns so the
# remaining column labels are the drug names, then show the table.
metastatic_sem = pd.DataFrame(
    reorganize_sem_metastatic_df["Metastatic Sites"]
)
metastatic_sem

In [None]:
# Plot the mean number of metastatic sites over time, with standard-error
# bars, for the four treatments being compared.
fig, ax = plt.subplots()

x_axis = metastatic_mean_df.index

# (marker, color) for each plotted treatment. Keeping the styling in one
# mapping replaces four copy-pasted errorbar calls.
treatment_styles = {
    "Capomulin": ("s", "blue"),
    "Infubinol": ("^", "tomato"),
    "Ketapril": ("v", "indigo"),
    "Placebo": ("8", "forestgreen"),
}

for drug, (marker, color) in treatment_styles.items():
    ax.errorbar(
        x_axis,
        metastatic_mean_df[drug],
        yerr=metastatic_sem[drug],
        marker=marker,
        color=color,
        linewidth=0.5,
        # Explicit label: without it the legend has no entries to show unless
        # the plotting library happens to infer one from the data.
        label=drug,
    )

# Leave a little horizontal room on both sides of the data.
ax.set_xlim(-2, max(x_axis) + 3)

# Labels, legend and grid.
ax.set_title("Metastatic Spread During Treatment")
ax.set_ylabel("Met. Sites")
ax.set_xlabel("Treatment Duration (Days)")
ax.legend(loc="best", fontsize="small", fancybox=True)
ax.grid()

# Save before showing: the figure may be cleared once it has been displayed.
fig.savefig("Images/metastatic_spread.png")

# Show the figure.
plt.show()

In [None]:
# Number of "Mouse ID" entries for every (Drug, Timepoint) group.
# NOTE(review): this counts rows, not distinct mice; if a mouse can appear more
# than once in a group, `nunique()` may be the intended metric - confirm
# against the data.
mouse_count = (
    combined_df
    .groupby(["Drug", "Timepoint"])["Mouse ID"]
    .count()
)

# Wrap the counts in a one-column DataFrame.
mouse_count_df = pd.DataFrame(mouse_count)

# Preview the first rows.
mouse_count_df.head()

In [None]:
# Reshape the grouped counts to wide form: one row per Timepoint and one
# column per Drug.
combined_mouse_df = mouse_count_df.pivot_table(
    index=["Timepoint"],
    columns=["Drug"],
)

# Keep only the "Mouse ID" part of the pivoted columns so the remaining
# column labels are the drug names.
mouse_counts_per_drug = pd.DataFrame(combined_mouse_df["Mouse ID"])

# Preview the reshaped table.
mouse_counts_per_drug.head()

In [None]:
# Treatments compared in the survival analysis.
survival_drugs = ["Capomulin", "Infubinol", "Ketapril", "Placebo"]

# Mouse counts per timepoint for the selected treatments.
# Columns are selected by drug name rather than by position, so a change in
# the set or order of drugs in the data raises a KeyError instead of silently
# picking the wrong column. A single selection also replaces the chained
# merges: every column already shares the same "Timepoint" index.
combined_survival_rate = mouse_counts_per_drug[survival_drugs].copy()

# Show the selected counts.
combined_survival_rate

In [None]:
# Percentage of mice remaining at each timepoint, relative to the first row
# (the earliest timepoint) of the count table.
#
# Dividing the labelled DataFrame by its first row aligns on column labels and
# keeps both the Timepoint index and the drug column names. This replaces a
# loop that recomputed the same whole-array expression on every iteration, and
# a hard-coded relabelling of rows and columns that would silently mislabel
# the data if the timepoints or drug order ever changed.
baseline_counts = combined_survival_rate.iloc[0]
renamed_df = combined_survival_rate / baseline_counts * 100

renamed_df.index.names = ['Timepoint']

# Show the survival percentages.
renamed_df

In [None]:
# Plot the survival percentage over time for the four treatments being compared.
fig, ax = plt.subplots()

x_axis = renamed_df.index

# (marker, color) for each plotted treatment. Keeping the styling in one
# mapping replaces four copy-pasted plot calls.
treatment_styles = {
    "Capomulin": ("s", "blue"),
    "Infubinol": ("^", "tomato"),
    "Ketapril": ("v", "indigo"),
    "Placebo": ("8", "forestgreen"),
}

for drug, (marker, color) in treatment_styles.items():
    ax.plot(
        x_axis,
        renamed_df[drug],
        marker=marker,
        color=color,
        linewidth=0.5,
        # Explicit label: without it the legend has no entries to show unless
        # the plotting library happens to infer one from the data.
        label=drug,
    )

# Axis limits. The y-range is taken across every plotted treatment instead of
# assuming which drug holds the minimum and which the maximum.
plotted = renamed_df[list(treatment_styles)]
ax.set_xlim(-2, max(x_axis) + 3)
ax.set_ylim(plotted.min().min() - 4, plotted.max().max() + 4)

# Labels, grid and legend.
ax.set_title("Survival During Treatment")
ax.set_ylabel("Survival Rate (%)")
ax.set_xlabel("Treatment Duration (Days)")
ax.grid()
ax.legend(loc="best", fontsize="small", fancybox=True)

# Save before showing: the figure may be cleared once it has been displayed.
fig.savefig("Images/survival_rates-1.png")

# Show the figure.
plt.show()

In [None]:
# Re-display the wide table of mean tumor volume (rows: Timepoint, columns:
# Drug) built earlier in the notebook; the percent-change calculation in the
# next cell reads its first and last rows.
mean_response_per_drug

In [None]:
# Percent change in mean tumor volume for each drug, comparing the last row of
# the wide table (final timepoint) with its first row (initial timepoint).
first_timepoint_volume = mean_response_per_drug.iloc[0]
last_timepoint_volume = mean_response_per_drug.iloc[-1]

percent_change = (
    (last_timepoint_volume - first_timepoint_volume) / first_timepoint_volume
) * 100

# Show the per-drug percent changes.
percent_change

In [None]:
# Collect (drug, percent change) pairs for the four treatments being compared.
# The values are read from `percent_change` rather than typed in by hand, so
# the chart cannot drift out of sync with the data it is meant to summarise.
drugs_pc = [
    (drug, percent_change[drug])
    for drug in ("Capomulin", "Infubinol", "Ketapril", "Placebo")
]

# One centered bar per drug, with the drug names as tick labels.
fig, ax = plt.subplots()
bar_positions = range(len(drugs_pc))
bar = ax.bar(bar_positions, [pct for _, pct in drugs_pc], align='center', color='g')
ax.set_xticks(bar_positions)
ax.set_xticklabels([name for name, _ in drugs_pc])
ax.set_title("Tumor Change Over 45 Day Treatment")
ax.set_ylabel("% Tumor Volume Change")
ax.grid()

# Save before showing: the figure may be cleared once it has been displayed.
fig.savefig("Images/tumor_volume.png")

# Show the figure.
plt.show()