In [None]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# Input CSV locations (adjust these to match the local data folder)
mouse_drug_data_to_load = "data/mouse_drug_data.csv"
clinical_trial_data_to_load = "data/clinicaltrial_data.csv"

# Load each CSV file into its own DataFrame
mouse_ndrug_df = pd.read_csv(mouse_drug_data_to_load)
ctrial_df = pd.read_csv(clinical_trial_data_to_load)

# Outer-join the two tables on "Mouse ID" so that rows present in only one
# of the files are still kept in the combined dataset
combined_df = mouse_ndrug_df.merge(ctrial_df, how='outer', on='Mouse ID')

# Preview the first rows of the merged table
combined_df.head()

## Tumor Response to Treatment

In [None]:
# Mean tumor volume for every (Drug, Timepoint) pair, flattened so that
# "Drug" and "Timepoint" become ordinary columns instead of index levels
tumor_mean_df = (
    combined_df
    .groupby(['Drug', 'Timepoint'])['Tumor Volume (mm3)']
    .mean()
    .reset_index()
)

# Show the first rows of the result
tumor_mean_df.head()

In [None]:
# Standard error of the mean tumor volume for every (Drug, Timepoint) pair,
# flattened so that "Drug" and "Timepoint" become ordinary columns
tumor_sem_df = (
    combined_df
    .groupby(['Drug', 'Timepoint'])['Tumor Volume (mm3)']
    .sem()
    .reset_index()
)

# Show the first rows of the result
tumor_sem_df.head()

In [None]:
# Reshape the mean tumor volumes to wide format:
# one row per Timepoint, one column per Drug
tumor_mean_pivot = tumor_mean_df.pivot(
    index="Timepoint",
    columns="Drug",
    values="Tumor Volume (mm3)",
)

# Confirm the reshaped layout
tumor_mean_pivot.head()

In [None]:
# Reshape the tumor volume standard errors to wide format:
# one row per Timepoint, one column per Drug
tumor_sem_pivot = tumor_sem_df.pivot(
    index="Timepoint",
    columns="Drug",
    values="Tumor Volume (mm3)",
)

# Confirm the reshaped layout
tumor_sem_pivot.head()

In [None]:
# Generate the Plot (with Error Bars)

# Styling for each plotted drug: (drug column, line/marker color, marker shape)
plot_styles = [
    ("Capomulin", "r", "o"),
    ("Infubinol", "b", "^"),
    ("Ketapril", "g", "s"),
    ("Placebo", "k", "d"),
]

# Draw one series per drug: mean tumor volume over time, with that drug's
# standard error of the mean as the vertical error bar
for drug, color, marker in plot_styles:
    plt.errorbar(tumor_mean_pivot.index, tumor_mean_pivot[drug],
                 yerr=tumor_sem_pivot[drug], color=color, marker=marker,
                 markersize=5, linestyle='--', linewidth=0.5, label=drug)

# Add proper labeling to the plot
plt.title("Tumor Response to Treatment")
plt.ylabel("Tumor Volume (mm3)")
plt.xlabel("Time (Days)")
plt.grid(axis='y')
# Legend entries come from the label= given to each errorbar call above,
# so they cannot drift out of sync with the plotting order
plt.legend(loc="best", fontsize="small", fancybox=True)

# Save the Figure
plt.savefig("../Images/treatment2.png")


What the Tumor Response to Treatment Plot Shows:
The plot shows that the Capomulin treatment appears to have the most positive effect 
on tumor growth over the 45-day period, starting after day 5. Data shows an average tumor 
volume shrinkage of 10 mm3.
The tumor continued to grow after day 5 with the other treatments: volume growth averaged 
between 15 and 25 mm3. It appears the other 2 drugs had generally the same effect as having 
no treatment (taking the placebo), with the Ketapril treatment having the worst effect.

In [None]:
# Show the Figure
# NOTE(review): the plotting calls live in an earlier cell; with the
# `%matplotlib inline` backend the figure is presumably rendered and closed
# when that cell finishes, so this call may display nothing — confirm.
plt.show()

![Tumor Response to Treatment](../Images/treatment.png)

## Metastatic Response to Treatment

In [None]:
# Mean number of metastatic sites for every (Drug, Timepoint) pair, flattened
# so that "Drug" and "Timepoint" become ordinary columns
metsite_mean_df = (
    combined_df
    .groupby(['Drug', 'Timepoint'])['Metastatic Sites']
    .mean()
    .reset_index()
)

# Show the first rows of the result
metsite_mean_df.head()

In [None]:
# Standard error of the mean metastatic-site count for every
# (Drug, Timepoint) pair, flattened so the group keys become ordinary columns
metsite_sem_df = (
    combined_df
    .groupby(['Drug', 'Timepoint'])['Metastatic Sites']
    .sem()
    .reset_index()
)

# Show the first rows of the result
metsite_sem_df.head()

In [None]:
# Reshape the mean metastatic-site counts to wide format:
# one row per Timepoint, one column per Drug
metsite_mean_pivot = metsite_mean_df.pivot(
    index="Timepoint",
    columns="Drug",
    values="Metastatic Sites",
)

# Confirm the reshaped layout
metsite_mean_pivot.head()

In [None]:
# Reshape the metastatic-site standard errors to wide format:
# one row per Timepoint, one column per Drug
metsite_sem_pivot = metsite_sem_df.pivot(
    index="Timepoint",
    columns="Drug",
    values="Metastatic Sites",
)

# Confirm the reshaped layout
metsite_sem_pivot.head()

In [None]:
# Generate the Plot (with Error Bars)

# Styling for each plotted drug: (drug column, line/marker color, marker shape)
plot_styles = [
    ("Capomulin", "r", "o"),
    ("Infubinol", "b", "^"),
    ("Ketapril", "g", "s"),
    ("Placebo", "k", "d"),
]

# Draw one series per drug: mean metastatic-site count over time. The error
# bars always come from the metastatic-site SEM table (the Placebo series
# previously borrowed the tumor-volume SEM by mistake).
for drug, color, marker in plot_styles:
    plt.errorbar(metsite_mean_pivot.index, metsite_mean_pivot[drug],
                 yerr=metsite_sem_pivot[drug], color=color, marker=marker,
                 markersize=5, linestyle='--', linewidth=0.5, label=drug)

# Add proper labeling to the plot
plt.title("Metastatic Spread During Treatment")
plt.ylabel("Met. Sites")
plt.xlabel("Treatment Duration (Days)")
plt.grid(axis='y')
# Legend entries come from the label= given to each errorbar call above
plt.legend(loc="best", fontsize="small", fancybox=True)

# Save the Figure
plt.savefig("../Images/spread2.png")

What the Metastatic Spread During Treatment Plot Shows: The plot shows that metastatic spread (spread of cancer to new areas of the body) occurred with all treatments, but the spread during treatment with the Capomulin drug was 0.5 to nearly 2.5 fewer sites than with the other drugs (including the placebo) over the 45-day period.  The placebo and Ketapril treatments had the same spread at the end of the period, although the spread appeared to occur faster with the placebo treatment.  Spread with the Infubinol treatment started the same as the Ketapril treatment up through day 20, but slowed over the remaining 25 days.

In [None]:
# Show the Figure
# NOTE(review): the plotting calls live in an earlier cell; with the
# `%matplotlib inline` backend the figure is presumably rendered and closed
# when that cell finishes, so this call may display nothing — confirm.
plt.show()

![Metastatic Spread During Treatment](../Images/spread.png)

## Survival Rates

In [None]:
# Count the rows recorded for each (Drug, Timepoint) pair. count() tallies the
# non-null entries of the selected column; "Metastatic Sites" is the column
# used here as the counting metric.
mice_ct = combined_df.groupby(['Drug', 'Timepoint'])['Metastatic Sites'].count()

# Turn the counts into a DataFrame with a descriptive "Mouse Count" column and
# move the group keys out of the index into ordinary columns
survival_df = mice_ct.to_frame(name="Mouse Count").reset_index()

# Show the first rows of the result
survival_df.head()

In [None]:
# Reshape the mouse counts to wide format:
# one row per Timepoint, one column per Drug
survival_pivot = survival_df.pivot(
    index="Timepoint",
    columns="Drug",
    values="Mouse Count",
)

# Confirm the reshaped layout
survival_pivot.head()

In [None]:
# Generate the Plot (Accounting for percentages)

# Styling for each plotted drug: (drug column, line/marker color, marker shape)
plot_styles = [
    ("Capomulin", "r", "o"),
    ("Infubinol", "b", "^"),
    ("Ketapril", "g", "s"),
    ("Placebo", "k", "d"),
]

# Plot each drug's mouse count as a percentage of its own starting count.
# The starting count is read from the first row of the pivot (presumably the
# earliest timepoint, since pivot sorts its index) instead of assuming that
# every group began with exactly 25 mice.
for drug, color, marker in plot_styles:
    counts = survival_pivot[drug]
    plt.plot(100 * counts / counts.iloc[0], color=color, marker=marker,
             markersize=5, linestyle='--', linewidth=0.5, label=drug)

# Add proper labeling to the plot
plt.title("Survival During Treatment")
plt.ylabel("Survival Rate (%)")
plt.xlabel("Time (Days)")
plt.grid(True)
# Legend entries come from the label= given to each plot call above
plt.legend(loc="best", fontsize="small", fancybox=True)

# Save the Figure
plt.savefig("../Images/survival2.png")

What the Survival During Treatment Plot Shows: The survival rate was less than 50% for all treatments except for Capomulin over the 45-day period.  Under the Capomulin treatment, the survival rate remained 100% through day 10, plateaued right under 90% from day 25 to 35, and then plateaued again at 85% for the remaining 5 days.  Survival rates show a steady decline over the period for the Placebo and Infubinol treatments.  Rates took a plunge after day 30 under the Infubinol treatment, losing over 20% of mice in 5 days. Survival rates under the Ketapril treatment reached a 10-day plateau before beginning to fall again, matching the Placebo rates at the end of the 45-day period.

In [None]:
# Show the Figure
# NOTE(review): the plotting calls live in an earlier cell; with the
# `%matplotlib inline` backend the figure is presumably rendered and closed
# when that cell finishes, so this call may display nothing — confirm.
plt.show()

![Survival During Treatment](../Images/survival.png)

## Summary Bar Graph

In [None]:
# Percent change in mean tumor volume per drug, measured from the first row
# of the pivot table to the last row
first_volume = tumor_mean_pivot.iloc[0]
last_volume = tumor_mean_pivot.iloc[-1]
drug_pctchg = 100 * (last_volume - first_volume) / first_volume

# Display the result to confirm
drug_pctchg

In [None]:
# Percent change in the tumor-volume standard error per drug, measured from
# the first row of the SEM pivot table to the last row.
# The difference is taken first and then divided by the starting value, the
# same formula used for the mean tumor volume percent change.
sem_first = tumor_sem_pivot.iloc[0]
sem_last = tumor_sem_pivot.iloc[-1]
# NOTE(review): if the starting SEM is 0 for a drug (e.g. every mouse begins
# with the same tumor volume), this division yields inf/NaN — confirm whether
# a percent change of the SEM is meaningful for this dataset.
drug_pctchg_sem = 100 * (sem_last - sem_first) / sem_first

# Display the result to confirm
drug_pctchg_sem

In [1]:
# Store all Relevant Percent Changes into a Tuple
# (requires `drug_pctchg` from the percent-change cell earlier in the notebook)
rel_pctchg = (drug_pctchg["Capomulin"],
              drug_pctchg["Infubinol"],
              drug_pctchg["Ketapril"],
              drug_pctchg["Placebo"])
rel_pctchg

# Split the data between passing drugs (tumor volume shrank, i.e. negative
# change) and failing drugs (tumor volume did not shrink)
fig, ax = plt.subplots()
ind = np.arange(len(rel_pctchg))   # x locations of the groups
width = 1   # the width of the bars
changes = np.asarray(rel_pctchg, dtype=float)
shrinking = changes < 0
# align='edge' makes each bar span [x, x + width], so the tick placed at
# x + width / 2 below sits under the middle of its bar
rects_pass = ax.bar(ind[shrinking], changes[shrinking], width,
                    color='green', align='edge')
rects_fail = ax.bar(ind[~shrinking], changes[~shrinking], width,
                    color='red', align='edge')

# Orient widths. Add labels, tick marks, etc.
ax.set_ylabel('% Tumor Volume Change')
ax.set_title('Tumor Change Over 45 Day Treatment')
ax.set_xticks(ind + width / 2)
ax.set_xticklabels(('Capomulin', 'Infubinol', 'Ketapril', 'Placebo'))
ax.set_autoscaley_on(False)
ax.set_ylim(-30,70)
ax.grid(True)


# Use functions to label the percentages of changes
def _autolabel(rects, y):
    """Write each bar's height as a whole-number percentage at height ``y``.

    The text is horizontally centered on the bar; the percentage is the bar
    height truncated toward zero by ``int``.
    """
    for rect in rects:
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width() / 2., y, '%d%%' % int(height),
                ha='center', va='bottom')


def autolabelPass(rects):
    """Label the passing bars, placing the text at y = 3."""
    _autolabel(rects, 3)


def autolabelFail(rects):
    """Label the failing bars, placing the text at y = -8."""
    _autolabel(rects, -8)


# Apply the percentage labels to both groups of bars
autolabelPass(rects_pass)
autolabelFail(rects_fail)

# Save the Figure
fig.savefig("../Images/change2.png")

NameError: name 'drug_pctchg' is not defined

What the Summary Bar Graph Shows: Cancer appeared to be in decline (19%) for Capomulin treatments, while on the rise under the other 3 treatments, upwards of 46+ percent over the 45-day period. The graph illustrates Capomulin "passing" during the test period with a negative percent of change and the others "failing" due to the high percent of change over the same period.  Interestingly, Ketapril treatments appeared to be slightly worse than doing nothing at all, with a 6% higher change than that of the placebo treatment.

In [None]:
# Show the Figure
# Ending the cell with the Figure object renders it through the notebook's
# rich display. Figure.show() needs a GUI backend; under `%matplotlib inline`
# it only emits a warning (hidden here by the warnings filter) and draws
# nothing.
fig

![Tumor Change Over 45 Day Treatment](../Images/change.png)