In [None]:
#Import dependencies
%matplotlib inline
import matplotlib.pyplot as plt
from cycler import cycler
import pandas as pd
import numpy as np
import os
from scipy.stats import sem

#Hide warning messages in notebook
#NOTE(review): called with only the "ignore" action, this filter matches every
#warning category for the rest of the session (deprecation warnings included) —
#consider narrowing it to a specific category/module
import warnings
warnings.filterwarnings("ignore")

#Outline stylesheet to use for plots
#Applied once here; every figure created afterwards picks up the 'ggplot' style
plt.style.use('ggplot')

In [None]:
#Load the raw study data
#Both CSV files are expected in the local "data" folder
data_dir = "data"
mouse_drug_data_to_load = os.path.join(data_dir, "mouse_drug_data.csv")
clinical_trial_data_to_load = os.path.join(data_dir, "clinicaltrial_data.csv")

#One frame per file: mouse -> drug assignment, and the clinical trial measurements
mouse = pd.read_csv(mouse_drug_data_to_load)
clinical = pd.read_csv(clinical_trial_data_to_load)

#Outer-join on "Mouse ID" so rows present in only one of the two files are kept
study_data = mouse.merge(clinical, on="Mouse ID", how="outer")

#Preview the first rows of the combined table
study_data.head()


## Tumor Response to Treatment

In [None]:
#Tumor Response Mean
#Group tumor volume by drug and timepoint
#(the grouped object is kept in tumor_data because the standard error cell reuses it)
tumor_data = study_data.groupby(["Drug", "Timepoint"])["Tumor Volume (mm3)"]

#Mean volume for each (drug, timepoint) pair, flattened back to ordinary columns
tumor_mean = tumor_data.mean().reset_index()

#Reshape: one row per timepoint, one column per drug
tumor_mean_pivot = tumor_mean.pivot(
    index="Timepoint",
    columns="Drug",
    values="Tumor Volume (mm3)",
)

#Preview tumor mean dataframe
tumor_mean_pivot

In [None]:
#Tumor Response Standard Error
#Standard error of the mean for each (drug, timepoint) group,
#flattened back to ordinary columns
tumor_se = tumor_data.sem().reset_index()

#Reshape: one row per timepoint, one column per drug
tumor_se_pivot = tumor_se.pivot(
    index="Timepoint",
    columns="Drug",
    values="Tumor Volume (mm3)",
)

#Preview tumor standard error dataframe
tumor_se_pivot

In [None]:
#Tumor Response Scatter Plot
#Define colors to use for charts (the later plotting cells reuse this list)
color_palette_list = ['#8C0036','#BE4236', '#D0803B', '#E6CF4B', '#99CC36','#94CCAE', '#4D819D',
                      '#394B9C','#7B5CE0','#666666']

#Apply text color and palette BEFORE the figure is created: matplotlib reads these
#rcParams when axes and text objects are built, so assigning them after plotting
#would leave this figure on the stylesheet's default colors
plt.rcParams['text.color'] = '#555555'
plt.rcParams['axes.prop_cycle'] = cycler(color=color_palette_list)

#Create tumor x axis from the recorded timepoints themselves, so x always matches
#the rows being plotted regardless of how the timepoints are spaced
x_axis = tumor_mean_pivot.index
total_time = x_axis[-1]

#Create tumor y axis and plot tumor data: one line per drug, standard error as bars
drugs = tumor_mean_pivot.columns

fig, ax = plt.subplots()

for drug in drugs:
    ax.errorbar(x_axis, tumor_mean_pivot[drug], yerr=tumor_se_pivot[drug],
                marker="o", ls='-', lw=.5, label=drug)

#Format tumor scatter plot
#The right-hand x limit extends past the last timepoint — presumably to leave room for the legend
ax.set_xlim(-3, total_time + 25)
ax.set_title("Tumor Response to Treatment Over Time")
ax.set_xlabel("Days")
ax.set_ylabel("Tumor Volume (mm3)")
ax.legend(loc='best', facecolor="white")

#Save and display tumor scatter plot
#Create the output folder if needed so savefig does not fail on a fresh checkout
os.makedirs("Images", exist_ok=True)
fig.savefig(os.path.join("Images","tumortreatment.png"))
plt.show()
print("INSIGHT: Of the drugs tested, both Capomulin and Ramicane are the only treatments that, on average, decreased the mass of the tumor")

## Metastatic Response to Treatment

In [None]:
#Metastatic Sites Mean
#Group metastatic site counts by drug and timepoint
#(the grouped object is kept in met_data because the standard error cell reuses it)
met_data = study_data.groupby(["Drug", "Timepoint"])["Metastatic Sites"]

#Mean number of sites for each (drug, timepoint) pair, flattened back to ordinary columns
met_mean = met_data.mean().reset_index()

#Reshape: one row per timepoint, one column per drug
met_mean_pivot = met_mean.pivot(
    index="Timepoint",
    columns="Drug",
    values="Metastatic Sites",
)

#Preview metastatic sites mean dataframe
met_mean_pivot


In [None]:
#Metastatic Sites Standard Error
#Standard error of the mean for each (drug, timepoint) group,
#flattened back to ordinary columns
met_se = met_data.sem().reset_index()

#Reshape: one row per timepoint, one column per drug
met_se_pivot = met_se.pivot(
    index="Timepoint",
    columns="Drug",
    values="Metastatic Sites",
)

#Preview metastatic sites standard error dataframe
met_se_pivot

In [None]:
#Metastatic Sites Scatter Plot
#Apply text color and palette BEFORE the figure is created: matplotlib reads these
#rcParams when axes and text objects are built, so the cell renders the same way
#even when it is run on its own
plt.rcParams['text.color'] = '#555555'
plt.rcParams['axes.prop_cycle'] = cycler(color=color_palette_list)

#Create metastatic sites x axis from the recorded timepoints themselves, so x always
#matches the rows being plotted regardless of how the timepoints are spaced
x_axis = met_mean_pivot.index
total_time = x_axis[-1]

#Create metastatic sites y axis and plot metastatic sites data:
#one line per drug, standard error as bars
drugs = met_mean_pivot.columns

fig, ax = plt.subplots()

for drug in drugs:
    ax.errorbar(x_axis, met_mean_pivot[drug], yerr=met_se_pivot[drug],
                marker="o", ls='-', lw=.5, label=drug)

#Format metastatic sites scatter plot
#The right-hand x limit extends past the last timepoint — presumably to leave room for the legend
ax.set_xlim(-3, total_time + 25)
ax.set_title("Metastatic Spread During Treatment")
ax.set_xlabel("Days")
ax.set_ylabel("Metastatic Sites")
ax.legend(loc='best', facecolor="white")

#Save and display metastatic sites scatter plot
#Create the output folder if needed so savefig does not fail on a fresh checkout
os.makedirs("Images", exist_ok=True)
fig.savefig(os.path.join("Images","metastaticspread.png"))
plt.show()
print("INSIGHT: On average, Capomulin and Ramicane show the slowest Metastatic Spread throughout the treatment")

## Survival Rates

In [None]:
#Mouse Count
#Count the "Mouse ID" entries recorded for every (drug, timepoint) pair
mice_by_drug_and_time = study_data.groupby(["Drug", "Timepoint"])["Mouse ID"]
mouse_data = mice_by_drug_and_time.count()

#Flatten the grouped index back to ordinary columns
mouse_count = mouse_data.reset_index()

#Reshape: one row per timepoint, one column per drug
mouse_count_pivot = mouse_count.pivot(
    index="Timepoint",
    columns="Drug",
    values="Mouse ID",
)

#Preview the mouse count dataframe
mouse_count_pivot


In [None]:
#Mouse Count Plot
#Apply text color and palette BEFORE the figure is created: matplotlib reads these
#rcParams when axes and text objects are built, so the cell renders the same way
#even when it is run on its own
plt.rcParams['text.color'] = '#555555'
plt.rcParams['axes.prop_cycle'] = cycler(color=color_palette_list)

#Create mouse count x axis from the recorded timepoints themselves, so x always
#matches the rows being plotted regardless of how the timepoints are spaced
x_axis = mouse_count_pivot.index
total_time = x_axis[-1]

#Create mouse count y axis and plot mouse count data: one line per drug
drugs = mouse_count_pivot.columns

fig, ax = plt.subplots()

for drug in drugs:
    #Counts have no error values here, so a plain line plot is used instead of errorbar
    ax.plot(x_axis, mouse_count_pivot[drug], marker="o", ls='-', lw=.5, label=drug)

#Format mouse count plot
#The right-hand x limit extends past the last timepoint — presumably to leave room for the legend
ax.set_xlim(-3, total_time + 25)
ax.set_title("Mouse Survival Rate")
ax.set_xlabel("Days")
ax.set_ylabel("Mouse Count")
ax.legend(loc='best', facecolor="white")

#Save and display mouse count plot
#Create the output folder if needed so savefig does not fail on a fresh checkout
os.makedirs("Images", exist_ok=True)
fig.savefig(os.path.join("Images","mousesurvival.png"))
plt.show()
print("INSIGHT: Of the mice being tested, those being treated with Capomulin or Ramicane were more likely to survive")

## Summary Bar Graph

In [None]:
#Percent Changes
#Percent change in mean tumor volume between the first and last timepoint, per drug
drugs = tumor_mean_pivot.columns
change_type = []  #not populated in this cell

#For each drug column: (last - first) / first, expressed as a percentage
percent_change = [
    ((volumes.iloc[-1] - volumes.iloc[0]) / volumes.iloc[0]) * 100
    for volumes in (tumor_mean_pivot[drug] for drug in drugs)
]

#Collect the changes in a dataframe and display it for reference
percent_change_df = pd.DataFrame({"Drug":drugs, "Percent_Change":percent_change})
percent_change_df

In [None]:
def pos_neg_color(df,color1,color2):
    """Pick a color for every row of ``df`` based on the sign of its percent change.

    Args:
        df: object exposing a ``Percent_Change`` column.
        color1: value used where ``Percent_Change`` is strictly greater than 0.
        color2: value used everywhere else (zero and negative changes).

    Returns:
        NumPy array with one color entry per row of ``df``.
    """
    is_positive = df["Percent_Change"] > 0
    colors = np.where(is_positive, color1, color2)
    #Transposing a 1-D array leaves it unchanged; kept so the result matches the original form
    return colors.T

#Create axes: one bar position per drug
total_drugs = percent_change_df["Drug"].count()
x_axis = np.arange(0, total_drugs, 1)

#Create and format chart
#Bars are colored by pos_neg_color: first color for positive changes, second otherwise
fig, ax = plt.subplots()
ax.bar(x_axis, percent_change_df["Percent_Change"], align="center",
       color=pos_neg_color(percent_change_df,'#4D819D','#8C0036'))

#Label each bar with its drug name (read from the same dataframe as the bar heights,
#so labels and values cannot drift apart)
ax.set_xticks(x_axis)
ax.set_xticklabels(percent_change_df["Drug"], rotation="vertical")
ax.set_title("Percent of Tumor Change After Treatment")
ax.set_xlabel("Drugs")
ax.set_ylabel("Percent Change")


#Save and display summary bar chart
#Create the output folder if needed so savefig does not fail on a fresh checkout
os.makedirs("Images", exist_ok=True)
fig.savefig(os.path.join("Images","percentchange.png"))
plt.show()
print("INSIGHT: Treatments using Capomulin or Ramicane, on average, resulted in a significant decrease in the tumor volume. All other drug treatments, as well as the placebo showed high increases in tumor volume.")