In [None]:
# Include this line to make plots interactive.
# NOTE(review): later cells keep modifying an axes (limits, markers, legend)
# after the cell that called plt.show(), which presumably relies on the figure
# staying live under this interactive backend -- confirm before switching backends.
%matplotlib notebook

In [None]:
#Importing libraries
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from matplotlib import colors
from matplotlib.ticker import FuncFormatter

In [None]:
# Hide warning messages in notebook.
# The filter is installed with no category or module restriction, so it applies
# to every warning raised after this cell has run.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Input CSV locations, relative to the notebook's working directory.
# Adjust both paths if the data files live somewhere else.
mouse_drug_data_to_load, clinical_trial_data_to_load = (
    "data/mouse_drug_data.csv",
    "data/clinicaltrial_data.csv",
)

In [None]:
# Load the mouse -> drug assignment table from CSV
mouse_drug_data = pd.read_csv(mouse_drug_data_to_load)

# Preview only the first rows instead of rendering the whole table
mouse_drug_data.head()

In [None]:
# Load the clinical trial measurements from CSV
clinical_trial = pd.read_csv(clinical_trial_data_to_load)

# Preview only the first rows instead of rendering the whole table
clinical_trial.head()

In [None]:
# Combine the two tables into one dataset.
# A left join keeps every clinical-trial row and attaches the matching
# mouse_drug_data columns; the join key is named exactly once.
mouse_clinical_data = pd.merge(clinical_trial, mouse_drug_data, how="left", on="Mouse ID")

# Preview the merged table
mouse_clinical_data.head()

# Tumor Response to Treatment

In [None]:
# Mean tumor volume for every (Drug, Timepoint) pair
meanTumor = (
    mouse_clinical_data
    .groupby(['Drug', 'Timepoint'])['Tumor Volume (mm3)']
    .mean()
)

# Turn the grouped Series into a flat DataFrame with Drug / Timepoint columns
tumorVolMean = meanTumor.to_frame().reset_index()

# Show the result
tumorVolMean

In [None]:
# Standard error of the mean tumor volume for every (Drug, Timepoint) pair
standardTumor = (
    mouse_clinical_data
    .groupby(['Drug', 'Timepoint'])['Tumor Volume (mm3)']
    .sem()
)

# Turn the grouped Series into a flat DataFrame with Drug / Timepoint columns
standardErrTumor = standardTumor.to_frame().reset_index()

# Preview the result
standardErrTumor.head()

In [None]:
# Reshape the long-format means into a wide table:
# one row per Timepoint, one column per Drug.
# pivot usage based on "https://hackernoon.com/reshaping-data-in-python-fa27dda2ff77"
formatted_tumorMean = tumorVolMean.pivot(
    index='Timepoint',
    columns='Drug',
    values='Tumor Volume (mm3)',
)

# Show the reshaped table
formatted_tumorMean

In [None]:
# Reshape the long-format standard errors the same way:
# one row per Timepoint, one column per Drug.
formatted_standarderr = standardErrTumor.pivot(
    index='Timepoint',
    columns='Drug',
    values='Tumor Volume (mm3)',
)

# Show the reshaped table
formatted_standarderr

In [None]:
# Line chart of mean tumor volume per drug, with standard-error bars
VolumeChart = formatted_tumorMean.plot(
    kind='line',
    yerr=formatted_standarderr,
    title='Tumor Response Treatment',
    figsize=(8, 6),
    linewidth=1,
)
plt.show()

In [None]:
# Fix the visible axis ranges of the tumor-volume chart
VolumeChart.set_xlim(left=-1, right=50)
VolumeChart.set_ylim(bottom=30, top=80)

In [None]:
# Marker styles handed out to the plotted lines, in order
markers = ['H', '^', 'v', 's', '3', 'o', '1', '*','x','+']

# The index wraps around the marker list, so an axes holding more lines than
# there are markers reuses styles instead of raising IndexError.
for i, line in enumerate(VolumeChart.get_lines()):
    line.set_marker(markers[i % len(markers)])

In [None]:
# Rebuild the legend so it shows the updated markers.
# The call is made on the chart's own axes, so it does not depend on which
# axes pyplot currently treats as active.
VolumeChart.legend(loc="best")

In [None]:
# Axis labels for the tumor-volume chart
VolumeChart.set_xlabel("Time")
VolumeChart.set_ylabel("Volume(mm3)")

In [None]:
# Tighten the layout of the figure that owns VolumeChart, addressed explicitly
# instead of through pyplot's notion of the current figure.
VolumeChart.get_figure().tight_layout()

In [None]:
# Write the tumor-response figure to the Images folder
fig = VolumeChart.figure
fig.savefig('Images/Tumor Treatment Response Chart')

#  Metastatic Response to Treatment

In [None]:
# Mean number of metastatic sites for every (Drug, Timepoint) pair
meanMetSite = (
    mouse_clinical_data
    .groupby(['Drug', 'Timepoint'])['Metastatic Sites']
    .mean()
)

# Wrap the grouped Series in a DataFrame (still indexed by Drug, Timepoint)
meanMetSiteData = meanMetSite.to_frame()

# Preview the result
meanMetSiteData.head()

In [None]:
# Standard error of the metastatic-site count for every (Drug, Timepoint) pair
standardErrMetSite = (
    mouse_clinical_data
    .groupby(['Drug', 'Timepoint'])['Metastatic Sites']
    .sem()
)

# Wrap the grouped Series in a DataFrame (still indexed by Drug, Timepoint)
standardErrMetSiteData = standardErrMetSite.to_frame()

# Preview the result
standardErrMetSiteData.head()

In [None]:
# Reshape the mean metastatic-site data to one row per Timepoint and one
# column per Drug.
# The flat frame is rebuilt from the grouped Series (meanMetSite) rather than by
# resetting meanMetSiteData's own index, so re-running this cell yields the
# same frame every time instead of accumulating extra index columns.
meanMetSiteData = pd.DataFrame(meanMetSite).reset_index()
formattedMeanMet = meanMetSiteData.pivot(index='Timepoint', columns='Drug', values='Metastatic Sites')

# Show the reshaped table
formattedMeanMet

In [None]:
# Reshape the metastatic-site standard errors to one row per Timepoint and one
# column per Drug.
# The flat frame is rebuilt from the grouped Series (standardErrMetSite) rather
# than by resetting standardErrMetSiteData's own index, so re-running this cell
# yields the same frame every time instead of accumulating extra index columns.
standardErrMetSiteData = pd.DataFrame(standardErrMetSite).reset_index()
formattedErrMet = standardErrMetSiteData.pivot(index='Timepoint', columns='Drug', values='Metastatic Sites')

# Show the reshaped table
formattedErrMet

In [None]:
# Line chart of mean metastatic sites per drug, with standard-error bars
metaStaticPlot = formattedMeanMet.plot(
    kind='line',
    yerr=formattedErrMet,
    title='Metastatic Sites',
    figsize=(8, 6),
)
plt.show()

In [None]:
# Axis labels for the metastatic-sites chart
metaStaticPlot.set_xlabel(xlabel='Time')
metaStaticPlot.set_ylabel(ylabel='Metastatic Sites')

In [None]:
# Marker styles handed out to the plotted lines, in order
markers = ['H', '^', 'v', 's', '3', 'o', '1', '*','x','+']

# The index wraps around the marker list, so an axes holding more lines than
# there are markers reuses styles instead of raising IndexError.
for i, line in enumerate(metaStaticPlot.get_lines()):
    line.set_marker(markers[i % len(markers)])

In [None]:
# Rebuild the legend so it shows the updated markers.
# The call is made on the chart's own axes, so it does not depend on which
# axes pyplot currently treats as active.
metaStaticPlot.legend(loc="best")

In [None]:
# Tighten the layout of the figure that owns metaStaticPlot, addressed
# explicitly instead of through pyplot's notion of the current figure.
metaStaticPlot.get_figure().tight_layout()

In [None]:
# Write the metastatic-sites figure to the Images folder
fig = metaStaticPlot.figure
fig.savefig('Images/Metastatic Sites Chart')

#  Survival Rates

In [None]:
# Number of 'Mouse ID' entries recorded for every (Drug, Timepoint) pair
mouse = mouse_clinical_data.groupby(['Drug', 'Timepoint'])['Mouse ID'].count()

# Wrap the counts in a DataFrame whose single column is named 'Mouse Count'
mouseCount = mouse.to_frame(name='Mouse Count')

# Preview the result
mouseCount.head()

In [None]:
# Reshape the mouse counts to one row per Timepoint and one column per Drug.
# The flat frame is rebuilt from the grouped Series (mouse) rather than by
# resetting mouseCount's own index, so re-running this cell yields the same
# frame every time instead of accumulating extra index columns.
mouseCount = mouse.reset_index(name='Mouse Count')
formattedMouseCount = mouseCount.pivot(index='Timepoint', columns='Drug', values='Mouse Count')

# Show the reshaped table
formattedMouseCount

In [None]:
# Line chart of the mouse count per drug over time.
# The data source is the pivoted mouse-count table, matching the chart title
# and the file name it is later saved under.
surRatesPlot = formattedMouseCount.plot(kind='line', title= 'Mouse Count', figsize=(10,8))

In [None]:
# Give each line its own marker, reusing the marker list defined for the
# earlier charts. The index wraps around so an axes holding more lines than
# there are markers reuses styles instead of raising IndexError.
for i, line in enumerate(surRatesPlot.get_lines()):
    line.set_marker(markers[i % len(markers)])

In [None]:
# Rebuild the legend on the mouse-count axes itself, so the call does not
# depend on which axes pyplot currently treats as active.
surRatesPlot.legend(loc='best')

In [None]:
# Tighten the layout of the figure that owns surRatesPlot, addressed
# explicitly instead of through pyplot's notion of the current figure.
surRatesPlot.get_figure().tight_layout()

In [None]:
# Write the mouse-count figure to the Images folder
fig = surRatesPlot.figure
fig.savefig('Images/Mouse Count Chart')

In [None]:
# Display the open figure(s)
plt.show()

# Summary Bar Graph

In [None]:
# Percent change in mean tumor volume per drug, from the first timepoint row
# to the last one. The last row is addressed as -1 so the calculation does not
# assume a fixed number of timepoints.
first_volume = formatted_tumorMean.iloc[0, :]
last_volume = formatted_tumorMean.iloc[-1, :]
change = (last_volume - first_volume) / first_volume * 100

# One row per drug: the percent change plus a flag marking positive changes.
# The column is named explicitly rather than renamed from an implicit label.
summary_df = change.to_frame(name="Percent")
summary_df['positive'] = change.values > 0

summary_df

In [None]:
# Bar chart of the percent change per drug; bars are green where 'positive'
# is True and red where it is False.
# Colour-by-sign approach from https://stackoverflow.com/questions/22311139/matplotlib-bar-chart-choose-color-if-value-is-positive-vs-value-is-negative
fig, ax = plt.subplots()
summaryPlot = summary_df['Percent'].plot(
    kind='bar',
    alpha=0.5,
    align='center',
    color=summary_df.positive.map({True: 'g', False: 'r'}),
    ax=ax,
    figsize=(8, 6),
    title='Percentage Summary',
)


In [None]:
# Horizontal reference line at 0% spanning x = -1 .. 10
summaryPlot.hlines(y=0, xmin=-1, xmax=10, alpha=0.5, color='black')

In [None]:
# Label each bar with its drug name (the summary index), written vertically
summaryPlot.set_xticklabels(labels=summary_df.index, rotation='vertical')

In [None]:
# Horizontal range sized from the number of rows in the summary table
summaryPlot.set_xlim(left=-0.5, right=len(summary_df) - 0.25)

In [None]:
# Y-axis label for the summary chart
summaryPlot.set_ylabel(ylabel='Percentage Change')

In [None]:
# Write each bar's value (truncated to an integer, with a % sign) next to the bar.
# Technique from
# http://composition.al/blog/2015/11/29/a-better-way-to-add-labels-to-bar-charts-with-matplotlib/
y_bottom, y_top = ax.get_ylim()
y_height = y_top - y_bottom

for bar in ax.patches:
    height = bar.get_height()

    # Share of the axis height that this bar occupies
    p_height = height / y_height

    # Bars filling more than 95% of the axis get the label pulled down by 5%
    # of the axis height; every other bar gets it nudged up by 1%.
    offset = -0.05 if p_height > 0.95 else 0.01
    label_position = height + (y_height * offset)

    ax.text(
        bar.get_x() + bar.get_width() / 2.,
        label_position,
        '%d' % int(height) + '%',
        ha='center',
        va='bottom',
    )


In [None]:
# Show the y-axis tick values as whole-number percentages. Approach from
# https://stackoverflow.com/questions/36116718/how-to-change-bar-chart-values-to-percentages-matplotlib
def _percent_tick(y, pos):
    """Format tick value `y` as an integer followed by a percent sign."""
    return "%d%%" % (y)


formatter = FuncFormatter(_percent_tick)
ax.yaxis.set_major_formatter(formatter)

In [None]:
# Tighten the layout of the figure that owns summaryPlot, addressed
# explicitly instead of through pyplot's notion of the current figure.
summaryPlot.get_figure().tight_layout()

In [None]:
# Write the percentage-summary bar chart to its own file in the Images folder,
# so it does not overwrite the mouse-count chart saved earlier.
fig = summaryPlot.get_figure()
fig.savefig('Images/Percentage Summary Chart')

In [None]:
# Display the open figure(s)
plt.show()