In [1]:
%matplotlib notebook

In [2]:
# Dependencies and Setup
#%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Clinical Trial and Mouse Drug Data files to load
mouse_drug_data_to_load = "data/mouse_drug_data.csv"
clinical_trial_data_to_load = "data/clinicaltrial_data.csv"

# Read the Mouse and Drug Data and the Clinical Trial Data
mouse_drug_df = pd.read_csv(mouse_drug_data_to_load)
clinical_trial_df = pd.read_csv(clinical_trial_data_to_load)

# Combine the data into a single dataset
trial_data_complete = pd.merge(clinical_trial_df, mouse_drug_df, how="left", on=["Mouse ID", "Mouse ID"])

# Display the data table for preview
trial_data_complete.head()
#My Analysis is based on the below four drugs, though was not sure if dataset should be 
#just limited to these four hence not done that.
#drug_names = ["Capomulin","Ketapril","Infubinol","Placebo"]
drug_names = ["Capomulin","Infubinol","Ketapril", "Placebo"]

## Tumor Response to Treatment

In [4]:
# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint 
#Tumor Response data based on Drug, Timepoint and Tumor Volume(mm3)
tumor_respone_data = trial_data_complete[["Drug","Timepoint","Tumor Volume (mm3)"]]
#Grouping on Drug and Timepoint
drug_timepoint_grp = tumor_respone_data.groupby(["Drug","Timepoint"])
#Obtaining the mean of the Tumor Volume after grouping
drug_mean = drug_timepoint_grp["Tumor Volume (mm3)"].mean()

# Convert to DataFrame and Store the Mean Tumor Volume Data
tumor_response_mean_df = pd.DataFrame({"Tumor Volume (mm3)" : drug_mean}).reset_index()

# Preview DataFrame
tumor_response_mean_df.head()



Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,45.0
1,Capomulin,5,44.266086
2,Capomulin,10,43.084291
3,Capomulin,15,42.064317
4,Capomulin,20,40.716325


In [5]:
#Using the same groupby created above for mean
# Store the Standard Error of Tumor Volumes Grouped by Drug and Timepoint
drug_standard_error = drug_timepoint_grp["Tumor Volume (mm3)"].sem()

# Convert to DataFrame and Store the Standard Error of Tumor Volumes
tumor_response_sterr_df = pd.DataFrame({"Tumor Volume (mm3)" : drug_standard_error}).reset_index()
# Preview DataFrame
tumor_response_sterr_df.head()

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,0.0
1,Capomulin,5,0.448593
2,Capomulin,10,0.702684
3,Capomulin,15,0.838617
4,Capomulin,20,0.909731


In [6]:
# Minor Data Munging to Re-Format the Data Frames
#Pivoting the table to better plot.
tumor_response_mean_df = tumor_response_mean_df.pivot(index="Timepoint", columns="Drug", values="Tumor Volume (mm3)")
tumor_response_sterr_df = tumor_response_sterr_df.pivot(index="Timepoint", columns="Drug", values="Tumor Volume (mm3)")

# Preview that Reformatting worked
tumor_response_sterr_df
tumor_response_mean_df

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
5,44.266086,46.503051,47.062001,47.389175,46.796098,47.125589,47.248967,43.944859,47.527452,46.851818
10,43.084291,48.285125,49.403909,49.582269,48.69421,49.423329,49.101541,42.531957,49.463844,48.689881
15,42.064317,50.094055,51.296397,52.399974,50.933018,51.359742,51.067318,41.495061,51.529409,50.779059
20,40.716325,52.157049,53.197691,54.920935,53.644087,54.364417,53.346737,40.238325,54.067395,53.170334
25,39.939528,54.287674,55.715252,57.678982,56.731968,57.482574,55.504138,38.9743,56.166123,55.432935
30,38.769339,56.769517,58.299397,60.994507,59.559509,59.809063,58.196374,38.703137,59.826738,57.713531
35,37.816839,58.827548,60.742461,63.371686,62.685087,62.420615,60.350199,37.451996,62.440699,60.089372
40,36.958001,61.467895,63.162824,66.06858,65.600754,65.052675,63.045537,36.574081,65.356386,62.916692
45,36.236114,64.132421,65.755562,70.662958,69.265506,68.084082,66.258529,34.955595,68.43831,65.960888


In [7]:
#Retireving Plotting Data
#Data for X axis
timepoints = tumor_response_mean_df.index

#Data for Y axis
meanCapomulin = tumor_response_mean_df.iloc[0:,0]
semCapomulin = tumor_response_sterr_df.iloc[0:,0]

meanInfubinol = tumor_response_mean_df.iloc[0:,2]
semInfubinol = tumor_response_sterr_df.iloc[0:,2]

meanKetapril = tumor_response_mean_df.iloc[0:,3]
semKetapril = tumor_response_sterr_df.iloc[0:,3]

meanPlacebo = tumor_response_mean_df.iloc[0:,5]
semPlacebo = tumor_response_sterr_df.iloc[0:,5]




In [8]:
# Show the Figure
#fig = plt.figure() 
#plt.figure(figsize=(5,5))

# Generate the Plot (with Error Bars)
plt.errorbar(timepoints, meanCapomulin, yerr=semCapomulin, marker="^",label='Capomulin')

plt.errorbar(timepoints, meanInfubinol, yerr=semInfubinol, marker="D",label='Infubinol')

plt.errorbar(timepoints, meanKetapril, yerr=semKetapril,marker="o",label='Ketapril')

plt.errorbar(timepoints, meanPlacebo, yerr=semPlacebo, marker="s",label='Placebo')


plt.title("Tumor Response To Treatment")
plt.xlabel("Time(Days)")
plt.ylabel("Tumor Volume (mm3)")

plt.legend(loc='best')
plt.grid()
plt.show()

# Save the Figure
#Will do this later



<IPython.core.display.Javascript object>

## Metastatic Response to Treatment

In [9]:
# Store the Mean Met. Site Data Grouped by Drug and Timepoint 
#Metastatic Sites Response data based on Drug, Timepoint and Metastatic Sites
met_respone_data = trial_data_complete[["Drug","Timepoint","Metastatic Sites"]]
#Grouping data on Drug and Timepoint
met_response_grp = met_respone_data.groupby(["Drug","Timepoint"])
#Calculate the mean on Metastatic Sites
met_mean = met_response_grp["Metastatic Sites"].mean()
# Convert to DataFrame to Store the Mean Met. Site Data
met_response_mean_df = pd.DataFrame({"Metastatic Sites" : met_mean}).reset_index()
# Preview DataFrame
met_response_mean_df.head()



Unnamed: 0,Drug,Timepoint,Metastatic Sites
0,Capomulin,0,0.0
1,Capomulin,5,0.16
2,Capomulin,10,0.32
3,Capomulin,15,0.375
4,Capomulin,20,0.652174


In [10]:
#using the same data groupby object used earlier

#Calculate the Stderr on Metastatic Sites
met_stderr = met_response_grp["Metastatic Sites"].sem()

# Convert to DataFrame and Store the Standard Error associated with Met. Sites Grouped by Drug and Timepoint 
met_response_stderr_df = pd.DataFrame({"Metastatic Sites" : met_stderr}).reset_index()
# Preview DataFrame
met_response_stderr_df.head()

Unnamed: 0,Drug,Timepoint,Metastatic Sites
0,Capomulin,0,0.0
1,Capomulin,5,0.074833
2,Capomulin,10,0.125433
3,Capomulin,15,0.132048
4,Capomulin,20,0.161621


In [11]:
# Minor Data Munging to Re-Format the Data Frames
#Pivoting the table to better plot.
met_response_mean_df = met_response_mean_df.pivot(index="Timepoint", columns="Drug", values="Metastatic Sites")
met_response_stderr_df = met_response_stderr_df.pivot(index="Timepoint", columns="Drug", values="Metastatic Sites")
# Preview that Reformatting worked
met_response_mean_df
met_response_stderr_df


Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.074833,0.108588,0.091652,0.0981,0.093618,0.100947,0.095219,0.066332,0.087178,0.077709
10,0.125433,0.152177,0.159364,0.142018,0.163577,0.115261,0.10569,0.090289,0.123672,0.109109
15,0.132048,0.180625,0.194015,0.191381,0.158651,0.190221,0.136377,0.115261,0.153439,0.111677
20,0.161621,0.241034,0.234801,0.23668,0.181731,0.234064,0.171499,0.11943,0.200905,0.166378
25,0.181818,0.258831,0.265753,0.288275,0.18524,0.263888,0.199095,0.11943,0.219824,0.236621
30,0.172944,0.249479,0.227823,0.347467,0.266667,0.300264,0.266469,0.139968,0.230641,0.248168
35,0.169496,0.266526,0.224733,0.361418,0.330464,0.341412,0.366667,0.145997,0.240983,0.285714
40,0.17561,0.289128,0.314466,0.315725,0.321702,0.297294,0.433903,0.160591,0.312815,0.299791
45,0.202591,0.286101,0.30932,0.278722,0.351104,0.30424,0.428571,0.190221,0.359062,0.2864


In [12]:
#Retireving Plotting Data
#Data for X axis
timepoints = met_response_mean_df.index

#Data for Y axis
mean_met_Capomulin = met_response_mean_df.iloc[0:,0]
sem_met_Capomulin = met_response_stderr_df.iloc[0:,0]

mean_met_Infubinol = met_response_mean_df.iloc[0:,2]
sem_met_Infubinol = met_response_stderr_df.iloc[0:,2]

mean_met_Ketapril = met_response_mean_df.iloc[0:,3]
sem_met_Ketapril = met_response_stderr_df.iloc[0:,3]

mean_met_Placebo = met_response_mean_df.iloc[0:,5]
sem_met_Placebo = met_response_stderr_df.iloc[0:,5]

In [13]:

# Generate the Plot (with Error Bars)
#fig = plt.figure() 
#plt.figure(figsize=(5,5))
plt.errorbar(timepoints, mean_met_Capomulin, yerr=sem_met_Capomulin, marker="^",label='Capomulin')
plt.errorbar(timepoints, mean_met_Infubinol, yerr=sem_met_Infubinol, marker="D",label='Infubinol')
plt.errorbar(timepoints, mean_met_Ketapril, yerr=sem_met_Ketapril, marker="o",label='Ketapril')
plt.errorbar(timepoints, mean_met_Placebo, yerr=sem_met_Placebo, marker="s",label='Placebo')

# Show the Figure
#plt.xlim(0,50)
#plt.ylim(0,75)
plt.title("Metastatic Spread During Treatment")
plt.xlabel("Timepoint")
plt.ylabel("Met Sites")

plt.legend(loc='best')
plt.grid()
plt.show()

# Save the Figure
#will do this later


## Survival Rates

In [14]:
# Store the Count of Mice Grouped by Drug and Timepoint (W can pass any metric)
#Getting the data for the mice survival
mice_data = trial_data_complete[["Drug","Timepoint","Mouse ID"]]
#Grouping data on Drug and Timepoint
mice_data_grp = mice_data.groupby(["Drug","Timepoint"])
mice_data_grp.head()
#Obtaining the count on Mice Survival
mice_survival = mice_data_grp["Mouse ID"].count()
# Convert to DataFrame Store the Count of Mice 
mice_survival_count_df = pd.DataFrame({"Mouse Count" : mice_survival}).reset_index()

#Declaring the initial count of Mice
mice_init_count = 25
#Calculating the survival Percentage of Mice and adding to the Dataframe.
mice_survival_count_df["Survival"] = mice_survival_count_df["Mouse Count"]*100/mice_init_count
# Preview DataFrame
mice_survival_count_df.head()


Unnamed: 0,Drug,Timepoint,Mouse Count,Survival
0,Capomulin,0,25,100.0
1,Capomulin,5,25,100.0
2,Capomulin,10,25,100.0
3,Capomulin,15,24,96.0
4,Capomulin,20,23,92.0


In [15]:
# Minor Data Munging to Re-Format the Data Frames
#Schedule 
#mice_survival_count_df = mice_survival_count_df.pivot(index="Timepoint", columns="Drug", values="Mouse Count")
mice_survival_count_df = mice_survival_count_df.pivot(index="Timepoint", columns="Drug", values="Survival")

# Preview the Data Frame
mice_survival_count_df

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,100.0,100.0,100.0,100.0,100.0,100.0,104.0,100.0,104.0,100.0
5,100.0,84.0,100.0,92.0,92.0,96.0,100.0,100.0,100.0,96.0
10,100.0,80.0,84.0,88.0,84.0,96.0,92.0,96.0,92.0,88.0
15,96.0,76.0,84.0,76.0,84.0,80.0,68.0,96.0,92.0,84.0
20,92.0,72.0,80.0,76.0,80.0,76.0,68.0,92.0,84.0,68.0
25,88.0,72.0,72.0,76.0,72.0,68.0,56.0,92.0,76.0,64.0
30,88.0,64.0,68.0,72.0,60.0,60.0,52.0,92.0,72.0,60.0
35,88.0,56.0,48.0,68.0,60.0,56.0,40.0,84.0,64.0,56.0
40,84.0,56.0,40.0,60.0,60.0,48.0,36.0,80.0,48.0,56.0
45,84.0,52.0,36.0,44.0,52.0,44.0,28.0,80.0,44.0,56.0


In [16]:
timepoints = mice_survival_count_df.index
mice_survival_Capomulin = mice_survival_count_df.iloc[0:,0]

mice_survival_Ceftamin = mice_survival_count_df.iloc[0:,1]

mice_survival_Infubinol = mice_survival_count_df.iloc[0:,2]

mice_survival_Ketapril = mice_survival_count_df.iloc[0:,3]

len(timepoints)
len(mice_survival_Capomulin)
timepoints


Int64Index([0, 5, 10, 15, 20, 25, 30, 35, 40, 45], dtype='int64', name='Timepoint')

In [17]:
# Generate the Plot (Accounting for percentages)
fig = plt.figure() 
#plt.figure(figsize=(5,5))
plt.plot(timepoints, mice_survival_Capomulin, marker="^",label='Capomulin')
plt.plot(timepoints, mice_survival_Ceftamin, marker="s",label='Ceftamin')
plt.plot(timepoints, mice_survival_Infubinol, marker="D",label='Infubinol')
plt.plot(timepoints, mice_survival_Ketapril, marker="o",label='Ketapril')

plt.xlim(0,50)
plt.ylim(35,105)
# Save the Figure
plt.title("Survival During Treatment")
plt.xlabel("Time(Days)")
plt.ylabel("Survival Rate (%)")

plt.legend(loc='best')
plt.grid()

# Show the Figure
plt.show()

<IPython.core.display.Javascript object>

## Summary Bar Graph

In [18]:

# Calculate the percent changes for each drug
first_timepoint = timepoints[0]
last_timepoint = timepoints[len(timepoints)-1]
print(first_timepoint)
print(last_timepoint)
drug_percent = []
drug_percent.append((tumor_response_mean_df["Capomulin"][last_timepoint] - tumor_response_mean_df["Capomulin"][first_timepoint])/ tumor_response_mean_df["Capomulin"][first_timepoint])
# Display the data to confirm
#drug_percent.append((tumor_response_mean_df["Ceftamin"][last_timepoint] - tumor_response_mean_df["Ceftamin"][first_timepoint])/ tumor_response_mean_df["Ceftamin"][first_timepoint])
drug_percent.append((tumor_response_mean_df["Infubinol"][last_timepoint] - tumor_response_mean_df["Infubinol"][first_timepoint])/ tumor_response_mean_df["Infubinol"][first_timepoint])
drug_percent.append((tumor_response_mean_df["Ketapril"][last_timepoint] - tumor_response_mean_df["Ketapril"][first_timepoint])/ tumor_response_mean_df["Ketapril"][first_timepoint])
#drug_percent.append((tumor_response_mean_df["Naftisol"][last_timepoint] - tumor_response_mean_df["Naftisol"][first_timepoint])/ tumor_response_mean_df["Naftisol"][first_timepoint])
drug_percent.append((tumor_response_mean_df["Placebo"][last_timepoint] - tumor_response_mean_df["Placebo"][first_timepoint])/ tumor_response_mean_df["Placebo"][first_timepoint])
#drug_percent.append((tumor_response_mean_df["Propriva"][last_timepoint] - tumor_response_mean_df["Propriva"][first_timepoint])/ tumor_response_mean_df["Propriva"][first_timepoint])
#drug_percent.append((tumor_response_mean_df["Ramicane"][last_timepoint] - tumor_response_mean_df["Ramicane"][first_timepoint])/ tumor_response_mean_df["Ramicane"][first_timepoint])
#drug_percent.append((tumor_response_mean_df["Stelasyn"][last_timepoint] - tumor_response_mean_df["Stelasyn"][first_timepoint])/ tumor_response_mean_df["Stelasyn"][first_timepoint])
#drug_percent.append((tumor_response_mean_df["Zoniferol"][last_timepoint] - tumor_response_mean_df["Zoniferol"][first_timepoint])/ tumor_response_mean_df["Zoniferol"][first_timepoint])

drug_summary_df = pd.DataFrame({"Drug": drug_names, "Percentage": drug_percent })
drug_summary_df["Percentage"] = drug_summary_df["Percentage"]*100



drug_summary_df

0
45


Unnamed: 0,Drug,Percentage
0,Capomulin,-19.475303
1,Infubinol,46.123472
2,Ketapril,57.028795
3,Placebo,51.29796


In [19]:
# Store all Relevant Percent Changes into a Tuple
x_axis = np.arange(0,4,1)
y_axis = drug_summary_df["Percentage"]

fig, ax = plt.subplots()
# Splice the data between passing and failing drugs
pass_drug = y_axis >= 0
fail_drug = y_axis < 0

# Orient widths. Add labels, tick marks, etc. 

# Use functions to label the percentages of changes

# Call functions to implement the function calls
passlabel = ax.bar(x_axis[pass_drug], y_axis[pass_drug], color = 'red')
faillabel = ax.bar(x_axis[fail_drug], y_axis[fail_drug], color = 'green')
#plt object 
#How does set_xticks work with xticklabels
#ax.set_xticks(x_axis)
#ax.set_xticklabels(drug_names)
plt.xticks(x_axis,drug_names)
def autolabel(rects):
    """Attach a text label above each bar in *rects*, displaying its height."""
    for rect in rects:
        height = rect.get_height()
        print(rect)
        ax.annotate('{}'.format(height),
                    xy=(rect.get_x() + rect.get_width() / 2, height/2),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom')



autolabel(passlabel)
autolabel(faillabel)
ax.grid()
# Save the Figure


# Show the Figure
fig.show()

type(pass_drug)

<IPython.core.display.Javascript object>

Rectangle(xy=(0.6, 0), width=0.8, height=46.1235, angle=0)
Rectangle(xy=(1.6, 0), width=0.8, height=57.0288, angle=0)
Rectangle(xy=(2.6, 0), width=0.8, height=51.298, angle=0)
Rectangle(xy=(-0.4, 0), width=0.8, height=-19.4753, angle=0)


pandas.core.series.Series

In [20]:
#Example used to do the bar graph.
x = np.arange(10)
y = np.arange(10) * 0.1

mask1 = y < 0.5
mask2 = y >= 0.5

plt.bar(x[mask1], y[mask1], color = 'red')
plt.bar(x[mask2], y[mask2], color = 'blue')
plt.show()
type(mask1)

numpy.ndarray