In [1]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [2]:
# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Relative locations of the two input CSV files (edit these if the data lives elsewhere)
mouse_drug_data_to_load, clinical_trial_data_to_load = (
    "data/mouse_drug_data.csv",
    "data/clinicaltrial_data.csv",
)

In [4]:
# Load the two CSV inputs: the clinical trial measurements and the mouse/drug table
clinical_trial_df = pd.read_csv(clinical_trial_data_to_load)
mouse_drag_df = pd.read_csv(mouse_drug_data_to_load)

# Join them on the shared "Mouse ID" key (pandas' default inner join), keeping the
# trial columns first and the mouse/drug columns after them
clinical_mouse_data_df = clinical_trial_df.merge(mouse_drag_df, on="Mouse ID")

# Print a schema summary, then show the merged table as the cell output
clinical_mouse_data_df.info()
clinical_mouse_data_df

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1906 entries, 0 to 1905
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Mouse ID            1906 non-null   object 
 1   Timepoint           1906 non-null   int64  
 2   Tumor Volume (mm3)  1906 non-null   float64
 3   Metastatic Sites    1906 non-null   int64  
 4   Drug                1906 non-null   object 
dtypes: float64(1), int64(2), object(2)
memory usage: 89.3+ KB


Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug
0,b128,0,45.000000,0,Capomulin
1,b128,5,45.651331,0,Capomulin
2,b128,10,43.270852,0,Capomulin
3,b128,15,43.784893,0,Capomulin
4,b128,20,42.731552,0,Capomulin
...,...,...,...,...,...
1901,m601,25,33.118756,1,Capomulin
1902,m601,30,31.758275,1,Capomulin
1903,m601,35,30.834357,1,Capomulin
1904,m601,40,31.378045,1,Capomulin


## Tumor Response to Treatment

In [5]:
# Preview the first five rows of the merged dataset
clinical_mouse_data_df.head(n=5)

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug
0,b128,0,45.0,0,Capomulin
1,b128,5,45.651331,0,Capomulin
2,b128,10,43.270852,0,Capomulin
3,b128,15,43.784893,0,Capomulin
4,b128,20,42.731552,0,Capomulin


In [6]:
# Mean tumor volume for every (Drug, Timepoint) pair.
# Step 1: narrow the merged data to the three relevant columns, ordered by drug then timepoint.
result_mu_tumor_by_drugtime_df = (
    clinical_mouse_data_df
    .loc[:, ['Drug', 'Timepoint', 'Tumor Volume (mm3)']]
    .sort_values(by=["Drug", "Timepoint"])
)

# Step 2: average the volume inside each group and turn the group keys back into columns.
mu_tumor_by_drugtime_data_df = (
    result_mu_tumor_by_drugtime_df
    .groupby(["Drug", "Timepoint"])['Tumor Volume (mm3)']
    .mean()
    .reset_index()
)

# Print a schema summary, then show the aggregated table as the cell output
mu_tumor_by_drugtime_data_df.info()
mu_tumor_by_drugtime_data_df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Drug                100 non-null    object 
 1   Timepoint           100 non-null    int64  
 2   Tumor Volume (mm3)  100 non-null    float64
dtypes: float64(1), int64(1), object(1)
memory usage: 2.5+ KB


Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,45.000000
1,Capomulin,5,44.266086
2,Capomulin,10,43.084291
3,Capomulin,15,42.064317
4,Capomulin,20,40.716325
...,...,...,...
95,Zoniferol,25,55.432935
96,Zoniferol,30,57.713531
97,Zoniferol,35,60.089372
98,Zoniferol,40,62.916692


In [7]:
# Standard error of the mean (SEM) of tumor volume for every (Drug, Timepoint) pair.
# Despite the "_std_" in the variable name, the values are standard errors (.sem()).
mu_drugtime_data_std_df = (
    result_mu_tumor_by_drugtime_df
    .groupby(['Drug', 'Timepoint'])['Tumor Volume (mm3)']
    .sem()
    .reset_index()
)

# Print a schema summary, then show the table as the cell output
mu_drugtime_data_std_df.info()
mu_drugtime_data_std_df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Drug                100 non-null    object 
 1   Timepoint           100 non-null    int64  
 2   Tumor Volume (mm3)  100 non-null    float64
dtypes: float64(1), int64(1), object(1)
memory usage: 2.5+ KB


Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,0.000000
1,Capomulin,5,0.448593
2,Capomulin,10,0.702684
3,Capomulin,15,0.838617
4,Capomulin,20,0.909731
...,...,...,...
95,Zoniferol,25,0.602513
96,Zoniferol,30,0.800043
97,Zoniferol,35,0.881426
98,Zoniferol,40,0.998515


In [8]:
# Reshape the long mean-volume table into wide form:
# one row per timepoint, one column per drug, cells hold the mean tumor volume
trans_tumor_drugtime_df = pd.pivot(
    mu_tumor_by_drugtime_data_df,
    index='Timepoint',
    columns='Drug',
    values='Tumor Volume (mm3)',
)

# Confirm the reshape worked: print the structure, then display the frame
trans_tumor_drugtime_df.info()
trans_tumor_drugtime_df

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10 entries, 0 to 45
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Capomulin  10 non-null     float64
 1   Ceftamin   10 non-null     float64
 2   Infubinol  10 non-null     float64
 3   Ketapril   10 non-null     float64
 4   Naftisol   10 non-null     float64
 5   Placebo    10 non-null     float64
 6   Propriva   10 non-null     float64
 7   Ramicane   10 non-null     float64
 8   Stelasyn   10 non-null     float64
 9   Zoniferol  10 non-null     float64
dtypes: float64(10)
memory usage: 880.0 bytes


Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
5,44.266086,46.503051,47.062001,47.389175,46.796098,47.125589,47.248967,43.944859,47.527452,46.851818
10,43.084291,48.285125,49.403909,49.582269,48.69421,49.423329,49.101541,42.531957,49.463844,48.689881
15,42.064317,50.094055,51.296397,52.399974,50.933018,51.359742,51.067318,41.495061,51.529409,50.779059
20,40.716325,52.157049,53.197691,54.920935,53.644087,54.364417,53.346737,40.238325,54.067395,53.170334
25,39.939528,54.287674,55.715252,57.678982,56.731968,57.482574,55.504138,38.9743,56.166123,55.432935
30,38.769339,56.769517,58.299397,60.994507,59.559509,59.809063,58.196374,38.703137,59.826738,57.713531
35,37.816839,58.827548,60.742461,63.371686,62.685087,62.420615,60.350199,37.451996,62.440699,60.089372
40,36.958001,61.467895,63.162824,66.06858,65.600754,65.052675,63.045537,36.574081,65.356386,62.916692
45,36.236114,64.132421,65.755562,70.662958,69.265506,68.084082,66.258529,34.955595,68.43831,65.960888


In [9]:
# Keep only the rows belonging to the four treatments being compared
tumor_drugtime4_df = mu_tumor_by_drugtime_data_df[
    mu_tumor_by_drugtime_data_df['Drug'].isin(['Capomulin', 'Placebo', 'Ketapril', 'Infubinol'])
]

# Wide layout of that subset: rows are timepoints, columns are the four drugs
trans_tumor_drugtime4_df = tumor_drugtime4_df.pivot(
    index='Timepoint',
    columns='Drug',
    values='Tumor Volume (mm3)',
)
trans_tumor_drugtime4_df.info()
trans_tumor_drugtime4_df

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10 entries, 0 to 45
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Capomulin  10 non-null     float64
 1   Infubinol  10 non-null     float64
 2   Ketapril   10 non-null     float64
 3   Placebo    10 non-null     float64
dtypes: float64(4)
memory usage: 400.0 bytes


Drug,Capomulin,Infubinol,Ketapril,Placebo
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,45.0,45.0,45.0,45.0
5,44.266086,47.062001,47.389175,47.125589
10,43.084291,49.403909,49.582269,49.423329
15,42.064317,51.296397,52.399974,51.359742
20,40.716325,53.197691,54.920935,54.364417
25,39.939528,55.715252,57.678982,57.482574
30,38.769339,58.299397,60.994507,59.809063
35,37.816839,60.742461,63.371686,62.420615
40,36.958001,63.162824,66.06858,65.052675
45,36.236114,65.755562,70.662958,68.084082


In [10]:
# Display the long-form mean tumor volumes for the four selected treatments
tumor_drugtime4_df

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,45.0
1,Capomulin,5,44.266086
2,Capomulin,10,43.084291
3,Capomulin,15,42.064317
4,Capomulin,20,40.716325
5,Capomulin,25,39.939528
6,Capomulin,30,38.769339
7,Capomulin,35,37.816839
8,Capomulin,40,36.958001
9,Capomulin,45,36.236114


In [None]:
# Generate the Plot (with Error Bars) - compares four treatments
# (Capomulin, Infubinol, Ketapril, and Placebo): mean tumor volume over time,
# with the standard error of the mean drawn as error bars.
tumor_plot_drugs = ["Capomulin", "Infubinol", "Ketapril", "Placebo"]
tumor_plot_markers = ["o", "^", "s", "d"]

# Wide frames (rows = timepoint, columns = drug) of the group means and their standard errors
tumor_plot_mean = mu_tumor_by_drugtime_data_df.pivot(
    index="Timepoint", columns="Drug", values="Tumor Volume (mm3)"
)
tumor_plot_sem = mu_drugtime_data_std_df.pivot(
    index="Timepoint", columns="Drug", values="Tumor Volume (mm3)"
)

fig, ax = plt.subplots()
for drug, marker in zip(tumor_plot_drugs, tumor_plot_markers):
    ax.errorbar(
        tumor_plot_mean.index,
        tumor_plot_mean[drug],
        yerr=tumor_plot_sem[drug],
        marker=marker,
        linestyle="--",
        capsize=3,
        label=drug,
    )

# Add labels
ax.set_title("Tumor Response to Treatment")
ax.set_xlabel("Days/Time")
ax.set_ylabel("Tumor Volume (mm3)")
ax.grid()
ax.legend(loc="best")

# Lay out and save BEFORE plt.show(): with the inline backend, show() renders and
# closes the figure, so layout calls made afterwards would act on a new, empty figure.
fig.tight_layout()

# Save the Figure
fig.savefig("../Images/tumor_response.png")

# Show the Figure
plt.show()

## Metastatic Response to Treatment

In [None]:
# Mean number of metastatic sites for every (Drug, Timepoint) pair.
# Step 1: narrow the merged data to the three relevant columns, ordered by drug then timepoint.
result_mu_meta_by_drugtime_df = (
    clinical_mouse_data_df
    .loc[:, ['Drug', 'Timepoint', 'Metastatic Sites']]
    .sort_values(by=["Drug", "Timepoint"])
)
result_mu_meta_by_drugtime_df.info()

# Step 2: average inside each group and turn the group keys back into columns.
mu_meta_by_drugtime_data_df = (
    result_mu_meta_by_drugtime_df
    .groupby(["Drug", "Timepoint"])['Metastatic Sites']
    .mean()
    .reset_index()
)

# Print a schema summary, then show the aggregated table as the cell output
mu_meta_by_drugtime_data_df.info()
mu_meta_by_drugtime_data_df


In [None]:
# Standard error of the mean (SEM) of the metastatic-site count for every (Drug, Timepoint) pair.
# Despite the "_std_" in the variable name, the values are standard errors (.sem()).
mu_drugtime_data2_std_df = (
    result_mu_meta_by_drugtime_df
    .groupby(['Drug', 'Timepoint'])['Metastatic Sites']
    .sem()
    .reset_index()
)

# Print a schema summary, then show the table as the cell output
mu_drugtime_data2_std_df.info()
mu_drugtime_data2_std_df

In [None]:
# Reshape the long metastatic-site table into wide form:
# one row per timepoint, one column per drug.
# NOTE(review): the frame pivoted here is the STANDARD-ERROR table
# (mu_drugtime_data2_std_df), although the target name suggests means - confirm
# whether mu_meta_by_drugtime_data_df was intended.
mu_tumor_drugtime_df = pd.pivot(
    mu_drugtime_data2_std_df,
    index='Timepoint',
    columns='Drug',
    values='Metastatic Sites',
)

# Confirm the reshape worked: print the structure, then display the frame
mu_tumor_drugtime_df.info()
mu_tumor_drugtime_df

In [None]:
# Isolate four treatments
# Filter the LONG-form mean metastatic-site table, which still has a 'Drug' column.
# The pivoted mu_tumor_drugtime_df carries the drugs as column headers, so selecting
# ['Drug'] from it raises KeyError, and boolean masks built from the tumor-volume
# frame do not describe this data. Keeping the result in long form
# (Drug / Timepoint / Metastatic Sites) lets it be pivoted afterwards.
mu_tumor_drugtime4_df = mu_meta_by_drugtime_data_df[
    mu_meta_by_drugtime_data_df['Drug'].isin(['Capomulin', 'Placebo', 'Ketapril', 'Infubinol'])
]
mu_tumor_drugtime4_df

In [None]:
# Wide layout of the four-treatment subset: rows are timepoints, columns are drugs,
# cells hold the 'Metastatic Sites' value
mu_trans_tumor_drugtime4_df = pd.pivot(
    mu_tumor_drugtime4_df,
    index='Timepoint',
    columns='Drug',
    values='Metastatic Sites',
)
mu_trans_tumor_drugtime4_df.info()
mu_trans_tumor_drugtime4_df

In [None]:
# Generate the Plot (with Error Bars) - compares four treatments
# (Capomulin, Infubinol, Ketapril, and Placebo): mean metastatic-site count over
# time, with the standard error of the mean drawn as error bars.
meta_plot_drugs = ["Capomulin", "Infubinol", "Ketapril", "Placebo"]
meta_plot_markers = ["o", "^", "s", "d"]

# Wide frames (rows = timepoint, columns = drug) of the group means and their standard errors
meta_plot_mean = mu_meta_by_drugtime_data_df.pivot(
    index="Timepoint", columns="Drug", values="Metastatic Sites"
)
meta_plot_sem = mu_drugtime_data2_std_df.pivot(
    index="Timepoint", columns="Drug", values="Metastatic Sites"
)

fig, ax = plt.subplots()
for drug, marker in zip(meta_plot_drugs, meta_plot_markers):
    ax.errorbar(
        meta_plot_mean.index,
        meta_plot_mean[drug],
        yerr=meta_plot_sem[drug],
        marker=marker,
        linestyle="--",
        capsize=3,
        label=drug,
    )

# Add labels
ax.set_title("Metastatic Spread during Treatment")
ax.set_xlabel("Treatment Duration (Days)")
ax.set_ylabel("Metastatic Sites")
ax.grid()
ax.legend(loc="best")

# Lay out and save BEFORE plt.show(): with the inline backend, show() renders and
# closes the figure, so layout calls made afterwards would act on a new, empty figure.
fig.tight_layout()

# Save the Figure
fig.savefig("../Images/tumor_meta_sites_response.png")

# Show the Figure
plt.show()

![Metastatic Spread During Treatment](../Images/spread.png)

## Survival Rates

In [None]:
# Store the Count of Mice Grouped by Drug and Timepoint (We can pass any metric)

# Convert to DataFrame

# Preview DataFrame


In [None]:
# Minor Data Munging to Re-Format the Data Frames

# Preview the Data Frame


In [None]:
# Generate the Plot (Accounting for percentages)
# Survival rate per drug over time for the four compared treatments.
survival_plot_drugs = ["Capomulin", "Infubinol", "Ketapril", "Placebo"]
survival_plot_markers = ["o", "^", "s", "d"]

# Distinct mice with a recorded row at each timepoint, per drug
# (rows = timepoint, columns = drug). nunique() is used so repeated rows for the
# same mouse cannot inflate the count.
# NOTE(review): assumes a mouse that has a measurement at a timepoint was alive
# at that timepoint - confirm against the study design.
survival_counts = (
    clinical_mouse_data_df
    .groupby(["Drug", "Timepoint"])["Mouse ID"]
    .nunique()
    .unstack("Drug")
)

# Express every count as a percentage of that drug's count at the earliest timepoint
survival_pct = survival_counts / survival_counts.iloc[0] * 100

fig, ax = plt.subplots()
for drug, marker in zip(survival_plot_drugs, survival_plot_markers):
    ax.plot(
        survival_pct.index,
        survival_pct[drug],
        marker=marker,
        linestyle="--",
        label=drug,
    )

ax.set_title("Survival during Treatment")
ax.set_xlabel("Time (Days)")
ax.set_ylabel("Survival Rate %")
ax.grid()
ax.legend(loc="best")

# Lay out and save BEFORE plt.show(): with the inline backend, show() renders and
# closes the figure, so layout calls made afterwards would act on a new, empty figure.
fig.tight_layout()

# Save the Figure
fig.savefig("../Images/survival_rates.png")

# Show the Figure
plt.show()

![Survival During Treatment](../Images/survival.png)

## Summary Bar Graph

In [None]:
# Calculate the percent changes for each drug

# Display the data to confirm


In [None]:
# Store all Relevant Percent Changes into a Tuple
# Percent change of the mean tumor volume between the first and last timepoint,
# for the four treatments compared throughout the notebook.
summary_drugs = ("Capomulin", "Infubinol", "Ketapril", "Placebo")
summary_mean_wide = mu_tumor_by_drugtime_data_df.pivot(
    index="Timepoint", columns="Drug", values="Tumor Volume (mm3)"
)
summary_start = summary_mean_wide.iloc[0]   # earliest timepoint row
summary_end = summary_mean_wide.iloc[-1]    # latest timepoint row
summary_pct_by_drug = (summary_end - summary_start) / summary_start * 100
pct_changes = tuple(summary_pct_by_drug[drug] for drug in summary_drugs)

# Splice the data between passing and failing drugs:
# a shrinking tumor (negative change) passes and is green, growth fails and is red
bar_colors = ["g" if change < 0 else "r" for change in pct_changes]

# Orient widths. Add labels, tick marks, etc.
fig, ax = plt.subplots()
bar_positions = list(range(len(summary_drugs)))
bars = ax.bar(bar_positions, pct_changes, color=bar_colors, width=1, edgecolor="black")

# Tick positions must be numeric; the drug names go in as tick LABELS.
# (Passing a label string to plt.xticks as the tick positions is invalid.)
ax.set_xticks(bar_positions)
ax.set_xticklabels(summary_drugs)

# Label each bar with its rounded percent change, centred inside the bar
for bar, change in zip(bars, pct_changes):
    ax.text(
        bar.get_x() + bar.get_width() / 2,
        change / 2,
        "{:.0f}%".format(change),
        ha="center",
        va="center",
        color="white",
    )

ax.set_title("Tumor Change - 45 Day Treatment")
ax.set_xlabel("Drug")
ax.set_ylabel("Tumor Volume Change %")
ax.grid()

# Lay out and save BEFORE plt.show(): with the inline backend, show() renders and
# closes the figure, so layout calls made afterwards would act on a new, empty figure.
fig.tight_layout()

# Save the Figure
fig.savefig("../Images/tumor_chg_treatmt.png")

# Show the Figure
plt.show()

![Tumor Change Over 45 Day Treatment](../Images/change.png)