In [1]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import sem

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# File to Load (Remember to Change These)
mouse_drug_data_to_load = "data/mouse_drug_data.csv"
clinical_trial_data_to_load = "data/clinicaltrial_data.csv"

In [46]:
# Read the Mouse and Drug Data
mouseDrugSource = pd.read_csv(mouse_drug_data_to_load)
mouseDrugSource.head()

Unnamed: 0,Mouse ID,Drug
0,f234,Stelasyn
1,x402,Stelasyn
2,a492,Stelasyn
3,w540,Stelasyn
4,v764,Stelasyn


In [47]:
# Read the Clinical Trial Data
clinicalTrialSource = pd.read_csv(clinical_trial_data_to_load)
clinicalTrialSource.head()

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,b128,0,45.0,0
1,f932,0,45.0,0
2,g107,0,45.0,0
3,a457,0,45.0,0
4,c819,0,45.0,0


In [48]:
# Combine the data into a single dataset
mergedSource = pd.merge(mouseDrugSource, clinicalTrialSource, on='Mouse ID')

# Display the data table for preview
mergedSource.head()

Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,f234,Stelasyn,0,45.0,0
1,f234,Stelasyn,5,47.313491,0
2,f234,Stelasyn,10,47.904324,0
3,f234,Stelasyn,15,48.735197,1
4,f234,Stelasyn,20,51.112713,2


## Tumor Response to Treatment

In [49]:
# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint
groupedSource = mergedSource.groupby(['Drug', 'Timepoint']).agg({
                                                                'Tumor Volume (mm3)': 'mean'})

groupedSource.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Tumor Volume (mm3)
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,45.0
Capomulin,5,44.266086
Capomulin,10,43.084291
Capomulin,15,42.064317
Capomulin,20,40.716325


In [50]:
# Convert to DataFrame
groupedSource_df = pd.DataFrame(groupedSource)

# Preview DataFrame
groupedSource_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Tumor Volume (mm3)
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,45.0
Capomulin,5,44.266086
Capomulin,10,43.084291
Capomulin,15,42.064317
Capomulin,20,40.716325


In [10]:
# Store the Standard Error of Tumor Volumes Grouped by Drug and Timepoint
#groupedSource_df['Standard Error'] = groupedSource_df['Tumor Volume (mm3)'].std

#groupedSource_df.groupby('Drug', 'Tumor Volume (mm3)').agg(np.std, ddof=1)

groupedSource_df.groupby('Drug').agg(np.std, ddof=1)

#groupedSource_df
#round(fuel_economy.Combined_MPG.std(),2)

Unnamed: 0_level_0,Tumor Volume (mm3)
Drug,Unnamed: 1_level_1
Capomulin,3.070266
Ceftamin,6.475575
Infubinol,6.975888
Ketapril,8.448745
Naftisol,8.274705
Placebo,7.839506
Propriva,7.020389
Ramicane,3.260406
Stelasyn,7.871403
Zoniferol,7.011808


In [13]:

groupedSource_df.groupby('Drug').agg(np.std, ddof=1)

Unnamed: 0_level_0,Tumor Volume (mm3)
Drug,Unnamed: 1_level_1
Capomulin,3.070266
Ceftamin,6.475575
Infubinol,6.975888
Ketapril,8.448745
Naftisol,8.274705
Placebo,7.839506
Propriva,7.020389
Ramicane,3.260406
Stelasyn,7.871403
Zoniferol,7.011808


In [52]:
# Store the Standard Error of Tumor Volumes Grouped by Drug and Timepoint

# Attempts to accomplish this using different methods below:

In [None]:
# Using SEM
groupedSource_df['Standard Error'] = sem(groupedSource_df['Tumor Volume (mm3)'])

groupedSource_df

In [15]:
# By resetting the index
groupedSource_df.reset_index
groupedSource_df.std()

Tumor Volume (mm3)    8.980674e+00
Standard Error        1.227398e-15
dtype: float64

In [42]:
# Reset index to remove the extra level
groupedSource_df = groupedSource_df.reset_index()
groupedSource_df.columns = groupedSource_df.columns.get_level_values(0)
groupedSource_df

sem(groupedSource_df['Tumor Volume (mm3)'])

0.8980673775157741

In [32]:
# Using SkipNA
groupedSource_df.sem(axis = 1, skipna = True) 

Drug       Timepoint
Capomulin  0           NaN
           5           NaN
           10          NaN
           15          NaN
           20          NaN
                        ..
Zoniferol  25          NaN
           30          NaN
           35          NaN
           40          NaN
           45          NaN
Length: 100, dtype: float64

In [51]:
# Method using Lambda
groupedSource_df.groupby('Drug').apply(lambda x: np.std(x) / np.mean(x))

Unnamed: 0_level_0,Tumor Volume (mm3)
Drug,Unnamed: 1_level_1
Capomulin,0.071945
Ceftamin,0.114288
Infubinol,0.120405
Ketapril,0.141095
Naftisol,0.140453
Placebo,0.132778
Propriva,0.121287
Ramicane,0.077353
Stelasyn,0.133391
Zoniferol,0.121696


In [None]:
# Convert to DataFrame
stdError_df = pd.DataFrame(groupedSource_df)

In [None]:
# Preview DataFrame
stdError_df

In [3]:
# Example Output

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,0.0
1,Capomulin,5,0.448593
2,Capomulin,10,0.702684
3,Capomulin,15,0.838617
4,Capomulin,20,0.909731


In [None]:
# Minor Data Munging to Re-Format the Data Frames

# Preview that Reformatting worked


Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
5,44.266086,46.503051,47.062001,47.389175,46.796098,47.125589,47.248967,43.944859,47.527452,46.851818
10,43.084291,48.285125,49.403909,49.582269,48.69421,49.423329,49.101541,42.531957,49.463844,48.689881
15,42.064317,50.094055,51.296397,52.399974,50.933018,51.359742,51.067318,41.495061,51.529409,50.779059
20,40.716325,52.157049,53.197691,54.920935,53.644087,54.364417,53.346737,40.238325,54.067395,53.170334


In [None]:
# Generate the Plot (with Error Bars)

# Save the Figure



In [None]:
# Show the Figure
plt.show()

![Tumor Response to Treatment](../Images/treatment.png)

## Metastatic Response to Treatment

In [None]:
# Store the Mean Met. Site Data Grouped by Drug and Timepoint 


In [None]:
# Convert to DataFrame

# Preview DataFrame

In [6]:
# Example Output

Unnamed: 0_level_0,Unnamed: 1_level_0,Metastatic Sites
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,0.0
Capomulin,5,0.16
Capomulin,10,0.32
Capomulin,15,0.375
Capomulin,20,0.652174


In [None]:
# Store the Standard Error associated with Met. Sites Grouped by Drug and Timepoint 


In [None]:
# Convert to DataFrame

# Preview DataFrame

In [7]:
# Example Output

Unnamed: 0_level_0,Unnamed: 1_level_0,Metastatic Sites
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,0.0
Capomulin,5,0.074833
Capomulin,10,0.125433
Capomulin,15,0.132048
Capomulin,20,0.161621


In [None]:
# Minor Data Munging to Re-Format the Data Frames

# Preview that Reformatting worked


In [8]:
# Example Output

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
5,44.266086,46.503051,47.062001,47.389175,46.796098,47.125589,47.248967,43.944859,47.527452,46.851818
10,43.084291,48.285125,49.403909,49.582269,48.69421,49.423329,49.101541,42.531957,49.463844,48.689881
15,42.064317,50.094055,51.296397,52.399974,50.933018,51.359742,51.067318,41.495061,51.529409,50.779059
20,40.716325,52.157049,53.197691,54.920935,53.644087,54.364417,53.346737,40.238325,54.067395,53.170334


In [None]:
# Generate the Plot (with Error Bars)

# Save the Figure

# Show the Figure


![Metastatic Spread During Treatment](../Images/spread.png)

## Survival Rates

In [None]:
# Store the Count of Mice Grouped by Drug and Timepoint (W can pass any metric)


In [None]:
# Convert to DataFrame

# Preview DataFrame

In [10]:
# Example Output

Unnamed: 0,Drug,Timepoint,Mouse Count
0,Capomulin,0,25
1,Capomulin,5,25
2,Capomulin,10,25
3,Capomulin,15,24
4,Capomulin,20,23


In [None]:
# Minor Data Munging to Re-Format the Data Frames

# Preview the Data Frame


In [11]:
# Example Output

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,25,25,25,25,25,25,26,25,26,25
5,25,21,25,23,23,24,25,25,25,24
10,25,20,21,22,21,24,23,24,23,22
15,24,19,21,19,21,20,17,24,23,21
20,23,18,20,19,20,19,17,23,21,17


In [None]:
# Generate the Plot (Accounting for percentages)

# Save the Figure

# Show the Figure
plt.show()

![Metastatic Spread During Treatment](../Images/survival.png)

## Summary Bar Graph

In [None]:
# Calculate the percent changes for each drug

# Display the data to confirm


In [13]:
# Example Output

Drug
Capomulin   -19.475303
Ceftamin     42.516492
Infubinol    46.123472
Ketapril     57.028795
Naftisol     53.923347
Placebo      51.297960
Propriva     47.241175
Ramicane    -22.320900
Stelasyn     52.085134
Zoniferol    46.579751
dtype: float64

In [None]:
# Store all Relevant Percent Changes into a Tuple


In [None]:
# Splice the data between passing and failing drugs


In [None]:
# Orient widths. Add labels, tick marks, etc. 


In [None]:
# Use functions to label the percentages of changes


In [None]:
# Call functions to implement the function calls


In [None]:
# Save the Figure


# Show the Figure
fig.show()

In [None]:
# Example Output

![Metastatic Spread During Treatment](../Images/change.png)