In [31]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# File to Load (Remember to Change These)

mouse_drug_data_path=os.path.join('data', 'mouse_drug_data.csv')


clinical_trial_data_path =os.path.join('data', 'clinicaltrial_data.csv')

# Read the Mouse and Drug Data and the Clinical Trial Data

mouse_drug_df=pd.read_csv(mouse_drug_data_path)

print(mouse_drug_df.head())

clinical_trial_df=pd.read_csv(clinical_trial_data_path)

print(clinical_trial_df)
# Combine the data into a single dataset

merged_df=pd.merge(mouse_drug_df, clinical_trial_df[['Mouse ID', 'Timepoint', 'Tumor Volume (mm3)', 'Metastatic Sites']], 
                  on='Mouse ID', how='outer')
# Display the data table for preview
merged_df.head()


  Mouse ID      Drug
0     f234  Stelasyn
1     x402  Stelasyn
2     a492  Stelasyn
3     w540  Stelasyn
4     v764  Stelasyn
     Mouse ID  Timepoint  Tumor Volume (mm3)  Metastatic Sites
0        b128          0           45.000000                 0
1        f932          0           45.000000                 0
2        g107          0           45.000000                 0
3        a457          0           45.000000                 0
4        c819          0           45.000000                 0
...       ...        ...                 ...               ...
1888     r944         45           41.581521                 2
1889     u364         45           31.023923                 3
1890     p438         45           61.433892                 1
1891     x773         45           58.634971                 4
1892     b879         45           72.555239                 2

[1893 rows x 4 columns]


Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,f234,Stelasyn,0,45.0,0
1,f234,Stelasyn,5,47.313491,0
2,f234,Stelasyn,10,47.904324,0
3,f234,Stelasyn,15,48.735197,1
4,f234,Stelasyn,20,51.112713,2


## Tumor Response to Treatment

In [32]:
# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint 

mean_tumor_volume_by_drug_and_time=merged_df.groupby(['Drug', 'Timepoint']).agg({'Tumor Volume (mm3)':['mean']})




mean_tumor_volume_by_drug_and_time.columns=['average_tumor_vol']
mean_tumor_volume_by_drug_and_time.reset_index(inplace=True)

#with pd.option_context('display.multi_sparse', False):

mean_tumor_volume_by_drug_and_time.head(20)

# Convert to DataFrame

# Preview DataFrame


Unnamed: 0,Drug,Timepoint,average_tumor_vol
0,Capomulin,0,45.0
1,Capomulin,5,44.266086
2,Capomulin,10,43.084291
3,Capomulin,15,42.064317
4,Capomulin,20,40.716325
5,Capomulin,25,39.939528
6,Capomulin,30,38.769339
7,Capomulin,35,37.816839
8,Capomulin,40,36.958001
9,Capomulin,45,36.236114


In [33]:
# Store the Standard Error of Tumor Volumes Grouped by Drug and Timepoint

st_error_tumor_vol_df=merged_df.groupby(['Drug', 'Timepoint']).agg({'Tumor Volume (mm3)':['sem']})

st_error_tumor_vol_df.reset_index()
st_error_tumor_vol_df.columns=['st_error_tumor_vol']

st_error_tumor_vol_df.head()
# Convert to DataFrame

# Preview DataFrame



Unnamed: 0_level_0,Unnamed: 1_level_0,st_error_tumor_vol
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,0.0
Capomulin,5,0.448593
Capomulin,10,0.702684
Capomulin,15,0.838617
Capomulin,20,0.909731


In [34]:
mean_tumor_volume_by_drug_and_time

Unnamed: 0,Drug,Timepoint,average_tumor_vol
0,Capomulin,0,45.000000
1,Capomulin,5,44.266086
2,Capomulin,10,43.084291
3,Capomulin,15,42.064317
4,Capomulin,20,40.716325
...,...,...,...
95,Zoniferol,25,55.432935
96,Zoniferol,30,57.713531
97,Zoniferol,35,60.089372
98,Zoniferol,40,62.916692


In [36]:
# Minor Data Munging to Re-Format the Data Frames

# Make lists of unique drugs and unique timepoints in merged_df

uni_drugs_list=merged_df['Drug'].unique()
uni_time_point=merged_df['Timepoint'].unique()
# make empty dataframe 


mean_tumor_volume_df2=pd.DataFrame()


#put nan values in dataframe with unique list of drugs as columns

    
uni_drugs_list_length=len(uni_drugs_list)
    
uni_time_point_length=len(uni_time_point)

        
for i in mean_tumor_volume_by_drug_and_time.index:
    for x in range(0,uni_drugs_list_length):
        this_drug=uni_drugs_list[x]
        this_aver_tum_vol=mean_tumor_volume_by_drug_and_time.loc[i, 'average_tumor_vol']
        this_timepoint=mean_tumor_volume_by_drug_and_time.loc[i, 'Timepoint']
        if mean_tumor_volume_by_drug_and_time.loc[i, 'Drug']==this_drug:
            mean_tumor_volume_df2=mean_tumor_volume_df2.append({this_drug: this_aver_tum_vol, 'time_point': this_timepoint }, ignore_index=True)

    


           
            



    
print(mean_tumor_volume_df2)


    
# Preview that Reformatting worked


    Capomulin  time_point  Ceftamin  Infubinol  Ketapril  Naftisol  Placebo  \
0   45.000000         0.0       NaN        NaN       NaN       NaN      NaN   
1   44.266086         5.0       NaN        NaN       NaN       NaN      NaN   
2   43.084291        10.0       NaN        NaN       NaN       NaN      NaN   
3   42.064317        15.0       NaN        NaN       NaN       NaN      NaN   
4   40.716325        20.0       NaN        NaN       NaN       NaN      NaN   
..        ...         ...       ...        ...       ...       ...      ...   
95        NaN        25.0       NaN        NaN       NaN       NaN      NaN   
96        NaN        30.0       NaN        NaN       NaN       NaN      NaN   
97        NaN        35.0       NaN        NaN       NaN       NaN      NaN   
98        NaN        40.0       NaN        NaN       NaN       NaN      NaN   
99        NaN        45.0       NaN        NaN       NaN       NaN      NaN   

    Propriva  Ramicane  Stelasyn  Zoniferol  
0    

In [None]:
# Generate the Plot (with Error Bars)

# Save the Figure



In [None]:
# Show the Figure
plt.show()

![Tumor Response to Treatment](../Images/treatment.png)

## Metastatic Response to Treatment

In [None]:
# Store the Mean Met. Site Data Grouped by Drug and Timepoint 
mean_mt_site_df=merged_df.groupby(['Drug', 'Timepoint']).agg({'Metastatic Sites':['mean']})

mean_mt_site_df.reset_index()
mean_mt_site_df.columns=['mean_mt_site']


# Convert to DataFrame
mean_mt_site_df
# Preview DataFrame


In [None]:
# Store the Standard Error associated with Met. Sites Grouped by Drug and Timepoint 

met_site_ste_df=merged_df.groupby(['Drug', 'Timepoint']).agg({'Metastatic Sites':['sem']})

met_site_ste_df.reset_index()
met_site_ste_df.columns=['met_site_ste']

met_site_ste_df

# Convert to DataFrame

# Preview DataFrame


In [None]:
# Minor Data Munging to Re-Format the Data Frames

# Preview that Reformatting worked


In [None]:
# Generate the Plot (with Error Bars)

# Save the Figure

# Show the Figure


![Metastatic Spread During Treatment](../Images/spread.png)

## Survival Rates

In [None]:
# Store the Count of Mice Grouped by Drug and Timepoint (W can pass any metric)

mice_count_grouped_df=merged_df.groupby(['Drug', 'Timepoint']).agg({'Mouse ID':['count']})
mice_count_grouped_df.reset_index()

mice_count_grouped_df.columns=['mouse_count']

mice_count_grouped_df

# Convert to DataFrame

# Preview DataFrame


In [None]:
# Minor Data Munging to Re-Format the Data Frames

# Preview the Data Frame


In [None]:
# Generate the Plot (Accounting for percentages)

# Save the Figure

# Show the Figure
plt.show()

![Metastatic Spread During Treatment](../Images/survival.png)

## Summary Bar Graph

In [None]:
# Calculate the percent changes for each drug

# Display the data to confirm


In [None]:
# Store all Relevant Percent Changes into a Tuple


# Splice the data between passing and failing drugs


# Orient widths. Add labels, tick marks, etc. 


# Use functions to label the percentages of changes


# Call functions to implement the function calls


# Save the Figure


# Show the Figure
#fig.show()

![Metastatic Spread During Treatment](../Images/change.png)