In [12]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Importing Scipy
from scipy.stats import sem


# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# Input CSV locations, relative to the notebook's working directory (adjust if the data moves)
mouse_drug_data_to_load = "data/mouse_drug_data.csv"
clinical_trial_data_to_load = "data/clinicaltrial_data.csv"

# Load the two raw tables: one from the mouse/drug file, one from the clinical trial file
mouse_drug_df = pd.read_csv(mouse_drug_data_to_load)
clinical_trial_df = pd.read_csv(clinical_trial_data_to_load)

# Combine the data into a single dataset by joining on the mouse identifier.
# The key is named explicitly: without `on=`, pandas joins on every column the two
# frames share, so the result would silently change if the files ever gained
# another common column.
clinical_mouse = pd.merge(clinical_trial_df, mouse_drug_df, how='inner', on='Mouse ID')

# Display the combined table for preview
clinical_mouse.head()

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug
0,b128,0,45.0,0,Capomulin
1,b128,5,45.651331,0,Capomulin
2,b128,10,43.270852,0,Capomulin
3,b128,15,43.784893,0,Capomulin
4,b128,20,42.731552,0,Capomulin


In [23]:
# Tumor-volume summary: mean and standard error for every (Drug, Timepoint) pair
tumor_volume_columns = ['Drug', 'Timepoint', 'Tumor Volume (mm3)']
tumor_volume_df = clinical_mouse.loc[:, tumor_volume_columns]

# Group once, then compute both statistics in a single aggregation; the result has
# two-level column labels: ('Tumor Volume (mm3)', 'mean') and ('Tumor Volume (mm3)', 'sem')
tumor_volume_groups = tumor_volume_df.groupby(['Drug', 'Timepoint'])
mean_sem_tv = tumor_volume_groups.agg({"Tumor Volume (mm3)": ["mean", "sem"]})
mean_sem_tv.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Tumor Volume (mm3),Tumor Volume (mm3)
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,sem
Drug,Timepoint,Unnamed: 2_level_2,Unnamed: 3_level_2
Capomulin,0,45.0,0.0
Capomulin,5,44.266086,0.448593
Capomulin,10,43.084291,0.702684
Capomulin,15,42.064317,0.838617
Capomulin,20,40.716325,0.909731


In [25]:
# Keep only the columns needed for the metastatic-site analysis
meta_sites_columns = ['Drug', 'Timepoint', 'Metastatic Sites']
meta_sites_df = clinical_mouse.loc[:, meta_sites_columns]

# Preview the subset
meta_sites_df.head()

Unnamed: 0,Drug,Timepoint,Metastatic Sites
0,Capomulin,0,0
1,Capomulin,5,0
2,Capomulin,10,0
3,Capomulin,15,0
4,Capomulin,20,0


In [None]:
# Extract per-drug tumor-volume statistics from the (Drug, Timepoint) summary table
# as plain Python lists, for the four treatments compared in this analysis.
_COMPARED_DRUGS = ('Capomulin', 'Infubinol', 'Ketapril', 'Placebo')


def _tumor_volume_stat_list(drug, stat):
    """Return one tumor-volume statistic for a single drug as a list.

    `drug` is a value of the first row-index level of `mean_sem_tv`; `stat` is the
    second-level column label ('mean' or 'sem'). The list has one entry per row
    of that drug's slice, in the slice's row order.
    """
    per_drug = mean_sem_tv.loc[drug]
    return per_drug['Tumor Volume (mm3)'][stat].tolist()


# Tumor volume means, one list per drug (unpacked in the order of _COMPARED_DRUGS)
cap_tvmean_list, inf_tvmean_list, ket_tvmean_list, plc_tvmean_list = (
    _tumor_volume_stat_list(drug, 'mean') for drug in _COMPARED_DRUGS
)

# Tumor volume standard errors, one list per drug (same order)
cap_tvsem_list, inf_tvsem_list, ket_tvsem_list, plc_tvsem_list = (
    _tumor_volume_stat_list(drug, 'sem') for drug in _COMPARED_DRUGS
)