In [44]:
#Magic function notebook
%matplotlib notebook

#Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
from scipy.stats import linregress
#from sklearn import datasets

#Study data files
mouse_metadata = "data/Mouse_metadata.csv"
study_results = "data/Study_results.csv"

#Read the mouse data and the study results
mouse_metadata = pd.read_csv(mouse_metadata)
study_results = pd.read_csv(study_results)

#Combine the data into a single dataset
all_data = pd.merge(mouse_metadata, study_results, how='outer', on='Mouse ID')

In [45]:
# Calculate the final tumor volume of each mouse across four of the most promising treatment regimens. 

# Calculate the IQR and quantitatively determine if there are any potential outliers.
# Capomulin, Ramicane, Infubinol, and Ceftamin

# Generate a box plot of the final tumor volume of each mouse across four regimens of interest


In [46]:
mouse = all_data.groupby('Mouse ID')
mouse2 = mouse['Tumor Volume (mm3)'].max()
mouse2 = pd.DataFrame(mouse2)
mouse3 = pd.merge(mouse_metadata, mouse2, how='outer', on='Mouse ID')
mouse3 = mouse3.set_index(['Drug Regimen'])
mouse4 = mouse3.loc[mouse3.index.isin(['Capomulin', 'Ramicane', 'Infubinol', 'Ceftamin'])]
mouse4.to_csv('timepoint.csv', index=False)
mouse4

Unnamed: 0_level_0,Mouse ID,Sex,Age_months,Weight (g),Tumor Volume (mm3)
Drug Regimen,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Ramicane,k403,Male,21,16,45.000000
Capomulin,s185,Female,3,17,45.000000
Capomulin,x401,Female,16,15,45.473753
Capomulin,m601,Male,22,17,45.000000
Ramicane,g791,Male,11,16,45.851531
...,...,...,...,...,...
Ceftamin,x822,Male,3,29,61.386660
Infubinol,y163,Female,17,27,67.685569
Ceftamin,y769,Female,6,27,68.594745
Ceftamin,y865,Male,23,26,64.729837


In [47]:
timepoints = mouse4['Tumor Volume (mm3)'].round(3)
timepoints

Drug Regimen
Ramicane     45.000
Capomulin    45.000
Capomulin    45.474
Capomulin    45.000
Ramicane     45.852
              ...  
Ceftamin     61.387
Infubinol    67.686
Ceftamin     68.595
Ceftamin     64.730
Infubinol    62.754
Name: Tumor Volume (mm3), Length: 100, dtype: float64

In [48]:
fig1, ax1 = plt.subplots()
ax1.set_title('Final Tumor Volume Across')
ax1.set_ylabel('Tumor Volume (mm3)')
ax1.boxplot(timepoints)

<IPython.core.display.Javascript object>

{'whiskers': [<matplotlib.lines.Line2D at 0x13b669a7c08>,
  <matplotlib.lines.Line2D at 0x13b669a7d08>],
 'caps': [<matplotlib.lines.Line2D at 0x13b669a7d88>,
  <matplotlib.lines.Line2D at 0x13b669acc88>],
 'boxes': [<matplotlib.lines.Line2D at 0x13b669a7588>],
 'medians': [<matplotlib.lines.Line2D at 0x13b669acd88>],
 'fliers': [<matplotlib.lines.Line2D at 0x13b669ace08>],
 'means': []}

In [49]:
timepoints.sort_values()
print(timepoints)

Drug Regimen
Ramicane     45.000
Capomulin    45.000
Capomulin    45.474
Capomulin    45.000
Ramicane     45.852
              ...  
Ceftamin     61.387
Infubinol    67.686
Ceftamin     68.595
Ceftamin     64.730
Infubinol    62.754
Name: Tumor Volume (mm3), Length: 100, dtype: float64


In [51]:
quartiles = timepoints.quantile([0.25,0.5,0.75])
lowerq = quartiles[0.25]
upperq = quartiles[0.75]
iqr = upperq-lowerq

print(f"The lower quartile of tumor volume is: {lowerq}")
print(f"The upper quartile of tumor volume is: {upperq}")
print(f"The interquartile range of tumor volume is: {iqr}")
print(f"The the median of tumor volume is: {quartiles[0.5]} ")

lower_bound = lowerq - (1.5*iqr)
upper_bound = upperq + (1.5*iqr)
print(f"Values below {lower_bound} could be outliers.")
print(f"Values above {upper_bound} could be outliers.")

The lower quartile of tumor volume is: 45.51
The upper quartile of tumor volume is: 59.930249999999994
The interquartile range of tumor volume is: 14.420249999999996
The the median of tumor volume is: 46.86 
Values below 23.879625000000004 could be outliers.
Values above 81.56062499999999 could be outliers.
