## Observations and Insights 

In [128]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np

# Study data files
mouse_metadata_path = "Resources/mouse_metadata.csv"
study_results_path = "Resources/study_results.csv"

# Read the mouse data and the study results
mouse_metadata = pd.read_csv(mouse_metadata_path)
study_results = pd.read_csv(study_results_path)

# Combine the data into a single dataset
all_data = pd.merge(mouse_metadata, study_results, how="left", on=["Mouse ID", "Mouse ID"])
# Display the data table for preview
all_data

Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g),Timepoint,Tumor Volume (mm3),Metastatic Sites
0,k403,Ramicane,Male,21,16,0,45.000000,0
1,k403,Ramicane,Male,21,16,5,38.825898,0
2,k403,Ramicane,Male,21,16,10,35.014271,1
3,k403,Ramicane,Male,21,16,15,34.223992,1
4,k403,Ramicane,Male,21,16,20,32.997729,1
...,...,...,...,...,...,...,...,...
1888,z969,Naftisol,Male,9,30,25,63.145652,2
1889,z969,Naftisol,Male,9,30,30,65.841013,3
1890,z969,Naftisol,Male,9,30,35,69.176246,4
1891,z969,Naftisol,Male,9,30,40,70.314904,4


In [129]:
# Checking the number of mice.
all_data["Mouse ID"].count()

1893

In [130]:
# Getting the duplicate mice by ID number that shows up for Mouse ID and Timepoint. 
duplicates = all_data[all_data.duplicated(keep="first")]
duplicates["Mouse ID"].count()


1

In [131]:
# Optional: Get all the data for the duplicate mouse ID. 
duplicates

Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g),Timepoint,Tumor Volume (mm3),Metastatic Sites
909,g989,Propriva,Female,21,26,0,45.0,0


In [132]:
# Create a clean DataFrame by dropping the duplicate mouse by its ID.
cleaned_data = all_data.drop([0,909])

In [133]:
# Checking the number of mice in the clean DataFrame.
cleaned_data["Mouse ID"].count()

1891

## Summary Statistics

In [146]:
# Generate a summary statistics table of mean, median, variance, standard deviation, and SEM of the tumor volume for each regimen

# Use groupby and summary statistical methods to calculate the following properties of each drug regimen: 
# mean, median, variance, standard deviation, and SEM of the tumor volume. 
# Assemble the resulting series into a single summary dataframe.

volume = cleaned_data["Tumor Volume (mm3)"]
regimen = cleaned_data.groupby("Drug Regimen")["Drug Regimen"]

mean_volume = np.mean(volume)
median_volume = np.median(volume)
var_volume = np.var(volume)
sd_volume = np.std(volume)
SEM = volume.sem()

volume_summary_table = pd.DataFrame({
    "Drug Regimen":[regimen],
    "Mean":[mean_volume],
    "Median":[median_volume],
    "Variance":[var_volume],
    "Standard Deviation":[sd_volume],
    "SEM":[SEM]
})

volume_summary_table.set_index("Drug Regimen")


Unnamed: 0_level_0,Mean,Median,Variance,Standard Deviation,SEM
Drug Regimen,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"((Capomulin, [Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, Capomulin, ...]), (Ceftamin, [Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, Ceftamin, ...]), (Infubinol, [Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, Infubinol, ...]), (Ketapril, [Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, Ketapril, ...]), (Naftisol, [Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, Naftisol, ...]), (Placebo, [Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, Placebo, ...]), (Propriva, [Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, Propriva, ...]), (Ramicane, [Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, Ramicane, ...]), (Stelasyn, [Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, Stelasyn, ...]), (Zoniferol, [Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, Zoniferol, ...]))",50.454143,48.957919,79.126483,8.895307,0.204611


In [125]:
# Generate a summary statistics table of mean, median, variance, standard deviation, and SEM of the tumor volume for each regimen

# Use groupby and summary statistical methods to calculate the following properties of each drug regimen: 
# mean, median, variance, standard deviation, and SEM of the tumor volume. 
# Assemble the resulting series into a single summary dataframe.


In [150]:
# Generate a summary statistics table of mean, median, variance, standard deviation, and SEM of the tumor volume for each regimen

# Use groupby and summary statistical methods to calculate the following properties of each drug regimen: 
# mean, median, variance, standard deviation, and SEM of the tumor volume. 
# Assemble the resulting series into a single summary dataframe.

volume = cleaned_data["Tumor Volume (mm3)"]
regimen = cleaned_data.groupby("Drug Regimen")["Drug Regimen"]


mean_volume = np.mean(volume)
median_volume = np.median(volume)
var_volume = np.var(volume)
sd_volume = np.std(volume)
SEM = volume.sem(volume)

volume_summary_table = pd.DataFrame({
    "Drug Regimen":[regimen],
    "Mean":[mean_volume],
    "Median":[median_volume],
    "Variance":[var_volume],
    "Standard Deviation":[sd_volume],
    "SEM":[SEM]
})

volume_summary_table.set_index("Drug Regimen")


TypeError: mean() got an unexpected keyword argument 'axis'

In [152]:
# Generate a summary statistics table of mean, median, variance, standard deviation, and SEM of the tumor volume for each regimen

# Use groupby and summary statistical methods to calculate the following properties of each drug regimen: 
# mean, median, variance, standard deviation, and SEM of the tumor volume. 
# Assemble the resulting series into a single summary dataframe.

volume = cleaned_data["Tumor Volume (mm3)"]
regimen = cleaned_data.groupby("Drug Regimen")["Drug Regimen"]

mean_volume = np.mean(volume)
median_volume = np.median(volume)
var_volume = np.var(volume)
sd_volume = np.std(volume)
SEM = volume.sem()

volume_summary_table = pd.DataFrame({
    "Drug Regimen":[regimen],
    "Mean":[mean_volume],
    "Median":[median_volume],
    "Variance":[var_volume],
    "Standard Deviation":[sd_volume],
    "SEM":[SEM]
})

volume_summary_table


Unnamed: 0,Drug Regimen,Mean,Median,Variance,Standard Deviation,SEM
0,<pandas.core.groupby.generic.SeriesGroupBy obj...,50.454143,48.957919,79.126483,8.895307,0.204611


In [8]:
# Generate a summary statistics table of mean, median, variance, standard deviation, and SEM of the tumor volume for each regimen

# Using the aggregation method, produce the same summary statistics in a single line


## Bar and Pie Charts

In [9]:
# Generate a bar plot showing the total number of measurements taken on each drug regimen using pandas.



In [10]:
# Generate a bar plot showing the total number of measurements taken on each drug regimen using pyplot.



In [11]:
# Generate a pie plot showing the distribution of female versus male mice using pandas



In [12]:
# Generate a pie plot showing the distribution of female versus male mice using pyplot



## Quartiles, Outliers and Boxplots

In [13]:
# Calculate the final tumor volume of each mouse across four of the treatment regimens:  
# Capomulin, Ramicane, Infubinol, and Ceftamin

# Start by getting the last (greatest) timepoint for each mouse


# Merge this group df with the original dataframe to get the tumor volume at the last timepoint


In [14]:
# Put treatments into a list for for loop (and later for plot labels)


# Create empty list to fill with tumor vol data (for plotting)


# Calculate the IQR and quantitatively determine if there are any potential outliers. 

    
    # Locate the rows which contain mice on each drug and get the tumor volumes
    
    
    # add subset 
    
    
    # Determine outliers using upper and lower bounds
    

In [15]:
# Generate a box plot of the final tumor volume of each mouse across four regimens of interest


## Line and Scatter Plots

In [16]:
# Generate a line plot of tumor volume vs. time point for a mouse treated with Capomulin


In [17]:
# Generate a scatter plot of average tumor volume vs. mouse weight for the Capomulin regimen


## Correlation and Regression

In [18]:
# Calculate the correlation coefficient and linear regression model 
# for mouse weight and average tumor volume for the Capomulin regimen
