# Pymaceuticals Inc.

# ------------------------------------------------------------------------------------

# Importing Data and Setup

In [46]:
# Import the libraries used throughout the notebook
import pathlib as path
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st

In [47]:
# Load the per-mouse metadata and the per-timepoint study measurements
mouse_meta_data = pd.read_csv("Mouse_metadata.csv")
study_results = pd.read_csv("Study_results.csv")

# Attach each mouse's measurements to its metadata row (left join on "Mouse ID")
df_complete_data = mouse_meta_data.merge(study_results, how="left", on="Mouse ID")
df_complete_data.head(10)

Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g),Timepoint,Tumor Volume (mm3),Metastatic Sites
0,k403,Ramicane,Male,21,16,0,45.0,0
1,k403,Ramicane,Male,21,16,5,38.825898,0
2,k403,Ramicane,Male,21,16,10,35.014271,1
3,k403,Ramicane,Male,21,16,15,34.223992,1
4,k403,Ramicane,Male,21,16,20,32.997729,1
5,k403,Ramicane,Male,21,16,25,33.464577,1
6,k403,Ramicane,Male,21,16,30,31.099498,1
7,k403,Ramicane,Male,21,16,35,26.546993,1
8,k403,Ramicane,Male,21,16,40,24.365505,1
9,k403,Ramicane,Male,21,16,45,22.050126,1


In [48]:
# Number of distinct mouse IDs in the merged data (missing IDs are not counted)
df_complete_data.loc[:, "Mouse ID"].nunique(dropna=True)

249

In [49]:
# Fraction of missing values per column (0.0 means the column is complete)
df_complete_data.isna().mean()

Mouse ID              0.0
Drug Regimen          0.0
Sex                   0.0
Age_months            0.0
Weight (g)            0.0
Timepoint             0.0
Tumor Volume (mm3)    0.0
Metastatic Sites      0.0
dtype: float64

In [50]:
# Find every Mouse ID that has more than one record for the same timepoint.
# `duplicated` flags the second and later occurrences of each
# (Mouse ID, Timepoint) pair; `unique` collapses them to one entry per mouse.
duplicates = df_complete_data.loc[
    df_complete_data.duplicated(subset=["Mouse ID", "Timepoint"]), "Mouse ID"
].unique()

# Show all records for the affected mice, driven by the IDs found above
# rather than a hard-coded ID, so the check stays correct if the data changes.
df_complete_data.loc[df_complete_data["Mouse ID"].isin(duplicates)]

Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g),Timepoint,Tumor Volume (mm3),Metastatic Sites
908,g989,Propriva,Female,21,26,0,45.0,0
909,g989,Propriva,Female,21,26,0,45.0,0
910,g989,Propriva,Female,21,26,5,48.786801,0
911,g989,Propriva,Female,21,26,5,47.570392,0
912,g989,Propriva,Female,21,26,10,51.745156,0
913,g989,Propriva,Female,21,26,10,49.880528,0
914,g989,Propriva,Female,21,26,15,51.325852,1
915,g989,Propriva,Female,21,26,15,53.44202,0
916,g989,Propriva,Female,21,26,20,55.326122,1
917,g989,Propriva,Female,21,26,20,54.65765,1


In [51]:
# Collect the index labels of every record belonging to the duplicated mouse
duplicate_mouse = "g989"
rows_dropped = df_complete_data.loc[
    df_complete_data["Mouse ID"].eq(duplicate_mouse)
].index

# Build the cleaned frame without those rows; df_complete_data is left untouched
df_cleaned_data = df_complete_data.drop(index=rows_dropped)
df_cleaned_data

Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g),Timepoint,Tumor Volume (mm3),Metastatic Sites
0,k403,Ramicane,Male,21,16,0,45.000000,0
1,k403,Ramicane,Male,21,16,5,38.825898,0
2,k403,Ramicane,Male,21,16,10,35.014271,1
3,k403,Ramicane,Male,21,16,15,34.223992,1
4,k403,Ramicane,Male,21,16,20,32.997729,1
...,...,...,...,...,...,...,...,...
1888,z969,Naftisol,Male,9,30,25,63.145652,2
1889,z969,Naftisol,Male,9,30,30,65.841013,3
1890,z969,Naftisol,Male,9,30,35,69.176246,4
1891,z969,Naftisol,Male,9,30,40,70.314904,4


In [52]:
# Distinct mouse IDs remaining after the duplicated mouse was removed
# (should be one fewer than the count on the merged data)
df_cleaned_data.loc[:, "Mouse ID"].nunique(dropna=True)

248

In [53]:
# Order the cleaned records by timepoint and display them.
# sort_values returns a new frame; rebinding the name (instead of mutating the
# existing object in place) keeps the step explicit and avoids silently changing
# a frame that an earlier cell already displayed.
df_cleaned_data = df_cleaned_data.sort_values(by=["Timepoint"])
df_cleaned_data

Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g),Timepoint,Tumor Volume (mm3),Metastatic Sites
0,k403,Ramicane,Male,21,16,0,45.000000,0
1226,n678,Propriva,Male,5,29,0,45.000000,0
1229,n763,Placebo,Female,16,25,0,45.000000,0
341,v923,Capomulin,Female,19,21,0,45.000000,0
338,w150,Capomulin,Male,23,23,0,45.000000,0
...,...,...,...,...,...,...,...,...
1162,l733,Ceftamin,Female,4,30,45,64.299830,1
1763,x402,Stelasyn,Male,21,28,45,61.619606,3
59,s508,Ramicane,Male,1,17,45,30.276232,0
360,g316,Capomulin,Female,22,22,45,40.159220,2


In [56]:
# Preview the first five cleaned records with a fresh 0..4 index.
# The result is only displayed; df_cleaned_data itself keeps its original index.
df_cleaned_data.head(5).reset_index(drop=True)

Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g),Timepoint,Tumor Volume (mm3),Metastatic Sites
0,k403,Ramicane,Male,21,16,0,45.0,0
1,n678,Propriva,Male,5,29,0,45.0,0
2,n763,Placebo,Female,16,25,0,45.0,0
3,v923,Capomulin,Female,19,21,0,45.0,0
4,w150,Capomulin,Male,23,23,0,45.0,0


# Summary Statistics

In [70]:
# Tumor volume measurements grouped by drug regimen, shared by every statistic below
tumor_vol_by_regimen = df_cleaned_data.groupby(["Drug Regimen"])["Tumor Volume (mm3)"]

# One Series per statistic, each indexed by drug regimen
mean_tumor_vol = tumor_vol_by_regimen.mean()
med_tumor_vol = tumor_vol_by_regimen.median()
tumor_vol_var = tumor_vol_by_regimen.var()
tumor_vol_stdv = tumor_vol_by_regimen.std()
tumor_vol_sterr = tumor_vol_by_regimen.sem()

In [72]:
# Map each output column label to its per-regimen statistic (order = column order)
summary_statistics = dict(
    [
        ("Mean Tumor Volume", mean_tumor_vol),
        ("Median Tumor Volume", med_tumor_vol),
        ("Tumor Volume Variance", tumor_vol_var),
        ("Tumor Volume Std. Dev", tumor_vol_stdv),
        ("Tumor Volume Std. Err", tumor_vol_sterr),
    ]
)

# Assemble the summary table: one row per drug regimen, one column per statistic
df_summary_statistics = pd.DataFrame(data=summary_statistics)

# Display the summary table
df_summary_statistics

Unnamed: 0_level_0,Mean Tumor Volume,Median Tumor Volume,Tumor Volume Variance,Tumor Volume Std. Dev,Tumor Volume Std. Err
Drug Regimen,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Capomulin,40.675741,41.557809,24.947764,4.994774,0.329346
Ceftamin,52.591172,51.776157,39.290177,6.268188,0.469821
Infubinol,52.884795,51.820584,43.128684,6.567243,0.492236
Ketapril,55.235638,53.698743,68.553577,8.279709,0.60386
Naftisol,54.331565,52.509285,66.173479,8.134708,0.596466
Placebo,54.033581,52.288934,61.168083,7.821003,0.581331
Propriva,52.32093,50.446266,43.852013,6.622085,0.544332
Ramicane,40.216745,40.673236,23.486704,4.846308,0.320955
Stelasyn,54.233149,52.431737,59.450562,7.710419,0.573111
Zoniferol,53.236507,51.818479,48.533355,6.966589,0.516398
