In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Relative paths to the two input CSV files read in the next cell.
mouse_file, results_file = (
    "../Resources/Mouse_metadata.csv",
    "../Resources/Study_results.csv",
)

In [3]:
# Load both CSV files with the same UTF-8 decoding; the order of the paths
# matches the order of the names being assigned.
mouse_df, results_df = (
    pd.read_csv(csv_path, encoding="utf8")
    for csv_path in (mouse_file, results_file)
)

In [4]:
# Display the mouse metadata table (one row per mouse: regimen, sex, age, weight).
mouse_df

Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g)
0,k403,Ramicane,Male,21,16
1,s185,Capomulin,Female,3,17
2,x401,Capomulin,Female,16,15
3,m601,Capomulin,Male,22,17
4,g791,Ramicane,Male,11,16
...,...,...,...,...,...
244,z314,Stelasyn,Female,21,28
245,z435,Propriva,Female,12,26
246,z581,Infubinol,Female,24,25
247,z795,Naftisol,Female,13,29


In [5]:
# Display the study results table (tumor volume and metastatic sites per mouse per timepoint).
results_df

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,b128,0,45.000000,0
1,f932,0,45.000000,0
2,g107,0,45.000000,0
3,a457,0,45.000000,0
4,c819,0,45.000000,0
...,...,...,...,...
1888,r944,45,41.581521,2
1889,u364,45,31.023923,3
1890,p438,45,61.433892,1
1891,x773,45,58.634971,4


In [6]:
# Flag every row whose (Mouse ID, Timepoint) pair appears more than once.
# keep=False marks all rows of a duplicated pair, not only the later repeats.
# NOTE: this adds a "Duplicate" column to results_df itself.
results_df["Duplicate"] = results_df.duplicated(
    subset=["Mouse ID", "Timepoint"],
    keep=False,
)

# Keep only the unflagged rows and leave the helper column out of the result
results_reduced_df = results_df.loc[
    ~results_df["Duplicate"],
    ["Mouse ID", "Timepoint", "Tumor Volume (mm3)", "Metastatic Sites"],
]
results_reduced_df

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,b128,0,45.000000,0
1,f932,0,45.000000,0
2,g107,0,45.000000,0
3,a457,0,45.000000,0
4,c819,0,45.000000,0
...,...,...,...,...
1888,r944,45,41.581521,2
1889,u364,45,31.023923,3
1890,p438,45,61.433892,1
1891,x773,45,58.634971,4


In [7]:
# Identify duplicates
#results_df.loc[results_df["Duplicate"] == True, ["Mouse ID", "Timepoint", "Tumor Volume (mm3)","Metastatic Sites"]]

In [8]:
# Overall mean tumor volume across all de-duplicated measurements
tumor_mean = results_reduced_df.loc[:, "Tumor Volume (mm3)"].mean()

In [9]:
# Attach each mouse's metadata to its measurements. The left join on "Mouse ID"
# keeps every mouse from mouse_df, even one with no remaining result rows.
merged_df = mouse_df.merge(results_reduced_df, on="Mouse ID", how="left")
merged_df

Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g),Timepoint,Tumor Volume (mm3),Metastatic Sites
0,k403,Ramicane,Male,21,16,0,45.000000,0
1,k403,Ramicane,Male,21,16,5,38.825898,0
2,k403,Ramicane,Male,21,16,10,35.014271,1
3,k403,Ramicane,Male,21,16,15,34.223992,1
4,k403,Ramicane,Male,21,16,20,32.997729,1
...,...,...,...,...,...,...,...,...
1878,z969,Naftisol,Male,9,30,25,63.145652,2
1879,z969,Naftisol,Male,9,30,30,65.841013,3
1880,z969,Naftisol,Male,9,30,35,69.176246,4
1881,z969,Naftisol,Male,9,30,40,70.314904,4


In [10]:
#merged_df.to_csv("merged_df.csv", header=True)

In [11]:
# Mean tumor volume per drug regimen.
# Select the tumor-volume column *before* aggregating: merged_df also holds
# string columns (Mouse ID, Sex), and pandas >= 2.0 raises a TypeError when
# asked to average them instead of silently dropping them.
mean_volume = merged_df.groupby("Drug Regimen")[["Tumor Volume (mm3)"]].mean()

# Rename the statistic column and turn the "Drug Regimen" index back into a
# regular column so the table carries the regimen labels.
mean_volume_df = (
    mean_volume
    .rename(columns={"Tumor Volume (mm3)": "Mean Tumor Volume"})
    .reset_index()
)

mean_volume_df

Unnamed: 0,Drug Regimen,Mean Tumor Volume
0,Capomulin,40.675741
1,Ceftamin,52.591172
2,Infubinol,52.884795
3,Ketapril,55.235638
4,Naftisol,54.331565
5,Placebo,54.033581
6,Propriva,52.458254
7,Ramicane,40.216745
8,Stelasyn,54.233149
9,Zoniferol,53.236507


In [18]:
# Median tumor volume per drug regimen.
# Select the tumor-volume column *before* aggregating: merged_df also holds
# string columns (Mouse ID, Sex), and pandas >= 2.0 raises a TypeError when
# asked to aggregate them instead of silently dropping them.
median_volume = merged_df.groupby("Drug Regimen")[["Tumor Volume (mm3)"]].median()

# Rename the statistic column and replace the "Drug Regimen" index with a plain
# 0..N-1 index. The regimen labels are dropped on purpose: the summary join
# aligns the per-statistic frames by index, and rows stay in the groupby's
# sorted regimen order.
median_volume_df = (
    median_volume
    .rename(columns={"Tumor Volume (mm3)": "Median Tumor Volume"})
    .reset_index(drop=True)
)

median_volume_df

Unnamed: 0,Median Tumor Volume
0,41.557809
1,51.776157
2,51.820584
3,53.698743
4,52.509285
5,52.288934
6,50.854632
7,40.673236
8,52.431737
9,51.818479


In [19]:
# Variance of tumor volume per drug regimen.
# Select the tumor-volume column *before* aggregating: merged_df also holds
# string columns (Mouse ID, Sex), and pandas >= 2.0 raises a TypeError when
# asked to aggregate them instead of silently dropping them.
variance_volume = merged_df.groupby("Drug Regimen")[["Tumor Volume (mm3)"]].var()

# Rename the statistic column and replace the "Drug Regimen" index with a plain
# 0..N-1 index. The regimen labels are dropped on purpose: the summary join
# aligns the per-statistic frames by index, and rows stay in the groupby's
# sorted regimen order.
variance_volume_df = (
    variance_volume
    .rename(columns={"Tumor Volume (mm3)": "Tumor Volume Variance"})
    .reset_index(drop=True)
)

variance_volume_df

Unnamed: 0,Tumor Volume Variance
0,24.947764
1,39.290177
2,43.128684
3,68.553577
4,66.173479
5,61.168083
6,44.053659
7,23.486704
8,59.450562
9,48.533355


In [20]:
# Standard deviation of tumor volume per drug regimen.
# Select the tumor-volume column *before* aggregating: merged_df also holds
# string columns (Mouse ID, Sex), and pandas >= 2.0 raises a TypeError when
# asked to aggregate them instead of silently dropping them.
std_volume = merged_df.groupby("Drug Regimen")[["Tumor Volume (mm3)"]].std()

# Rename the statistic column and replace the "Drug Regimen" index with a plain
# 0..N-1 index. The regimen labels are dropped on purpose: the summary join
# aligns the per-statistic frames by index, and rows stay in the groupby's
# sorted regimen order.
std_volume_df = (
    std_volume
    .rename(columns={"Tumor Volume (mm3)": "StD Tumor Volume"})
    .reset_index(drop=True)
)

std_volume_df

Unnamed: 0,StD Tumor Volume
0,4.994774
1,6.268188
2,6.567243
3,8.279709
4,8.134708
5,7.821003
6,6.637293
7,4.846308
8,7.710419
9,6.966589


In [21]:
# Standard error of the mean (SEM) of tumor volume per drug regimen.
# Select the tumor-volume column *before* aggregating: merged_df also holds
# string columns (Mouse ID, Sex), and pandas >= 2.0 raises a TypeError when
# asked to aggregate them instead of silently dropping them.
sem_volume = merged_df.groupby("Drug Regimen")[["Tumor Volume (mm3)"]].sem()

# Rename the statistic column and replace the "Drug Regimen" index with a plain
# 0..N-1 index. The regimen labels are dropped on purpose: the summary join
# aligns the per-statistic frames by index, and rows stay in the groupby's
# sorted regimen order.
sem_volume_df = (
    sem_volume
    .rename(columns={"Tumor Volume (mm3)": "SEM Tumor Volume"})
    .reset_index(drop=True)
)

sem_volume_df

Unnamed: 0,SEM Tumor Volume
0,0.329346
1,0.469821
2,0.492236
3,0.60386
4,0.596466
5,0.581331
6,0.540135
7,0.320955
8,0.573111
9,0.516398


In [22]:
# Build the summary table by joining the per-statistic frames onto the mean
# frame. The join is index-based, so the frames are matched row by row on
# their shared 0..N-1 index rather than on the regimen name.
summary_stats = mean_volume_df.join(
    other=[
        median_volume_df,
        variance_volume_df,
        std_volume_df,
        sem_volume_df,
    ]
)
summary_stats

Unnamed: 0,Drug Regimen,Mean Tumor Volume,Median Tumor Volume,Tumor Volume Variance,StD Tumor Volume,SEM Tumor Volume
0,Capomulin,40.675741,41.557809,24.947764,4.994774,0.329346
1,Ceftamin,52.591172,51.776157,39.290177,6.268188,0.469821
2,Infubinol,52.884795,51.820584,43.128684,6.567243,0.492236
3,Ketapril,55.235638,53.698743,68.553577,8.279709,0.60386
4,Naftisol,54.331565,52.509285,66.173479,8.134708,0.596466
5,Placebo,54.033581,52.288934,61.168083,7.821003,0.581331
6,Propriva,52.458254,50.854632,44.053659,6.637293,0.540135
7,Ramicane,40.216745,40.673236,23.486704,4.846308,0.320955
8,Stelasyn,54.233149,52.431737,59.450562,7.710419,0.573111
9,Zoniferol,53.236507,51.818479,48.533355,6.966589,0.516398
