## Pharmaceuticals Analysis using Matplotlib

In [12]:
# Libraries: import python dependencies/libraries to facilitate analysis.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as st

# File Paths: Create variables that will identify where the file locations are, where the data is stored.
# These are the variables that hold the csv path location
mouse_metadata_path = "data/Mouse_metadata.csv"
study_results_path = "data/Study_results.csv"

# Read Files: Create variables that will read in the data based on the file locations.
# These are the two variables that will read in the csv file data based on the path location variables already created.
mouse_metadata = pd.read_csv(mouse_metadata_path)
study_results = pd.read_csv(study_results_path)

In [13]:
# Test1: Call the data using the "read file" variables that were defined in the setup section.
mouse_metadata.head()

Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g)
0,k403,Ramicane,Male,21,16
1,s185,Capomulin,Female,3,17
2,x401,Capomulin,Female,16,15
3,m601,Capomulin,Male,22,17
4,g791,Ramicane,Male,11,16


In [14]:
#Test2: The variables cannot be called in the same code block because it will call only the last line. Read the data using 
# the second defined variable.
study_results.head()

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,b128,0,45.0,0
1,f932,0,45.0,0
2,g107,0,45.0,0
3,a457,0,45.0,0
4,c819,0,45.0,0


In [15]:
# Merge the two datasets on the "Mouse ID" column. 
# Define variable and use pandas merge function (pd.merge) on "Mouse ID" so that no data is lost
merged_mouse_study = pd.merge(mouse_metadata, study_results, how='outer', on='Mouse ID')

merged_mouse_study.head()

Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g),Timepoint,Tumor Volume (mm3),Metastatic Sites
0,k403,Ramicane,Male,21,16,0,45.0,0
1,k403,Ramicane,Male,21,16,5,38.825898,0
2,k403,Ramicane,Male,21,16,10,35.014271,1
3,k403,Ramicane,Male,21,16,15,34.223992,1
4,k403,Ramicane,Male,21,16,20,32.997729,1


In [16]:
# Unique Count:
# Display a list of each column with the .nunique function

merged_mouse_study.nunique() 

Mouse ID               249
Drug Regimen            10
Sex                      2
Age_months              24
Weight (g)              16
Timepoint               10
Tumor Volume (mm3)    1644
Metastatic Sites         5
dtype: int64

In [17]:
# Print Statement:
# Print a statement of how many Mouse ID's there are.

print("There are 249 Mouse IDs in this dataset.")

There are 249 Mouse IDs in this dataset.


In [18]:
# Finding All Duplicates:
# Getting the Mouse ID numbers that show up with duplicate Timepoints. 
# Create a DataFrame that holds just the Mouse ID and Timepoints and assign it a variable name
mouse_timepoint_duplicates = pd.DataFrame(merged_mouse_study, columns=['Mouse ID', 'Timepoint'])
# Print the DataFrame
mouse_timepoint_duplicates

Unnamed: 0,Mouse ID,Timepoint
0,k403,0
1,k403,5
2,k403,10
3,k403,15
4,k403,20
...,...,...
1888,z969,25
1889,z969,30
1890,z969,35
1891,z969,40


In [19]:
# Finding Duplicates (exclude first instances):
# Use pandas function find duplicate (.duplicated) with the duplicates DataFrame variable to identify repeats
duplicate_results = mouse_timepoint_duplicates[mouse_timepoint_duplicates.duplicated()]
# Create and print a header description for results
print("The Duplicate Mouse ID/Timepoint rows, except first instances, are:")
# Print the findings
print(duplicate_results)

The Duplicate Mouse ID/Timepoint rows, except first instances, are:
    Mouse ID  Timepoint
909     g989          0
911     g989          5
913     g989         10
915     g989         15
917     g989         20


In [23]:
# Locate Specific:
# Optional: Display all the data for the duplicate mouse ID. 
# With the original merged dataset variable, use pandas locate (.loc) to find the desired column and associated row
merged_mouse_study.loc[merged_mouse_study['Mouse ID'] == 'c819']

Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g),Timepoint,Tumor Volume (mm3),Metastatic Sites
717,c819,Ketapril,Male,21,25,0,45.0,0
718,c819,Ketapril,Male,21,25,5,45.769249,1
719,c819,Ketapril,Male,21,25,10,46.658395,1
720,c819,Ketapril,Male,21,25,15,48.370999,1
721,c819,Ketapril,Male,21,25,20,49.762415,1
722,c819,Ketapril,Male,21,25,25,51.828357,1
723,c819,Ketapril,Male,21,25,30,56.098998,1
724,c819,Ketapril,Male,21,25,35,57.729535,1
725,c819,Ketapril,Male,21,25,40,62.175705,1
