# Pymaceuticals Inc.
---

### Analysis

- Add your analysis here.
 

In [207]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st

# Study data files
mouse_metadata_path = "data/Mouse_metadata.csv"
study_results_path = "data/Study_results.csv"

# Read the mouse data and the study results
mouse_metadata = pd.read_csv(mouse_metadata_path)
study_results = pd.read_csv(study_results_path)

# Combine the data into a single DataFrame
mouse_study_combined = study_results.merge(mouse_metadata, how='left')

# Display the data table for preview
mouse_study_combined.head()

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug Regimen,Sex,Age_months,Weight (g)
0,b128,0,45.0,0,Capomulin,Female,9,22
1,f932,0,45.0,0,Ketapril,Male,15,29
2,g107,0,45.0,0,Ketapril,Female,2,29
3,a457,0,45.0,0,Ketapril,Female,11,30
4,c819,0,45.0,0,Ketapril,Male,21,25


In [208]:
# Checking the number of mice.
number_of_mice = len(mouse_study_combined.groupby("Mouse ID")["Mouse ID"].unique())
number_of_mice

249

In [209]:
# Our data should be uniquely identified by Mouse ID and Timepoint
# Get the duplicate mice by ID number that shows up for Mouse ID and Timepoint.
duplicate_mouse = mouse_study_combined[mouse_study_combined.duplicated()]
duplicate_mouse_id = duplicate_mouse["Mouse ID"].values
duplicate_mouse_id


array(['g989'], dtype=object)

In [210]:
# Optional: Get all the data for the duplicate mouse ID. 
duplicate_mouse_data = mouse_study_combined[mouse_study_combined.duplicated(subset=["Mouse ID", "Timepoint"], keep = False)]
duplicate_mouse_data

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug Regimen,Sex,Age_months,Weight (g)
107,g989,0,45.0,0,Propriva,Female,21,26
137,g989,0,45.0,0,Propriva,Female,21,26
329,g989,5,48.786801,0,Propriva,Female,21,26
360,g989,5,47.570392,0,Propriva,Female,21,26
620,g989,10,51.745156,0,Propriva,Female,21,26
681,g989,10,49.880528,0,Propriva,Female,21,26
815,g989,15,51.325852,1,Propriva,Female,21,26
869,g989,15,53.44202,0,Propriva,Female,21,26
950,g989,20,55.326122,1,Propriva,Female,21,26
1111,g989,20,54.65765,1,Propriva,Female,21,26


In [211]:
# Create a clean DataFrame by dropping the duplicate mouse by its ID.
#mouse_data_cleaned = mouse_study_combined.drop_duplicates(subset=["Mouse ID", "Timepoint"], keep="first")

mouse_study_combined.drop(mouse_study_combined.loc[mouse_study_combined['Mouse ID']==duplicate_mouse_id[0]].index, inplace=True)
mouse_study_combined

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug Regimen,Sex,Age_months,Weight (g)
0,b128,0,45.000000,0,Capomulin,Female,9,22
1,f932,0,45.000000,0,Ketapril,Male,15,29
2,g107,0,45.000000,0,Ketapril,Female,2,29
3,a457,0,45.000000,0,Ketapril,Female,11,30
4,c819,0,45.000000,0,Ketapril,Male,21,25
...,...,...,...,...,...,...,...,...
1888,r944,45,41.581521,2,Capomulin,Male,12,25
1889,u364,45,31.023923,3,Capomulin,Male,18,17
1890,p438,45,61.433892,1,Ceftamin,Female,11,26
1891,x773,45,58.634971,4,Placebo,Female,21,30


In [214]:
# Checking the number of mice in the clean DataFrame.
len(mouse_study_combined.groupby("Mouse ID")["Mouse ID"].unique())

248

## Summary Statistics

In [232]:
# Generate a summary statistics table of mean, median, variance, standard deviation, and SEM of the tumor volume for each regimen

# Use groupby and summary statistical methods to calculate the following properties of each drug regimen: 
# mean, median, variance, standard deviation, and SEM of the tumor volume. 
# Assemble the resulting series into a single summary DataFrame.

drug_mean = mouse_study_combined.groupby("Drug Regimen")["Tumor Volume (mm3)"].mean()
drug_variance = mouse_study_combined.groupby("Drug Regimen")["Tumor Volume (mm3)"].var()
drug_median = mouse_study_combined.groupby("Drug Regimen")["Tumor Volume (mm3)"].median()
drug_std = mouse_study_combined.groupby("Drug Regimen")["Tumor Volume (mm3)"].std()
drug_sem = mouse_study_combined.groupby("Drug Regimen")["Tumor Volume (mm3)"].sem()

stat_table = pd.DataFrame(
                        {"Mean Tumor Volume": drug_mean,
                        "Median Tumor Volume": drug_median,
                        "Tumor Volume Variance": drug_variance,
                        "Tumor Volume Std. Dev.": drug_std,
                        "Tumor Volume Std. Err.": drug_sem})
stat_table

Unnamed: 0_level_0,Mean Tumor Volume,Median Tumor Volume,Tumor Volume Variance,Tumor Volume Std. Dev.,Tumor Volume Std. Err.
Drug Regimen,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Capomulin,40.675741,41.557809,24.947764,4.994774,0.329346
Ceftamin,52.591172,51.776157,39.290177,6.268188,0.469821
Infubinol,52.884795,51.820584,43.128684,6.567243,0.492236
Ketapril,55.235638,53.698743,68.553577,8.279709,0.60386
Naftisol,54.331565,52.509285,66.173479,8.134708,0.596466
Placebo,54.033581,52.288934,61.168083,7.821003,0.581331
Propriva,52.32093,50.446266,43.852013,6.622085,0.544332
Ramicane,40.216745,40.673236,23.486704,4.846308,0.320955
Stelasyn,54.233149,52.431737,59.450562,7.710419,0.573111
Zoniferol,53.236507,51.818479,48.533355,6.966589,0.516398


In [236]:
# A more advanced method to generate a summary statistics table of mean, median, variance, standard deviation,
# and SEM of the tumor volume for each regimen (only one method is required in the solution)

# Using the aggregation method, produce the same summary statistics in a single line
mouse_study_combined.aggregate(func= 'mean')

TypeError: Could not convert ['b128f932g107a457c819h246p189n923q119f993z234b559y260x930o725z969v835r604n304l700x336l725m133v295a818y601t724k382w422c326c139v339a577y163k483k804o809z581a251i386c580q132u327v603f278g497d474o973c832o331m650v289m550h428r701v199x264f234c458q610j913a411a444d251j989y449k403c758x402r811a644i177g791a520u196m546w678n364s508e662z578r921a492w540v764z795e291e584e213j755s565a366p387b879i901k862g867s619w697j984c402h333k510p981t451a963m269z314o848v719q597c895a203f394c264n967f545k894k754g296d164w575x613q633b313f129j365p136c559a788s337q511m331o926d133n630a401w350c302a897j235q787a262r850i669n763s152c766e227c282v989w140v409l872o795y478q582s166x773w167t718o302i477c757t994p310a699k603x822l558l733f436l490b759l471y865y769l661j296u149u153w151h531a685o813m756n671s121v766w193e476w584b447k210a275x581n482t198l215s141o523i635w746r107s187g570x209x111z435n678g558u946o562v991c927x226p438b487o287q483t573f345i334l264j246r554s185b742x401l509s710r157u364j119v923w914i738r944y793g316g288l897w150t565i557m957f966m601a275w575x822l509n304d164l897l558g296c458a401l700f234x613u149k210x402q610x581a492m133l725q633f129q483a444v835r604f394y865d251b313y769z234b559f993q119j989w150n923y449l661c302c264l471x930k754f436v295j913s710z969k894l490b759a411f545o725y601w914n967l733m601b487o287p310r107s565w350s187j984w697j755g570u364x209c402b742n630e213a366w746s141i901t198b879k603o523v923f966p387g867i635s619a699q132k862h333x401o926c927c559r701j119p438m269a963p136t724z314a818v764j365i557x264v991a788x111k510z435e584m331n678p981g558e291q511u946z795t451s337o562w540w140l215y793n763a203w584y163o973o795w422e662g791c580e476k483y478i669d474w193q582s166o331k382k804r850g316v766g497a520x773m550t994n364l264e227c326r157c766v719w678c139m546v339s152m650c282v989r944r554c895v409u196c832v289a897f345s508a577z578i177a644c757s185i738m756o813j246a685r811h246q597a251q787c758u327t565c819o302h531i477w151y260r921a457j296w167a262b128n671g288v603i334k403p189o809g107t718i386s121z581f278j235m957e291u946g558i386e584o523i635b879u327n678r554z435s565g497s187o809z581r107k804a366j755g570d474c832x209u364k483o973y163e213a251p387v923x111a577g288v603c139j296o331b759l490z969n671g107v835s710s121f436r604l733l509y260v766n304a457l558o725c819i738n923p189q119l661f993m957h246w151y769y865b559s141l471x930a685w193x822k382v289b487c895t724j119p438r701m650i557v719c927x264v991l264o562f345a818o287m550l700g316u149e476k210l725a275z795c580m133w422x581v295a203y793q483w584f966q132m269w167m601a492x773a401i177s166q582w540j365v764g791y478l897o795p136t451c282s337m546v989a788a644v409r944a963t994c559a520a444u196e227f129t718n967d251b313f545a411w914c302j989k894y449w150j913a897k754k403c757g296r921q633t565r811f234x613x402o302w575c458i477c758d164q610s185q511c264k862n763h333c402q787b128g867b742e662i669a699q597j984a262w350n364z578w697p310k603k510r850j235i901s152o926m331x401l215j246s508p981i334r157t198c766q597l661y769s185d251y865a685a262b128r811z581c819r850b559c264o302g288h246a251c757j989i334f993y449i738i386u327q787n923c758c302q132n671a897k403b313p189m957w151v603j296o331a644o809n763w422o795a203a520y163r944m550f345s152t994c895y793u196c832s508m546a577m650v719c766c282j235e227c139n364v289r921y478c580t565g497t718g107s121z578j246k804w167d474y260v766w584x773i669i177r157k382k483q582e476g791g316r554o973e662w193s141n967p438b487g570j119j755v764a818o287w540w350q483j365j984v295s187a401x581a492e291a275f129s565x402l725b742t724k210p136m269q511u946x401g558t451z795o562s337v991a788a963x264e584c927p981o926i557c559u364r701l897k510e213h333c402m601m133m331w914s710a699g296a411b879p387r604l509f436u149b759g867k603i635k862k754j913v835l490k894l733f966f545o725p310r107q633x930f234l700a444a366x822x613l471z969c458q610w697v923w575t198n304i901u364o926q132a577i386t198g558c766u946p310s152z581f966o809s187b128i669b742k804a699i635r107r554w350k603a251k483a262n763q787y163g570s141q511s121t718k210u149n671o302q633x822a685m957w575l733c757w151g296s185a897c264f545y769l471l661b313a401b759k754c302j296l490l509w914k894e227a275x581o562c139c282v719v991s337c927t994a788c895l897f345c559r944p438o795y865x773w193j365q483j119e476w167q582p136w584b487y478g316a203o287j246j235m550a818i901v923c758v295k862z578o331g791w697c819k403m133j984c832q597t565c402h333k510o973l700p981m650j989b879g107a644y793i557e291c580k382w422s508e584x264e213r921j755e662r701y260i177r811s565m546a366t724p387v289h246l725z969w540i334v835a444t451a492g288x402n923i738n364f234m601f993c458u196a411x930u327q610b559j913v603g497z795a520d474d251s710r604x401m269a963p189n304u149x264l725i177l471r944v295x930c927a520y865b559m546v991l700w167s710n304k210p438x581q582r604j119l490q483a275a818l733o287z969m133t565l509o795t724b759b487u196r701y478x773x822o809i557p136c559m269a788x401a963l897s337b313j989q511t451c302o926p981c264d251w540j365j913a411k894g296w914q610w575h333m601q633f234f545x402a401a492a444c458z795s185a366s187s565r921o302g570j755e213t718u364g558e584e291a644o562r811r107p387i635w350k403j984p310a897a699w697c402b742k862t198c758c757s141i901b879v923y769g791c282s121g107k804w151d474e227m550r554f345c139g288p189g497g316y260c895i334h246k483z578c766a577c819q597c832i669a685a262m650y163n671v289b128m957o973n763o331j296q132w584j246e476c580u327i386s152w422v603a251l661k382i738n923e662n364q787y793z581s508a203m650n763z578c758v923k862g570e584t198s141c139w422b742i557u364g316r921m550e213a203a644j755r107p387c895r811c580f345o302e662b879s565t718g558c757i901i635s187c402a699c264p136g497d251r554j365z581a492a444q787a401f545x402f234a251q633c458v603j246w575w914q610i386m601k894u327g296j913q132i334x401o809c559o331a897a577j984q597k403w350c832s185w584y163h333a262w697p981c302t451k483q511o973l897j989g288s337a963k804d474a788m269o926b128a366r701m957v295b759n364z969w151l700o287t724r944x773l509a275g107s508l471u196p438j296n923a411l733p189n304q582g791n671e227o795r604x581c819l490a818c766l725q483t565j119l661m546x822k210w167h246s710v991b559i738z795c282e476k382y769o562i177y478y865a520x264a685y793y260c927i334l897o809a520p189k483x822a963l725l733m269y478s185s337o973d474h246j989k804e227w151q511l700t451e662z581c264w914l661v603j246q610l509m546w575i386y865n364i738k894c282g296u327j913y769m601a444q633l471a251x401n304o795p136n923r604t565d251j365c559a492g497z969a401f545u196x402r554q787f234j296c302c832g288f345g107b559r701m550x773b128p387t724s565r811i635o287c766o926r944i901g316a818m650b879z578n763y793u364k382t718e584e476v991g558z795i557p438w167i177a203a644c927j755y260w422x264g570e213q483c757a411o562a685a897o331q597j984v295q582k862j119k210y163a275w350s508k403a699s710c402c819x581c139a262m957b742c758v923w697p981t198g791a262a644i334a520k894e662n364k403b128i177c302s508y478t718c282o795m546z578g791c758d251c264u196c757r811c766e227n763f545r944x773s185a897q787a444m601t565q597j989u327b879j755y260g570a275s710s565p387l471i635e476k382j119i901a203b742w422k862t198k210a699f345v923m550l661o287v295e213w151p189t724q483r701p438a818x264c927a411j296a685z795v991h246u364o562m957c819x581e584g558y793i557j984l725g316p136r604a492k483o973j365d474l509z581a401g497x402y865q633z969a251w914w575r554i386j913g296i738c832c559x401n304w350m650c402b559o926c139l897x822l700t451y163q511l733m269y769g288n923a963o331s337n364b128l661y865y769b559t565c282l509j296a411p189m650q597g288o331y163i334c139c832a262z581g497a251q787r554k483w151g316m550e227a685s508h246y793c766z578m957e662e476k382a203w422f345y260m546a818l471s337a963q511k403t451o926l897c302w350c402v923c758j984a699a897t198k862j989b742m269c264g296j913w575k894a444q633w914a401f545x402j365m601d251a492x401p136c559s185i738i901c757t724s710x581a275a520k210j119y478l725x822l700l733n304o795u196r604z969g791i635o287r701r811u327s565g570i557a644j755e213e584t718o562i177v991z795r944u364p438x773b879'] to numeric

## Bar and Pie Charts

In [None]:
# Generate a bar plot showing the total number of rows (Mouse ID/Timepoints) for each drug regimen using Pandas.


In [None]:
# Generate a bar plot showing the total number of rows (Mouse ID/Timepoints) for each drug regimen using pyplot.


In [None]:
# Generate a pie plot showing the distribution of female versus male mice using Pandas


In [None]:
# Generate a pie plot showing the distribution of female versus male mice using pyplot


## Quartiles, Outliers and Boxplots

In [None]:
# Calculate the final tumor volume of each mouse across four of the treatment regimens:  
# Capomulin, Ramicane, Infubinol, and Ceftamin

# Start by getting the last (greatest) timepoint for each mouse


# Merge this group df with the original DataFrame to get the tumor volume at the last timepoint


In [None]:
# Put treatments into a list for for loop (and later for plot labels)


# Create empty list to fill with tumor vol data (for plotting)


# Calculate the IQR and quantitatively determine if there are any potential outliers. 

    
    # Locate the rows which contain mice on each drug and get the tumor volumes

    
    # add subset 

    
    # Determine outliers using upper and lower bounds


In [None]:
# Generate a box plot that shows the distrubution of the tumor volume for each treatment group.


## Line and Scatter Plots

In [None]:
# Generate a line plot of tumor volume vs. time point for a single mouse treated with Capomulin


In [None]:
# Generate a scatter plot of mouse weight vs. the average observed tumor volume for the entire Capomulin regimen


## Correlation and Regression

In [None]:
# Calculate the correlation coefficient and a linear regression model 
# for mouse weight and average observed tumor volume for the entire Capomulin regimen
