## Observations and Insights

## Dependencies and starter code

In [1]:
%matplotlib notebook

In [8]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np

# Study data files
mouse_metadata= "data/Mouse_metadata.csv"
study_results="data/Study_results.csv"

# Read the mouse data and the study results
mouse_metadata_df = pd.read_csv(mouse_metadata)
study_results_df = pd.read_csv(study_results)

# Combine the data into a single dataset

combine_MData_df = pd.merge(mouse_metadata_df, study_results_df, how= 'outer', on='Mouse ID')

combine_MData_df.head()

Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g),Timepoint,Tumor Volume (mm3),Metastatic Sites
0,k403,Ramicane,Male,21,16,0,45.0,0
1,k403,Ramicane,Male,21,16,5,38.825898,0
2,k403,Ramicane,Male,21,16,10,35.014271,1
3,k403,Ramicane,Male,21,16,15,34.223992,1
4,k403,Ramicane,Male,21,16,20,32.997729,1


In [11]:
Drug_MID = combine_MData_df["Mouse ID"].nunique()
Drug_MID

249

## Summary statistics

In [None]:
combine_MData_df.sort_values
combined_MData_df=combine_MData_df.sort_values(["Tumor Volume (mm3)"], ascending=True)

combined_MData_df.head(20)

In [None]:
# Generate a summary statistics table of mean, median, variance, standard deviation, and SEM of the tumor volume for each regimen

In [None]:
Tumor_Vol = combined_MData_df['Tumor Volume (mm3)']

combined_MData_df['Tumor Volume (mm3)']
Tumor_Mean = Tumor_Vol.mean()
Tumor_Median= Tumor_Vol.median()
Tumor_Var= Tumor_Vol.var()
Tumor_Std= Tumor_Vol.std()
Tumor_Sem= Tumor_Vol.sem()

Tumor_Summary=  [{"Mean": Tumor_Mean,"Median":Tumor_Median,
                 "Variance":Tumor_Var,"Standard Deviation":Tumor_Std,"SEM":Tumor_Sem}]

Tumor_Summary


## Bar plots

In [None]:
# Generate a bar plot showing number of data points for each treatment regimen using pandas

In [None]:
Drug_R = combine_MData_df["Drug Regimen"].value_counts()

Drug_R.plot(kind= "bar", facecolor= "blue")


In [None]:
# Generate a bar plot showing number of data points for each treatment regimen using pyplot

In [None]:
# DATA SET 1
Drugs = ['Ramicane', 'Capomulin', 'Infubinol', 'Placebo', 'Ceftamin',
       'Stelasyn', 'Zoniferol', 'Ketapril', 'Propriva', 'Naftisol']



In [None]:
Drug_R = combine_MData_df["Drug Regimen"].value_counts()
Drug_R

In [None]:
Drugs = ["Capomulin", "Ramicane", "Ketapril", "Naftisol", "Zoniferol", "Stelasyn", "Placebo", 
         "Ceftamin", "Infubinol", "Propriva"]
Treatments = [230, 228, 188, 186,182, 181, 181, 178, 178, 161]
x_axis = np.arange(len(Treatments))
plt.bar(x_axis, Treatments, color="b", align="center")
tick_locations = [value for value in x_axis]
plt.xticks(tick_locations, ["Capomulin", "Ramicane", "Ketapril", "Naftisol", "Zoniferol", "Stelasyn", "Placebo","Ceftamin", "Infubinol", "Propriva"])
tick_locations = [value for value in x_axis]
plt.xticks(tick_locations, ["Capomulin", "Ramicane", "Ketapril", "Naftisol", "Zoniferol", "Stelasyn", "Placebo","Ceftamin", "Infubinol", "Propriva"])




## Pie plots

In [None]:
# Generate a pie plot showing the distribution of female versus male mice using pandas

In [None]:
Male_mice= combined_MData_df[combined_MData_df["Sex"]=="Male"].nunique()
Female_mice = combined_MData_df[combined_MData_df["Sex"]=="Female"].nunique()

Male_mice


In [47]:
labels = ["Male", "Female"]

# The values of each section of the pie chart
sizes = [125, 124]

# The colors of each section of the pie chart
colors = ["blue", "pink"]

# Tells matplotlib to seperate the "Humans" section from the others
explode = (0.1, 0, 0, 0)

plt.pie(sizes, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=140)

([<matplotlib.patches.Wedge at 0x1a1cea8b10>,
  <matplotlib.patches.Wedge at 0x1a1ce9a310>],
 [Text(-0.7017365844029831, -0.8470925369228766, 'Male'),
  Text(0.7017365844029833, 0.8470925369228766, 'Female')],
 [Text(-0.3827654096743544, -0.4620504746852053, '50.2%'),
  Text(0.38276540967435446, 0.4620504746852053, '49.8%')])

In [48]:
# Generate a pie plot showing the distribution of female versus male mice using pyplot

## Quartiles, outliers and boxplots

In [73]:
#Extract the top 4 regimenes from the data frame in order to perform IQR test on each
best_regimes = combine_MData_df[combine_MData_df["Drug Regimen"].isin(["Capomulin", "Ramicane", "Infubinol", "Ceftamin"])]
best_regimes.sort_values (["Timepoint"], ascending=True)
best_regimes
best_regimes_data = best_regimes[["Drug Regimen", "Mouse ID", "Timepoint", "Tumor Volume (mm3)"]]
best_regimes_data

#Group data by Drug Regimen and Mouse ID to capture Last Tumor Measurement
best_regimens_sort = best_regimes_data.groupby(['Drug Regimen', 'Mouse ID']).last()['Tumor Volume (mm3)']
best_regimens_sort.head()


# Turn retrieved data into dataframe to easily manipulate
best_regimen_df = best_regimens_sort.to_frame()
best_regimen_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Tumor Volume (mm3)
Drug Regimen,Mouse ID,Unnamed: 2_level_1
Capomulin,b128,38.982878
Capomulin,b742,38.939633
Capomulin,f966,30.485985
Capomulin,g288,37.074024
Capomulin,g316,40.15922


In [91]:
best_regimen_df





Index(['Tumor Volume (mm3)'], dtype='object')

In [75]:
Cap_Tum_vol_df= best_regimen_df.mean


fig1, ax1 = plt.subplots()
ax1.set_title("Tumor Vol Results")
ax1.set_ylabel("Tumor Vol mm3"
ax1.boxplot(Cap_Tum_vol_df)
plt.show()



SyntaxError: invalid syntax (<ipython-input-75-357c3d28048d>, line 7)

In [67]:
# Calculate the final tumor volume of each mouse across four of the 
most promising treatment regimens. 
Calculate the IQR and quantitatively determine if there are any potential 
outliers. 
Calculate the final tumor volume of each mouse across four of the most 
promising treatment regimens: 
Capomulin, Ramicane, Infubinol, and Ceftamin. 
Calculate the quartiles and IQR and quantitatively determine if 
there are any potential outliers across all four treatment regimens.

* Using Matplotlib, generate a box and whisker plot of the final tumor 
volume for all four treatment regimens and highlight any potential 
outliers in the plot by changing their color and style.


times = [96,98,100,105,85,88,95,100,101,102,97,98,5]
fig1, ax1 = plt.subplots()
ax1.set_title('Reaction Times at Baseball Batting Cage')
ax1.set_ylabel('Reaction Time (ms)')
ax1.boxplot(times)
plt.show()

SyntaxError: invalid syntax (<ipython-input-67-1e00fec76836>, line 2)

In [None]:
# Generate a box plot of the final tumor volume of each mouse across four 
regimens of interest

## Line and scatter plots

In [None]:
# Generate a line plot of time point versus tumor volume for a mouse treated with Capomulin

In [None]:
# Generate a scatter plot of mouse weight versus average tumor volume for the Capomulin regimen

In [None]:
# Calculate the correlation coefficient and linear regression model for mouse weight and average tumor volume for the Capomulin regimen