In [1]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [2]:
# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# File to Load (Remember to Change These)
mouse_drug_data_to_load = "data/mouse_drug_data.csv"
clinical_trial_data_to_load = "data/clinicaltrial_data.csv"

In [3]:
# Read the Mouse and Drug Data 
mouse_data = pd.read_csv(mouse_drug_data_to_load, delimiter=",")
mouse_data_df = pd.DataFrame(mouse_data)
mouse_data_df.head()

Unnamed: 0,Mouse ID,Drug
0,f234,Stelasyn
1,x402,Stelasyn
2,a492,Stelasyn
3,w540,Stelasyn
4,v764,Stelasyn


In [4]:
# Read the Clinical Trial Data
clinicaltrial_data = pd.read_csv(clinical_trial_data_to_load,  delimiter=",")
clinicaltrial_data_df = pd.DataFrame(clinicaltrial_data)
clinicaltrial_data_df.head()


Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,a203,0,45.0,0
1,a203,5,48.508468,0
2,a203,10,51.852437,1
3,a203,15,52.77787,1
4,a203,20,55.173336,1


In [5]:
mouse_data_df.columns
mouse_data_df.dtypes

Mouse ID    object
Drug        object
dtype: object

In [6]:
clinicaltrial_data_df.columns
clinicaltrial_data_df.dtypes

Mouse ID               object
Timepoint               int64
Tumor Volume (mm3)    float64
Metastatic Sites        int64
dtype: object

In [7]:
# Combine the data into a single dataset; display table to preview
mouse_trialmerge = pd.merge(mouse_data_df, clinicaltrial_data_df, on="Mouse ID")
mouse_trialmerge_df = pd.DataFrame(mouse_trialmerge)
mouse_trialmerge_df.head()

Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,f234,Stelasyn,0,45.0,0
1,f234,Stelasyn,5,47.313491,0
2,f234,Stelasyn,10,47.904324,0
3,f234,Stelasyn,15,48.735197,1
4,f234,Stelasyn,20,51.112713,2


In [8]:
mouse_trialmerge_df.to_csv("../Pymaceuticals/mouse_trialmerge.csv")

## Tumor Response to Treatment

In [9]:
#list of drugs
drugstried = mouse_trialmerge_df.groupby(["Drug"])
druglist = drugstried["Mouse ID"].first()
print(druglist)

Drug
Capomulin    b128
Ceftamin     h531
Infubinol    q132
Ketapril     q119
Naftisol     j755
Placebo      a897
Propriva     g989
Ramicane     i334
Stelasyn     f234
Zoniferol    d133
Name: Mouse ID, dtype: object


In [10]:
#list of mice
mice= mouse_trialmerge_df.groupby(["Mouse ID"])
mouseID = mice["Tumor Volume (mm3)"].unique()
#print(mouseID)

In [11]:
#avg tumor size by timepoint
by_drug = mouse_trialmerge_df.groupby(["Drug"])
drugtype = by_drug["Tumor Volume (mm3)"].mean()

#avgtumor_by timepoint = mouse_trialmerge_df["Tumor Volume (mm3)"].mean()
print(drugtype)

Drug
Capomulin    40.675741
Ceftamin     52.591172
Infubinol    52.957935
Ketapril     55.235638
Naftisol     54.331565
Placebo      54.033581
Propriva     52.322552
Ramicane     40.216745
Stelasyn     54.106357
Zoniferol    53.236507
Name: Tumor Volume (mm3), dtype: float64


In [12]:
# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint 
TVgroupbyDT = mouse_trialmerge_df.groupby(["Drug", "Timepoint"])["Tumor Volume (mm3)"]
avgTVgroupbyDT = TVgroupbyDT.mean()

#reset index to convert to df
avgTVgroupbyDT = avgTVgroupbyDT.reset_index()
avgTVgroupbyDT_df = pd.DataFrame(avgTVgroupbyDT)
avgTVgroupbyDT_df.head()

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,45.0
1,Capomulin,5,44.266086
2,Capomulin,10,43.084291
3,Capomulin,15,42.064317
4,Capomulin,20,40.716325


In [None]:
# Store the Standard Error of Tumor Volumes Grouped by Drug and Timepoint

# Convert to DataFrame

# Preview DataFrame


In [13]:
# Minor Data Munging to Re-Format the Data Frames
# Preview that Reformatting worked


In [14]:
pd.get_option('display.max_rows')

60

In [15]:
pd.get_option('display.max_columns', None)

20

In [16]:
# Minor Data Munging to Re-Format the Data Frames

sum(avgTVgroupbyDT_df["Drug"].isnull())
sum(avgTVgroupbyDT_df["Timepoint"].isnull())
sum(avgTVgroupbyDT_df["Tumor Volume (mm3)"].isnull())

0

In [17]:
# Minor Data Munging to Re-Format the Data Frames (use pivot function); preview df
pivot_avgTVgroupbyDT_df = avgTVgroupbyDT_df.pivot(index ='Timepoint',columns='Drug',values='Tumor Volume (mm3)')
pivot_avgTVgroupbyDT_df.head()

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
5,44.266086,46.503051,47.062001,47.389175,46.796098,47.125589,47.248967,43.944859,47.527452,46.851818
10,43.084291,48.285125,49.403909,49.582269,48.69421,49.423329,49.101541,42.531957,49.463844,48.689881
15,42.064317,50.094055,51.296397,52.399974,50.933018,51.359742,51.067318,41.495061,51.529409,50.779059
20,40.716325,52.157049,53.197691,54.920935,53.644087,54.364417,53.346737,40.238325,54.067395,53.170334


In [18]:
pivot_avgTVgroupbyDT_df.columns

Index(['Capomulin', 'Ceftamin', 'Infubinol', 'Ketapril', 'Naftisol', 'Placebo',
       'Propriva', 'Ramicane', 'Stelasyn', 'Zoniferol'],
      dtype='object', name='Drug')

In [19]:
# Generate the Plot (with Error Bars)

In [20]:
#additional dependency needed for stats functions
from scipy.stats import sem


# Set up variables for the plot
samples = 

# Calculate means
means = [s.mean() for s in samples]
# Calculate standard error on means
sem = [s.sem() for s in samples]

# Generate the Plot (with Error Bars)
fig, ax = plt.subplots()

ax.errorbar(np.arange(0, len(means)), means, yerr=sem, fmt="o", color="b",
            alpha=0.5, label="Average Tumor Size")

ax.set_xlim(-1, len(samples) + 1)

ax.set_xlabel("Tumor Volume (mm3)")
ax.set_ylabel("Time(days)")

# Give our chart some labels and a tile
plt.title("Tumor Response to Treatment")
plt.legend(loc="best", fontsize="small", fancybox=True)


# Print our chart to the screen
plt.show()

SyntaxError: invalid syntax (<ipython-input-20-9713a6e1ec8a>, line 6)

In [None]:
# Draw a horizontal line with 0.25 transparency
plt.hlines(0, 0, 10, alpha=0.25)

# Create a list of the years that we will use as our x axis
years = [2013,2014,2015,2016]

# Plot our line that will be used to track a wrestler's wins over the years
plt.plot(years, wins_over_time, color="green", label="Wins")

# Plot our line that will be used to track a wrestler's losses over the years
plt.plot(years, losses_over_time, color="blue", label="Losses")

# Place a legend on the chart in what matplotlib believes to be the "best" location
plt.legend(loc="best")

# Give our chart some labels and a tile
plt.title("Popularity of Programming Languages")
plt.xlabel("Programming Language")
plt.ylabel("Number of People Using Programming Languages")

In [None]:
# Save the Figure
plt.savefig("../Pymaceuticals/TumorResponse.png")
plt.show()

In [None]:
# Show the Figure
plt.show()

## Metastatic Response to Treatment

In [None]:
# Store the Mean Met. Site Data Grouped by Drug and Timepoint 

# Convert to DataFrame

# Preview DataFrame


In [None]:
# Store the Standard Error associated with Met. Sites Grouped by Drug and Timepoint 

# Convert to DataFrame

# Preview DataFrame


In [None]:
# Minor Data Munging to Re-Format the Data Frames

# Preview that Reformatting worked


In [None]:
# Generate the Plot (with Error Bars)

# Save the Figure

# Show the Figure


## Survival Rates

In [None]:
# Store the Count of Mice Grouped by Drug and Timepoint (W can pass any metric)

# Convert to DataFrame

# Preview DataFrame


In [None]:
# Minor Data Munging to Re-Format the Data Frames

# Preview the Data Frame


In [None]:
# Generate the Plot (Accounting for percentages)

# Save the Figure

# Show the Figure
plt.show()

## Summary Bar Graph

In [None]:
# Calculate the percent changes for each drug

# Display the data to confirm


In [None]:
# Store all Relevant Percent Changes into a Tuple


# Splice the data between passing and failing drugs


# Orient widths. Add labels, tick marks, etc. 


# Use functions to label the percentages of changes


# Call functions to implement the function calls


# Save the Figure


# Show the Figure
fig.show()