In [5]:
# reserve the first cell, to import all dependencies (python classes) that you will need

import numpy as np
import pandas as pd
import statsmodels.api as sm
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [7]:
# keep all functions in one cell, call them when you need them. 
def read_file(path):
    """Load a header-less CSV source into a DataFrame.

    Parameters
    ----------
    path : str or file-like
        Location of the CSV data; it is handed unchanged to
        ``pandas.read_csv``, so anything that function accepts works here.

    Returns
    -------
    pandas.DataFrame
        The parsed table. Because ``header=None`` is passed, the first row
        is treated as data and the columns are labelled 0, 1, 2, ...
    """
    frame = pd.read_csv(path, header=None)
    return frame

In [6]:
# Download the breast-cancer (wdbc.data) file from the UCI repository and
# parse it with read_file; the file has no header row, so the resulting
# columns are numbered rather than named.
path = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data"
df = read_file(path=path)

In [17]:
# Preview the first three rows to sanity-check that the file parsed correctly.
df.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758


In [12]:
# Show the dtype pandas inferred for every column of df. This is not a full
# data dictionary (the file carries no column names), but it shows which
# columns are numeric and which hold strings.
df.dtypes

0       int64
1      object
2     float64
3     float64
4     float64
5     float64
6     float64
7     float64
8     float64
9     float64
10    float64
11    float64
12    float64
13    float64
14    float64
15    float64
16    float64
17    float64
18    float64
19    float64
20    float64
21    float64
22    float64
23    float64
24    float64
25    float64
26    float64
27    float64
28    float64
29    float64
30    float64
31    float64
dtype: object

In [22]:
# Split the raw table into predictors and response.
#   data_df   - DataFrame with the first 10 features (the "mean" measurements),
#               i.e. the columns labelled 2 through 11.
#   target_df - the outcome (response/label) column, labelled 1. Selecting a
#               single column yields a pandas Series, despite the "_df" suffix.
#
# .loc is used instead of the original .ix: .ix was deprecated in pandas 0.20
# and removed in pandas 1.0. The column labels here are integers, so .loc
# slices by label and includes BOTH endpoints (2:11 -> columns 2..11), which
# is the same selection .ix made.
data_df = df.loc[:, 2:11]
target_df = df.loc[:, 1]

In [26]:
# Show the last three rows of the feature frame to confirm the column slice.
data_df.tail(3)

Unnamed: 0,2,3,4,5,6,7,8,9,10,11
566,16.6,28.08,108.3,858.1,0.08455,0.1023,0.09251,0.05302,0.159,0.05648
567,20.6,29.33,140.1,1265.0,0.1178,0.277,0.3514,0.152,0.2397,0.07016
568,7.76,24.54,47.92,181.0,0.05263,0.04362,0.0,0.0,0.1587,0.05884


In [28]:
# Show the last three labels to confirm the response column was picked up.
target_df.tail(3)

566    M
567    M
568    B
Name: 1, dtype: object

In [44]:
# Convert the pandas objects to plain NumPy arrays for the model:
#   x - 2-D array of the feature columns
#   y - 1-D array of the class labels
#
# .values replaces the original .as_matrix(columns=None): as_matrix was
# deprecated in pandas 0.23 and removed in pandas 1.0, and its deprecation
# warning names .values as the replacement. .values exists in both old and
# new pandas and returns the same arrays.
x = data_df.values
y = target_df.values

In [45]:
### Specify the model and fit it; the fitted result is stored in mdl_fit.
# MNLogit (multinomial logit) comes from statsmodels (imported as sm); y is
# the response (endog) and x the predictor matrix (exog). Calling .fit()
# estimates the coefficients and prints the optimizer's convergence report.
#
# NOTE(review): x is passed as-is, so no intercept column is included (the
# model summary lists only x1..x10). If an intercept is wanted, wrap x with
# sm.add_constant(x) - confirm whether its omission is intentional.

mdl = sm.MNLogit(endog=y, exog=x)
mdl_fit = mdl.fit()

Optimization terminated successfully.
         Current function value: 0.128707
         Iterations 11


In [49]:
### Display the model summary: fit statistics plus the per-feature coefficient table ###
mdl_fit.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,569.0
Model:,MNLogit,Df Residuals:,559.0
Method:,MLE,Df Model:,9.0
Date:,"Sun, 16 Oct 2016",Pseudo R-squ.:,0.8051
Time:,14:16:03,Log-Likelihood:,-73.234
converged:,True,LL-Null:,-375.72
,,LLR p-value:,1.794e-124

y=M,coef,std err,z,P>|z|,[95.0% Conf. Int.]
x1,-2.9479,3.379,-0.873,0.383,-9.570 3.674
x2,0.3777,0.063,5.977,0.0,0.254 0.502
x3,-0.0457,0.501,-0.091,0.927,-1.028 0.936
x4,0.0475,0.01,4.685,0.0,0.028 0.067
x5,74.4357,30.948,2.405,0.016,13.779 135.092
x6,2.4326,19.004,0.128,0.898,-34.815 39.681
x7,7.4069,7.713,0.96,0.337,-7.711 22.524
x8,70.1621,27.678,2.535,0.011,15.913 124.411
x9,15.1245,10.345,1.462,0.144,-5.150 35.399
x10,-96.4246,70.036,-1.377,0.169,-233.693 40.843


In [50]:
### Print the information criteria of the fitted model ###
# AIC (Akaike) first, then BIC (Bayesian); both are read from the fitted
# result and are mainly useful for comparing alternative model specifications.
print(mdl_fit.aic)
print(mdl_fit.bic)

166.46818873
209.906993071


In [55]:
### Compute the marginal effects of the fitted model mdl_fit ###
# The settings are spelled out for the reader; they are the same ones the
# bare get_margeff() call uses: dy/dx effects evaluated over the whole sample
# ("overall").
mdl_margeff = mdl_fit.get_margeff(at="overall", method="dydx")

In [56]:
# Display the marginal-effects table (dy/dx with standard errors and confidence intervals).
mdl_margeff.summary()

0,1
Dep. Variable:,y
Method:,dydx
At:,overall

y=B,dy/dx,std err,z,P>|z|,[95.0% Conf. Int.]
x1,0.1129,0.129,0.878,0.380,-0.139 0.365
x2,-0.0145,0.002,-7.763,0.000,-0.018 -0.011
x3,0.0017,0.019,0.091,0.927,-0.036 0.039
x4,-0.0018,0.000,-5.427,0.000,-0.002 -0.001
x5,-2.8506,1.141,-2.499,0.012,-5.086 -0.615
x6,-0.0932,0.728,-0.128,0.898,-1.520 1.334
x7,-0.2837,0.294,-0.966,0.334,-0.859 0.292
x8,-2.6869,1.016,-2.644,0.008,-4.679 -0.695
x9,-0.5792,0.392,-1.476,0.140,-1.348 0.190
x10,3.6927,2.658,1.389,0.165,-1.517 8.903
