# In [1]:
import inspect

import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

try:
    # scikit-learn >= 0.18: train_test_split lives in model_selection.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only ship the legacy module.
    from sklearn.cross_validation import train_test_split

from ggplot import *

# Fit-predict walkthrough on the iris dataset: build a binary classification
# problem, fit a logistic regression, then plot the ROC curve and report AUC.
# Note: the dataset is only used to illustrate what the fit-predict process
# looks like.

iris = datasets.load_iris()

# Inspect iris. The result is only echoed when this line is evaluated on its
# own (e.g. as the last expression of a notebook cell); here it is discarded.
inspect.getmembers(iris)

# Put the feature data into a dataframe and label the columns.
df = pd.DataFrame(iris.data)
df.columns = iris.feature_names

# Include the target data.
df['target'] = iris.target

# Subset the data so that only two target values remain (binary: 0 and 1).
df = df[df.target != 2]

# Split data into train and test sets.
y = df['target']
# Reuse the feature names assigned above instead of re-typing the column list.
X = df[list(iris.feature_names)]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1)

# Fit a logistic regression model.
# `.values` replaces `.as_matrix()`, which was removed in pandas 1.0.
model = LogisticRegression()
model = model.fit(X_train.values, y_train.values)

# Make class predictions on the held-out data.
predicted = model.predict(X_test.values)

# Plot the ROC curve and compute the AUC,
# see http://blog.yhat.com/posts/roc-curves.html for reference.
#   TPR = TruePositives / (TruePositives + FalseNegatives)
#   FPR = FalsePositives / (FalsePositives + TrueNegatives)

# Column 1 of predict_proba is the probability of the positive class (label 1).
preds = model.predict_proba(X_test.values)[:, 1]
fpr, tpr, _ = metrics.roc_curve(y_test.values, preds)

# NOTE: `df` is rebound here from the iris frame to the ROC points.
df = pd.DataFrame(dict(fpr=fpr, tpr=tpr))

# A bare ggplot expression in the middle of a cell/script is silently
# discarded, so bind each plot to a name and print it explicitly.
# NOTE(review): relies on ggplot rendering the figure when its repr is
# evaluated - confirm against the installed ggplot version.
roc_plot = (
    ggplot(df, aes(x='fpr', y='tpr'))
    + geom_line()
    + geom_abline(linetype='dashed')
)
print(roc_plot)

auc = metrics.auc(fpr, tpr)
auc_plot = (
    ggplot(df, aes(x='fpr', ymin=0, ymax='tpr'))
    + geom_area(alpha=0.2)
    + geom_line(aes(y='tpr'))
    + ggtitle("ROC Curve w/ AUC=%s" % str(auc))
)
print(auc_plot)

# Parenthesised single-argument print works on both Python 2 and Python 3.
print(auc)

# Output (AUC): 1.0
