In [1]:
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

import pickle
#conda install requests will be needed
import requests, json



### Building a simple model

In [2]:
# import some data to play with
iris = datasets.load_iris()
X = iris.data
y = iris.target

In [3]:
print(iris.DESCR)

Iris Plants Database

Notes
-----
Data Set Characteristics:
    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20  0.76     0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :Date: July, 1988

This is a copy of UCI ML iris d

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [5]:
rfc = RandomForestClassifier(n_estimators=100, n_jobs=2)

In [6]:
rfc.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=2,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [7]:
print("Accuracy = %0.2f" % accuracy_score (y_test, rfc.predict(X_test)))
print(classification_report(y_test, rfc.predict(X_test)))

Accuracy = 0.97
             precision    recall  f1-score   support

          0       1.00      1.00      1.00         9
          1       1.00      0.94      0.97        16
          2       0.93      1.00      0.96        13

avg / total       0.98      0.97      0.97        38



### Model serialization / marshalling

In [8]:
pickle.dump(rfc, open("iris_rfc.pkl", "wb"))

In [9]:
my_random_forest = pickle.load(open("iris_rfc.pkl", "rb"))

In [10]:
my_random_forest

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=2,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [11]:
print(classification_report(y_test, my_random_forest.predict(X_test)))

             precision    recall  f1-score   support

          0       1.00      1.00      1.00         9
          1       1.00      0.94      0.97        16
          2       0.93      1.00      0.96        13

avg / total       0.98      0.97      0.97        38



Next we will start a flask service.   That's in the file 'flask_demo.py'


Once it's started, we can use this bit of code to call it.

In [12]:

url = "http://localhost:9000/api"
data = json.dumps({'sl':5.84,'sw':3.0, 'pl':3.75,'pw':1.1}) 
r = requests.post(url, data)

print(r.json())

{'results': {'y_hat': 1}}
