In [1]:
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import pickle
import requests,json



In [2]:
iris = datasets.load_iris()  # Import Dataset
print(iris.DESCR)

Iris Plants Database

Notes
-----
Data Set Characteristics:
    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20  0.76     0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :Date: July, 1988

This is a copy of UCI ML iris d

In [3]:
X = iris.data
y = iris.target

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X,y)

### Build & Evaluate Model

In [5]:
model = RandomForestClassifier(n_estimators=100, n_jobs=2)

In [6]:
model.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=100, n_jobs=2, oob_score=False, random_state=None,
            verbose=0, warm_start=False)

In [7]:
print("Accuarcy = %0.2f" % accuracy_score(y_test, model.predict(X_test)))
print(classification_report(y_test, model.predict(X_test)))

Accuarcy = 0.92
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        11
          1       0.83      1.00      0.91        15
          2       1.00      0.75      0.86        12

avg / total       0.93      0.92      0.92        38



### Serialize Model

In [8]:
pickle.dump(model, open("iris_classifier.pkl","wb"))  # Serialize and load into a pickle file

In [9]:
model_pkl = pickle.load(open("iris_classifier.pkl","rb"))

In [10]:
model_pkl

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=100, n_jobs=2, oob_score=False, random_state=None,
            verbose=0, warm_start=False)

In [11]:
print(classification_report(y_test, model_pkl.predict(X_test)))  # Accuracy of the Serialized Model (model_pkl)

             precision    recall  f1-score   support

          0       1.00      1.00      1.00        11
          1       0.83      1.00      0.91        15
          2       1.00      0.75      0.86        12

avg / total       0.93      0.92      0.92        38



### Start Flask Service by running "Flask Web Service.ipynb" file

In [12]:
url = "http://localhost:9000/api"
data = json.dumps({'sl':5.84, 'sw':3.0, 'pl':3.75, 'pw':1.1})
output = requests.post(url, data)
print(output.json())

{'results': 1}
