In [1]:
!pip install sklearn2pmml



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import os

import numpy as np
import pandas as pd


import matplotlib.pyplot as plt
%matplotlib inline


from sklearn_pandas import DataFrameMapper


from sklearn.datasets import load_iris

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeClassifier

from sklearn.pipeline import Pipeline


from sklearn2pmml.decoration import ContinuousDomain
from sklearn2pmml.pipeline import PMMLPipeline

from sklearn2pmml import sklearn2pmml

In [4]:
iris_data = load_iris()

X = pd.DataFrame(data=iris_data.data,
                 columns=['_'.join(feature_name.split()[:2]) for feature_name in iris_data.feature_names])
y = pd.DataFrame(data=iris_data.target,
                 columns=['species'])

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=.1,
                                                    stratify=y,
                                                    random_state=42)

In [5]:
iris_train = pd.concat([X_train,
                        y_train],
                       axis=1)
iris_train.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
29,4.7,3.2,1.6,0.2,0
145,6.7,3.0,5.2,2.3,2
8,4.4,2.9,1.4,0.2,0
148,6.2,3.4,5.4,2.3,2
25,5.0,3.0,1.6,0.2,0


In [6]:
iris_test = pd.concat([X_test,
                       y_test],
                      axis=1)
iris_test.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
58,6.6,2.9,4.6,1.3,1
134,6.1,2.6,5.6,1.4,2
147,6.5,3.0,5.2,2.0,2
69,5.6,2.5,3.9,1.1,1
107,7.3,2.9,6.3,1.8,2


In [7]:
pipeline = PMMLPipeline([
    ('mapper',
     DataFrameMapper([
         (X_train.columns.values,
          [ContinuousDomain(),
           SimpleImputer(),
           StandardScaler()])])),
    ('pca',
     PCA(n_components=4)),
    ('selector',
     SelectKBest(k=2)),
    ('classifier',
     DecisionTreeClassifier())
])

pipeline.fit(X_train,
             y_train.values.ravel());

In [8]:
print(classification_report(pipeline.predict(X_train),
                            y_train))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        45
           1       1.00      1.00      1.00        45
           2       1.00      1.00      1.00        45

    accuracy                           1.00       135
   macro avg       1.00      1.00      1.00       135
weighted avg       1.00      1.00      1.00       135



In [9]:
pipeline.predict(X_test)

array([2, 1, 2, 1, 2, 0, 0, 0, 2, 1, 0, 2, 1, 2, 0])

In [10]:
y_test.values.ravel()

array([1, 2, 2, 1, 2, 0, 0, 0, 2, 1, 0, 2, 1, 1, 0])

In [11]:
print(classification_report(pipeline.predict(X_test),
                            y_test))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         5
           1       0.60      0.75      0.67         4
           2       0.80      0.67      0.73         6

    accuracy                           0.80        15
   macro avg       0.80      0.81      0.80        15
weighted avg       0.81      0.80      0.80        15



In [12]:
sklearn2pmml(pipeline,
             '/content/drive/MyDrive/Colab Notebooks/CS 422/Project/iris_pipeline.pmml',
             with_repr = True)