In [1]:
# Install the packages listed in the requirements file into the kernel's environment.
%pip install -r ~/app/requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn import metrics
import joblib

In [3]:
# Load the raw table from iris.csv (path is relative to the working directory).
data = pd.read_csv(filepath_or_buffer='iris.csv')

In [4]:
# Separate the table into the feature matrix (every column except 'species')
# and the target vector (the 'species' column itself).
X = data.drop('species', axis='columns')
y = data.loc[:, 'species']

In [5]:
# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=23,
)

In [6]:
# Names of all feature columns, as a plain list, for use by the column transformer.
column_names = X_train.columns.tolist()

In [7]:
# Imputer that replaces missing values using the 'median' strategy.
imp = SimpleImputer(
    strategy='median',
)

In [8]:
# Preprocessing stage: run the imputer over every column listed in column_names.
ct = ColumnTransformer(
    transformers=[
        ('imputer', imp, column_names),
    ],
)

In [9]:
# Random forest classifier; random_state is fixed so training is repeatable.
clf = RandomForestClassifier(
    random_state=23,
)

In [10]:
# Chain preprocessing and the classifier so both are fitted, applied and
# persisted together as a single estimator.
pipe = Pipeline(steps=[
    ('preprocessor', ct),
    ('classifier', clf),
])

In [11]:
# Fit the whole pipeline (preprocessor, then classifier) on the training split.
pipe.fit(X=X_train, y=y_train)

In [12]:
# Predict on the held-out split and print the per-class report.
# print() is used because classification_report returns a formatted string.
y_pred = pipe.predict(X_test)
print(
    metrics.classification_report(y_true=y_test, y_pred=y_pred)
)

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        18
Iris-versicolor       0.93      1.00      0.97        14
 Iris-virginica       1.00      0.92      0.96        13

       accuracy                           0.98        45
      macro avg       0.98      0.97      0.98        45
   weighted avg       0.98      0.98      0.98        45



In [13]:
# Persist the fitted pipeline to disk (path is relative to the working directory).
joblib.dump(value=pipe, filename='../app/iris.mdl')

['../app/iris.mdl']