## Streamlit testing

In [20]:
# Pandas
import pandas as pd
# Numpy
import numpy as np
# MatplotLib
import matplotlib.pyplot as plt

# Preprocessing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.compose import make_column_selector
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import make_pipeline
from sklearn.compose import ColumnTransformer

# Models
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Classification Metrics
from sklearn.metrics import accuracy_score, recall_score, precision_score, \
f1_score, classification_report, confusion_matrix, ConfusionMatrixDisplay


# Set global scikit-learn configuration
from sklearn import set_config

# Display estimators as a diagram when they are the output of a cell
set_config(display='diagram') # 'text' or 'diagram'
# Shared seed, passed as random_state to the shuffled preview and the train/test split
seed = 42

In [9]:
# Read original dataset (relative path: resolved against the kernel's working directory)
iris_df = pd.read_csv('Data/iris.csv')
# Preview the rows in shuffled order. The shuffled frame is only displayed, not
# assigned, so iris_df itself keeps the row order of the CSV file.
iris_df.sample(frac=1, random_state=seed)


Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
73,6.1,2.8,4.7,1.2,Versicolor
18,5.7,3.8,1.7,0.3,Setosa
118,7.7,2.6,6.9,2.3,Virginica
78,6.0,2.9,4.5,1.5,Versicolor
76,6.8,2.8,4.8,1.4,Versicolor
...,...,...,...,...,...
71,6.1,2.8,4.0,1.3,Versicolor
106,4.9,2.5,4.5,1.7,Virginica
14,5.8,4.0,1.2,0.2,Setosa
92,5.8,2.6,4.0,1.2,Versicolor


In [17]:
# Separate the label column (y) from the remaining feature columns (X)
target = 'variety'
y = iris_df.loc[:, target].copy()
X = iris_df.drop(target, axis=1).copy()

In [21]:
# Split data into train and test sets: 70% training, 30% test.
# stratify=y keeps the class proportions the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=seed, stratify=y)

# Create an instance of the random forest classifier.
# random_state is pinned to the shared seed: without it the forest's bootstrap
# and feature sampling differ on every run, so the fitted model and the
# accuracy reported below would not be reproducible.
clf = RandomForestClassifier(n_estimators=100, random_state=seed)

# Train the classifier on the training data (the fitted estimator is the cell output)
clf.fit(X_train, y_train)



In [23]:
# Run the fitted classifier on the held-out test features
y_pred = clf.predict(X_test)

# Fraction of test labels that were predicted correctly
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f'Accuracy : {accuracy}')



Accuracy : 0.9111111111111111


In [26]:
# Persist the fitted classifier to disk so it can be reloaded with joblib.load.
# The cell displays the return value of joblib.dump.
import joblib
joblib.dump(value=clf, filename='rf_model.sav')


['rf_model.sav']