# ISP2025 - Winter - API Development with PYTHON using FLASK

In [2]:
import pandas as pd 
import numpy as np 
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

## Load data 

In [9]:
# Read the iris measurements from the CSV file next to this notebook.
df = pd.read_csv(filepath_or_buffer='iris.csv')

In [10]:
# Preview the first five rows to check the column names and value formats.
df.head(n=5)

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [11]:
# Count the missing values in each column.
df.isna().sum(axis='index')

sepal-length    0
sepal-width     0
petal-length    0
petal-width     0
species         0
dtype: int64

In [12]:
# Count the non-null entries of every column within each species group.
df.groupby(by='species').count()

Unnamed: 0_level_0,sepal-length,sepal-width,petal-length,petal-width
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Iris-setosa,50,50,50,50
Iris-versicolor,50,50,50,50
Iris-virginica,50,50,50,50


## Split data for training and testing 

In [14]:
# Build the feature matrix as a NumPy array, one column per selected measurement.
# NOTE(review): 'petal-width' exists in the data but is not selected here —
# confirm that leaving it out is intentional.
X = df[['sepal-length', 'sepal-width', 'petal-length']].to_numpy()

In [18]:
# Species name -> integer class label. Built once instead of on every call,
# since the function is applied to each row of the data.
_SPECIES_LABELS = {
    'Iris-setosa': 0,
    'Iris-versicolor': 1,
    'Iris-virginica': 2,
}


def species2label(name: str) -> int:
    """Convert an iris species name into its integer class label.

    Args:
        name: Species name, one of 'Iris-setosa', 'Iris-versicolor' or
            'Iris-virginica'.

    Returns:
        0 for 'Iris-setosa', 1 for 'Iris-versicolor', 2 for 'Iris-virginica'.

    Raises:
        ValueError: If ``name`` is not one of the known species names.
    """
    try:
        return _SPECIES_LABELS[name]
    except (KeyError, TypeError):
        # TypeError covers unhashable inputs, so every bad value still surfaces
        # as ValueError, now with a message that names the offending value.
        raise ValueError(
            f"unknown species {name!r}; expected one of {list(_SPECIES_LABELS)}"
        ) from None

In [22]:
# Encode the species column as integer class labels and expose it as a NumPy array.
y = df['species'].map(species2label).to_numpy()

In [24]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions equal in both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    stratify=y,
    random_state=42,
)

## Train the random forest model 

In [25]:
# Seed the forest so that a fresh Restart & Run All grows the same trees and
# reports the same accuracy; an unseeded forest differs on every fit.
cls = RandomForestClassifier(random_state=42)

In [26]:
# Fit the classifier on the training partition.
cls.fit(X=X_train, y=y_train)

In [27]:
# Predict class labels for the held-out rows.
predictions = cls.predict(X=X_test)

## Evaluate the model

In [32]:
from sklearn.metrics import accuracy_score

In [34]:
# Report the fraction of held-out rows whose predicted label matches the true one.
print(f'Acc: {accuracy_score(y_true=y_test, y_pred=predictions)}')

Acc: 0.9


## Save the model 

In [38]:
from pickle import dump
from pickle import load

In [37]:
# Serialize the trained classifier to disk using pickle protocol 5.
with open(file="isp2025_irisCls.pkl", mode="wb") as model_file:
    dump(cls, model_file, protocol=5)

## Load the model 

In [39]:
# Restore the classifier from the pickle file.
# Unpickling can execute arbitrary code, so only load files from a trusted source.
with open(file="isp2025_irisCls.pkl", mode="rb") as model_file:
    model = load(model_file)

In [40]:
# Score the reloaded model on the same held-out rows.
print(f'Acc: {accuracy_score(y_true=y_test, y_pred=model.predict(X=X_test))}')

Acc: 0.9
