In [1]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from plotly.subplots import make_subplots
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.metrics import accuracy_score, classification_report # Accuracy metrics
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, LinearSVC

In [2]:

# Read the Iris CSV and set the target column aside for modelling.
dataset = pd.read_csv('/content/IRIS (1).csv')
labels = dataset.loc[:, 'species']
# Show the target column as a one-column frame.
labels.to_frame()

Unnamed: 0,species
0,Iris-setosa
1,Iris-setosa
2,Iris-setosa
3,Iris-setosa
4,Iris-setosa
...,...
145,Iris-virginica
146,Iris-virginica
147,Iris-virginica
148,Iris-virginica


In [3]:
# Preview the first rows (head() defaults to five) to sanity-check the load.
dataset.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:

# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
dataset.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [5]:
# Model inputs: every column except the target.
features = dataset.drop(columns=['species'])
features

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [6]:

def _species_bar(column, axis_title):
    """Return a bar figure of ``column`` (y-axis) against species (x-axis).

    Plotly Express looks ``labels`` up by DataFrame column name, so the
    mapping is keyed on the plotted columns. Keys such as 'x'/'y' match no
    column and therefore have no effect on axis titles or hover text.

    Parameters
    ----------
    column : str
        Name of the measurement column in ``dataset`` to plot on the y-axis.
    axis_title : str
        Human-readable label to show for ``column``.
    """
    return px.bar(
        dataset,
        x='species',
        y=column,
        labels={'species': 'Species', column: axis_title},
    )


# One bar figure per measurement, all oriented the same way (species on x,
# measurement on y) so the panels read consistently when placed side by side.
fig1 = _species_bar('petal_width', 'Petal Width')
fig2 = _species_bar('petal_length', 'Petal Length')
fig3 = _species_bar('sepal_length', 'Sepal Length')
fig4 = _species_bar('sepal_width', 'Sepal Width')

# 3D view of three of the measurements, coloured by species.
fig5 = px.scatter_3d(dataset, x='sepal_length', y='petal_length', z='petal_width', color='species')

In [7]:
# Titles for the 2x2 grid, listed in row-major order.
panel_titles = (
    'Petal Width by Species',
    'Petal Length by Species',
    'Sepal Length by Species',
    'Sepal Width by Species',
)
subplot_fig = make_subplots(rows=2, cols=2, subplot_titles=panel_titles)

# Copy the first trace of each standalone bar figure into its grid cell.
for position, source_fig in enumerate((fig1, fig2, fig3, fig4)):
    grid_row, grid_col = divmod(position, 2)
    subplot_fig.add_trace(source_fig.data[0], row=grid_row + 1, col=grid_col + 1)

# Remove the outline drawn around each bar segment.
subplot_fig.update_traces(marker_line_width=0)
subplot_fig.show()

In [8]:
# Display the 3D scatter figure as this cell's output.
fig5

In [9]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

# Candidate classifiers, all with default hyper-parameters.
classifiers = {
    'Logistic_Regression': LogisticRegression(),
    'Ridge_Classifier': RidgeClassifier(),
    'Lin_Support_Vector_Class': LinearSVC(),
    'KNearest_Neighbors': KNeighborsClassifier(),
    'Naive_Bayes': GaussianNB(),
}

# Each classifier gets its own StandardScaler step in front of it.
pipelines = {
    name: make_pipeline(StandardScaler(), estimator)
    for name, estimator in classifiers.items()
}

In [10]:
# Fit every pipeline on the training split and persist each one as '<name>.pkl'.
fit_models = {}
for algo, pipeline in pipelines.items():
    fit_models[algo] = model = pipeline.fit(x_train, y_train)

    model_path = algo + '.pkl'
    with open(model_path, 'wb') as f:
        pickle.dump(model, f)

# Score each fitted pipeline on the held-out split.
for algo, model in fit_models.items():
    yhat = model.predict(x_test)
    report = classification_report(y_test, yhat)
    print(algo, '\n', report)

Logistic_Regression 
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30

Ridge_Classifier 
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       0.86      0.67      0.75         9
 Iris-virginica       0.77      0.91      0.83        11

       accuracy                           0.87        30
      macro avg       0.88      0.86      0.86        30
   weighted avg       0.87      0.87      0.86        30

Lin_Support_Vector_Class 
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicol

In [11]:

# A single hand-entered flower, in the same column order as the training features.
user_values = np.array([4.9, 2.0, 1.0, 1.3]).reshape(1, -1)
feature_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
user_values_df = pd.DataFrame(user_values, columns=feature_names)

# Read the persisted logistic-regression pipeline back from disk.
# The file must be opened for reading ('rb') and loaded: opening it with 'wb'
# and dumping `model` would overwrite it with whichever pipeline the previous
# loop left bound to `model`, and the prediction would come from that model.
# NOTE: only unpickle files this notebook wrote itself; pickle.load executes
# arbitrary code from the file.
with open('Logistic_Regression.pkl', 'rb') as f:
    logistic_model = pickle.load(f)

predict = logistic_model.predict(user_values_df)
print(f'The species is: {predict}')

The species is: ['Iris-versicolor']


In [12]:

# (rows, columns) of the loaded dataset.
dataset.shape

(150, 5)

In [13]:
# Hyper-parameter grid for the random forest: every combination of the values
# below is evaluated with 5-fold cross-validation on the training split.
# (GridSearchCV is imported in the notebook's import cell.)
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 5, 10, 20],
    # Add other parameters to tune
}

rf = RandomForestClassifier(random_state=42)

grid_search = GridSearchCV(rf, param_grid, cv=5, scoring='accuracy')
grid_search.fit(x_train, y_train)

# Get best parameters and best estimator
best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

# Report which combination won and its mean cross-validated accuracy, so the
# result of the search is visible rather than only stored in a variable.
print(f'Best parameters: {best_params}')
print(f'Best CV accuracy: {grid_search.best_score_:.3f}')

# Use the best model for prediction on the held-out split
y_pred = best_estimator.predict(x_test)
print(classification_report(y_test, y_pred))


                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30

