In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

In [2]:

from sklearn.datasets import load_iris

iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)


df['target'] = iris.target

df.sample(10)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
64,5.6,2.9,3.6,1.3,1
81,5.5,2.4,3.7,1.0,1
3,4.6,3.1,1.5,0.2,0
105,7.6,3.0,6.6,2.1,2
72,6.3,2.5,4.9,1.5,1
29,4.7,3.2,1.6,0.2,0
82,5.8,2.7,3.9,1.2,1
143,6.8,3.2,5.9,2.3,2
88,5.6,3.0,4.1,1.3,1
74,6.4,2.9,4.3,1.3,1


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
 4   target             150 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 6.0 KB


In [4]:
df.isnull().sum()

Unnamed: 0,0
sepal length (cm),0
sepal width (cm),0
petal length (cm),0
petal width (cm),0
target,0


In [5]:
# To split the data

X = df.iloc[: , :-1] # independent
y = df.iloc[: , -1] # dependent
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [6]:
X

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


**Scale down the data**

In [7]:
# standard_scaler

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X_train)
res = sc.transform(X_train)

#res -> to give scaled down outputs

scaled_X_train = pd.DataFrame(data=res)
scaled_X_train.columns = X_train.columns

scaled_X_train.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,-1.473937,1.203658,-1.562535,-1.312603
1,-0.133071,2.992376,-1.276006,-1.045633
2,1.085898,0.085709,0.385858,0.289218
3,-1.230143,0.756479,-1.218701,-1.312603
4,-1.717731,0.309299,-1.390618,-1.312603


In [8]:
# to apply same thing on test data

res = sc.transform(X_test)

scaled_X_test = pd.DataFrame(data=res)
scaled_X_test.columns = X_test.columns

scaled_X_test.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,0.354517,-0.58506,0.557775,0.022248
1,-0.133071,1.650837,-1.161395,-1.179118
2,2.304867,-1.032239,1.8185,1.490583
3,0.23262,-0.36147,0.443164,0.422703
4,1.207795,-0.58506,0.615081,0.289218







**Since the data is ready we need to train the Algorithms**

In [9]:
def knn_algo(x_train,y_train,x_test,y_test):
  knn_res = KNeighborsClassifier(n_neighbors=5) # default k_value is 5
  knn_res.fit(x_train,y_train)
  y_test_pred = knn_res.predict(x_test)
  print(f'Training Accuracy : {knn_res.score(x_train,y_train)}')
  print(f'Test Accuracy : {knn_res.score(x_test,y_test)}')

  print(f'Confusion Matrix Test data : {confusion_matrix(y_test,y_test_pred)}')
  print(f'classification Report Test data : {classification_report(y_test,y_test_pred)}')

In [10]:
def naive_bayes_algo(x_train,y_train,x_test,y_test):
  nav_res = GaussianNB()
  nav_res.fit(x_train,y_train)
  y_test_pred = nav_res.predict(x_test)
  print(f'Training Accuracy : {nav_res.score(x_train,y_train)}')
  print(f'Test Accuracy : {nav_res.score(x_test,y_test)}')

  print(f'Confusion Matrix Test data : {confusion_matrix(y_test,y_test_pred)}')
  print(f'classification Report Test data : {classification_report(y_test,y_test_pred)}')

In [11]:
def log_reg_algo(x_train,y_train,x_test,y_test):
  lg_res = LogisticRegression()
  lg_res.fit(x_train,y_train)
  y_test_pred = lg_res.predict(x_test)
  print(f'Training Accuracy : {lg_res.score(x_train,y_train)}')
  print(f'Test Accuracy : {lg_res.score(x_test,y_test)}')

  print(f'Confusion Matrix Test data : {confusion_matrix(y_test,y_test_pred)}')
  print(f'classification Report Test data : {classification_report(y_test,y_test_pred)}')

In [12]:
def dt_algo(x_train,y_train,x_test,y_test):
  dt_res = DecisionTreeClassifier(criterion='entropy')
  dt_res.fit(x_train,y_train)
  y_test_pred = dt_res.predict(x_test)
  print(f'Training Accuracy : {dt_res.score(x_train,y_train)}')
  print(f'Test Accuracy : {dt_res.score(x_test,y_test)}')

  print(f'Confusion Matrix Test data : {confusion_matrix(y_test,y_test_pred)}')
  print(f'classification Report Test data : {classification_report(y_test,y_test_pred)}')

In [13]:
def rf_algo(x_train,y_train,x_test,y_test):
  rf_res = RandomForestClassifier(criterion='entropy',n_estimators=11)
  rf_res.fit(x_train,y_train)
  y_test_pred = rf_res.predict(x_test)
  print(f'Training Accuracy : {rf_res.score(x_train,y_train)}')
  print(f'Test Accuracy : {rf_res.score(x_test,y_test)}')

  print(f'Confusion Matrix Test data : {confusion_matrix(y_test,y_test_pred)}')
  print(f'classification Report Test data : {classification_report(y_test,y_test_pred)}')

In [14]:
def common(x_train,y_train,x_test,y_test):
  print('-----knn-------')
  knn_algo(x_train,y_train,x_test,y_test)
  print('-----Naive Bayes------')
  naive_bayes_algo(x_train,y_train,x_test,y_test)
  print('-----Logistic Regression-------')
  log_reg_algo(x_train,y_train,x_test,y_test)
  print('-----Decision Tree-------')
  dt_algo(x_train,y_train,x_test,y_test)
  print('-----Random Forest-------')
  rf_algo(x_train,y_train,x_test,y_test)

In [15]:
final_X_train = scaled_X_train.copy()
final_y_train = y_train.copy()
final_X_test = scaled_X_test.copy()
final_y_test = y_test.copy()

In [16]:
common(final_X_train,final_y_train,final_X_test,final_y_test)

-----knn-------
Training Accuracy : 0.9583333333333334
Test Accuracy : 1.0
Confusion Matrix Test data : [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
classification Report Test data :               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

-----Naive Bayes------
Training Accuracy : 0.95
Test Accuracy : 1.0
Confusion Matrix Test data : [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
classification Report Test data :               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   m

**To select best model**

In [17]:
from sklearn.metrics import precision_score

# Define KNN algorithm
def knn_algo(x_train, y_train, x_test, y_test):
    knn_res = KNeighborsClassifier(n_neighbors=5)
    knn_res.fit(x_train, y_train)
    y_test_pred = knn_res.predict(x_test)
    precision = precision_score(y_test, y_test_pred, average='macro')

    return precision

# Define other algorithms similarly
def naive_bayes_algo(x_train, y_train, x_test, y_test):
    nb = GaussianNB()
    nb.fit(x_train, y_train)
    y_test_pred = nb.predict(x_test)
    precision = precision_score(y_test, y_test_pred, average='macro')
    return precision

def log_reg_algo(x_train, y_train, x_test, y_test):
    lr = LogisticRegression()
    lr.fit(x_train, y_train)
    y_test_pred = lr.predict(x_test)
    precision = precision_score(y_test, y_test_pred, average='macro')
    return precision

def dt_algo(x_train, y_train, x_test, y_test):
    dt = DecisionTreeClassifier()
    dt.fit(x_train, y_train)
    y_test_pred = dt.predict(x_test)
    precision = precision_score(y_test, y_test_pred, average='macro')
    return precision

def rf_algo(x_train, y_train, x_test, y_test):
    rf = RandomForestClassifier()
    rf.fit(x_train, y_train)
    y_test_pred = rf.predict(x_test)
    precision = precision_score(y_test, y_test_pred, average='macro')
    return precision

# Common function to find the best precision
def common(x_train, y_train, x_test, y_test):
    precisions = {}

    print('-----KNN-------')
    precisions['KNN'] = knn_algo(x_train, y_train, x_test, y_test)

    print('-----Naive Bayes------')
    precisions['Naive Bayes'] = naive_bayes_algo(x_train, y_train, x_test, y_test)

    print('-----Logistic Regression-------')
    precisions['Logistic Regression'] = log_reg_algo(x_train, y_train, x_test, y_test)

    print('-----Decision Tree-------')
    precisions['Decision Tree'] = dt_algo(x_train, y_train, x_test, y_test)

    print('-----Random Forest-------')
    precisions['Random Forest'] = rf_algo(x_train, y_train, x_test, y_test)

    # Find the model with the best precision
    best_model = max(precisions, key=precisions.get)
    print(f"\nBest Model: {best_model} with Precision: {precisions[best_model]:.4f}")




In [18]:
final_X_train = scaled_X_train.copy()
final_y_train = y_train.copy()
final_X_test = scaled_X_test.copy()
final_y_test = y_test.copy()

In [19]:
common(final_X_train,final_y_train,final_X_test,final_y_test)

-----KNN-------
-----Naive Bayes------
-----Logistic Regression-------
-----Decision Tree-------
-----Random Forest-------

Best Model: KNN with Precision: 1.0000


In [20]:
from sklearn.metrics import precision_score, recall_score

# Update functions to calculate either precision or recall
def knn_algo(x_train, y_train, x_test, y_test, metric='precision'):
    knn_res = KNeighborsClassifier(n_neighbors=5)
    knn_res.fit(x_train, y_train)
    y_test_pred = knn_res.predict(x_test)
    if metric == 'precision':
        return precision_score(y_test, y_test_pred, average='macro')
    elif metric == 'recall':
        return recall_score(y_test, y_test_pred, average='macro')

def naive_bayes_algo(x_train, y_train, x_test, y_test, metric='precision'):
    nb = GaussianNB()
    nb.fit(x_train, y_train)
    y_test_pred = nb.predict(x_test)
    if metric == 'precision':
        return precision_score(y_test, y_test_pred, average='macro')
    elif metric == 'recall':
        return recall_score(y_test, y_test_pred, average='macro')

def log_reg_algo(x_train, y_train, x_test, y_test, metric='precision'):
    lr = LogisticRegression()
    lr.fit(x_train, y_train)
    y_test_pred = lr.predict(x_test)
    if metric == 'precision':
        return precision_score(y_test, y_test_pred, average='macro')
    elif metric == 'recall':
        return recall_score(y_test, y_test_pred, average='macro')

def dt_algo(x_train, y_train, x_test, y_test, metric='precision'):
    dt = DecisionTreeClassifier()
    dt.fit(x_train, y_train)
    y_test_pred = dt.predict(x_test)
    if metric == 'precision':
        return precision_score(y_test, y_test_pred, average='macro')
    elif metric == 'recall':
        return recall_score(y_test, y_test_pred, average='macro')

def rf_algo(x_train, y_train, x_test, y_test, metric='precision'):
    rf = RandomForestClassifier()
    rf.fit(x_train, y_train)
    y_test_pred = rf.predict(x_test)
    if metric == 'precision':
        return precision_score(y_test, y_test_pred, average='macro')
    elif metric == 'recall':
        return recall_score(y_test, y_test_pred, average='macro')

# Updated common function to handle both precision and recall
def common(x_train, y_train, x_test, y_test, metric='precision'):
    metrics = {}

    print('-----KNN-------')
    metrics['KNN'] = knn_algo(x_train, y_train, x_test, y_test, metric=metric)

    print('-----Naive Bayes------')
    metrics['Naive Bayes'] = naive_bayes_algo(x_train, y_train, x_test, y_test, metric=metric)

    print('-----Logistic Regression-------')
    metrics['Logistic Regression'] = log_reg_algo(x_train, y_train, x_test, y_test, metric=metric)

    print('-----Decision Tree-------')
    metrics['Decision Tree'] = dt_algo(x_train, y_train, x_test, y_test, metric=metric)

    print('-----Random Forest-------')
    metrics['Random Forest'] = rf_algo(x_train, y_train, x_test, y_test, metric=metric)

    # Find the model with the best score
    best_model = max(metrics, key=metrics.get)
    print(f"\nBest Model: {best_model} with {metric.capitalize()}: {metrics[best_model]:.4f}")

common(final_X_train,final_y_train,final_X_test,final_y_test,metric='precision')


common(final_X_train,final_y_train,final_X_test,final_y_test,metric='recall')


-----KNN-------
-----Naive Bayes------
-----Logistic Regression-------
-----Decision Tree-------
-----Random Forest-------

Best Model: KNN with Precision: 1.0000
-----KNN-------
-----Naive Bayes------
-----Logistic Regression-------
-----Decision Tree-------
-----Random Forest-------

Best Model: KNN with Recall: 1.0000


In [21]:
#In the above KNN is the best model because this algorithm has  high  precision and recall value
#To select best parameters

from sklearn.model_selection import GridSearchCV
parameters = { 'n_neighbors' : [5,7,9,11,13,15],
               'weights' : ['uniform','distance'],
               'metric' : ['minkowski','euclidean','manhattan']}

grid_model = GridSearchCV(KNeighborsClassifier(), parameters,  cv=10,)



grid = grid_model.fit(final_X_train,final_y_train)

grid.best_params_


{'metric': 'minkowski', 'n_neighbors': 7, 'weights': 'distance'}

In [22]:
#  Apply Best parameters

final_model = KNeighborsClassifier(n_neighbors = 7, weights = 'distance',algorithm = 'brute',metric = 'minkowski')
final_model.fit(X_train, y_train)



print(f'Final Test Accuracy : {final_model.score(final_X_test,final_y_test)}')

Final Test Accuracy : 0.3333333333333333


In [23]:
values = [25,14,15,8]

# sc.transform([values]) to get a scaling values of above

if final_model.predict(sc.transform([values]))[0] == 0:
  print(f'setosa')
elif final_model.predict(sc.transform([values]))[0] == 1:
  print(f'versicolor')
else:
  print(f'virginica')

virginica


In [24]:
# Save the model

import pickle
with open ('iris.pkl','wb') as f:
  pickle.dump(final_model,f)

In [25]:
#Save the scaling

import pickle
with open('scaling_data.pkl','wb') as f:
   pickle.dump(sc,f)

**ANN architecture**

In [26]:
!nvidia-smi

Fri Jan 24 12:58:45 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [27]:
import tensorflow
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Activation
from tensorflow.keras.activations import relu,sigmoid,softmax

In [28]:
# define the architecture

model = Sequential()

# input to 1st Hidden layer
model.add(Dense(units=128,kernel_initializer='he_uniform',activation=relu,input_dim=final_X_train.shape[1]))


# outcome from 1st H.Layer will be given to 2nd H.Layer
model.add(Dense(units=64,kernel_initializer='he_uniform',activation=relu))


# outcome from 2nd H.Layer will be given to 3rd H.Layer
model.add(Dense(units=32,kernel_initializer='he_uniform',activation=relu))

# outcome from 3rd H.Layer will be given to 4th H.Layer
model.add(Dense(units=16,kernel_initializer='he_uniform',activation=relu))


# outcome from 4th H.Layer will be given to 5th H.Layer
model.add(Dense(units=8,kernel_initializer='he_uniform',activation=relu))


# outcome from 5th H.Layer will be given to output layer
model.add(Dense(units=3,kernel_initializer='glorot_uniform',activation=softmax))

In [29]:
model.summary()

In [30]:
# metrics
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['acc'])


In [31]:
y_train_p = tensorflow.keras.utils.to_categorical(final_y_train,num_classes=3)
y_train_p

array([[1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 1

In [32]:
# Train the Model
model.fit(final_X_train,y_train_p,batch_size=20,validation_split=0.2,epochs=50)

Epoch 1/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 2s/step - acc: 0.3000 - loss: 1.0275 - val_acc: 0.5417 - val_loss: 0.7344
Epoch 2/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - acc: 0.4201 - loss: 0.9104 - val_acc: 0.5417 - val_loss: 0.6844
Epoch 3/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - acc: 0.5639 - loss: 0.7899 - val_acc: 0.7500 - val_loss: 0.5966
Epoch 4/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - acc: 0.6285 - loss: 0.7236 - val_acc: 0.9167 - val_loss: 0.5374
Epoch 5/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - acc: 0.7368 - loss: 0.6104 - val_acc: 0.9583 - val_loss: 0.4878
Epoch 6/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - acc: 0.8333 - loss: 0.5076 - val_acc: 0.9583 - val_loss: 0.4373
Epoch 7/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - acc: 0.8507 - loss: 0.448

<keras.src.callbacks.history.History at 0x7c59deb771d0>

In [33]:
# to get  all rows outcomes in form of values
outcome = model.predict(X_test)
d = []
for i in outcome:
  if np.argmax(i) == 0:
    d.append(0)
  elif np.argmax(i) == 1:
    d.append(1)
  else:
    d.append(2)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 604ms/step


In [34]:
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

In [35]:
accuracy_score(final_y_test,d)

0.4666666666666667

In [36]:
confusion_matrix(final_y_test,d)

array([[ 0, 10,  0],
       [ 0,  3,  6],
       [ 0,  0, 11]])

In [37]:
print(classification_report(final_y_test,d))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        10
           1       0.23      0.33      0.27         9
           2       0.65      1.00      0.79        11

    accuracy                           0.47        30
   macro avg       0.29      0.44      0.35        30
weighted avg       0.31      0.47      0.37        30



In [38]:
# to check random data

values = np.random.random((2,2)).ravel().reshape(1,-1)

if np.argmax(model.predict(values)) == 0:
  print('setosa')
elif np.argmax(model.predict(values)) == 1:
  print('versicolor')
else:
  print('verginica')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 308ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
versicolor
