# Multiclass Classification


## Using Logistic Regression
- Imports 

In [1]:
import numpy as np
import pandas as pd
from ml_lib.linear_models.logistic_regression import LogisticRegression
from ml_lib.metrics.math import accuracy
from ml_lib.preprocessing.scaler import StandardScaler
from ml_lib.utils.data import train_test_split
from ml_lib.preprocessing.pipeline import Pipeline
from ml_lib.preprocessing.imputer import SimpleImputer

- Load Data

In [2]:
train_df= pd.read_csv(r"C:/project/datasets/train_multi_class.csv")
test_df= pd.read_csv(r"C:/project/datasets/test_multi_class.csv")

test_df.info()
train_df_clean=train_df.dropna(subset=[train_df.columns[-1]])

print("Original rows:", len(train_df))
print("Rows kept:", len(train_df_clean))
print("Rows removed:", len(train_df) - len(train_df_clean))



X= train_df_clean.drop("target",axis=1).values
y=train_df_clean["target"].values

print(f"Shape of X : {X.shape}\nShape of y : {y.shape}")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25000 entries, 0 to 24999
Data columns (total 40 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   feature_0   25000 non-null  float64
 1   feature_1   25000 non-null  float64
 2   feature_2   25000 non-null  float64
 3   feature_3   25000 non-null  float64
 4   feature_4   25000 non-null  float64
 5   feature_5   25000 non-null  float64
 6   feature_6   25000 non-null  float64
 7   feature_7   25000 non-null  float64
 8   feature_8   25000 non-null  float64
 9   feature_9   25000 non-null  float64
 10  feature_10  25000 non-null  float64
 11  feature_11  25000 non-null  float64
 12  feature_12  25000 non-null  float64
 13  feature_13  25000 non-null  float64
 14  feature_14  25000 non-null  float64
 15  feature_15  25000 non-null  float64
 16  feature_16  25000 non-null  float64
 17  feature_17  25000 non-null  float64
 18  feature_18  25000 non-null  float64
 19  feature_19  25000 non-nul

preprocessing

In [3]:
pipe = Pipeline([
    ('imputer',SimpleImputer(strategy='mean')),
    ('scaler',StandardScaler())
    ])

X_trans= pipe.fit(X)
print(X_trans.shape)

X_train,y_train,X_val,y_val= train_test_split(X_trans,y)

(49999, 40)


count unique classes

In [4]:
classes = np.unique(y)
n_classes = len(classes)

print("the classes are",classes ,"and the no of classes are ", n_classes )

the classes are [0. 1. 2. 3. 4.] and the no of classes are  5


### Train Model 

In [5]:
models=[]
for cls in classes:
    y_binary= (y_train==cls).astype(int)

    model = LogisticRegression(
        lr= 0.005,
        epochs=5000
    )
    model.fit(X_train,y_binary)
    models.append(model)

NOte  it takes much time

Predictions function for all classes 

In [6]:
def multiclass_predict(X):
    probs=[]
    for model in models:
        probs.append(model.predict_proba(X))

    probs = np.array(probs)
    return np.argmax(probs,axis=0)

In [8]:
y_pred = multiclass_predict(X_val)
acc=accuracy(y_val,y_pred)

print(f"Test accuracy : {acc}")

Test accuracy : 0.5705570557055706


In [9]:
feature_cols = train_df.columns[:-1]  # exclude target
X_test = test_df[feature_cols].values
print(X_test.shape)

X_test_trans = pipe.transform(X_test)
predictions = multiclass_predict(X_test_trans)
test_df["target"]= predictions
test_df.to_csv(r"C:/project/datasets/processed/multiclass_classification_LR.csv")

(25000, 40)


## Classification using Neural Network

- IMPORTS


In [10]:
from ml_lib.neural_network.layers import Dense
from ml_lib.neural_network.sequential import Sequential
from ml_lib.neural_network.losses import CategoricalCrossEntropy
from ml_lib.preprocessing.encoding import LabelEncoder, OneHotEncoder

- Preprocessing

In [11]:
y= np.nan_to_num(y, nan=5)
y = y.astype(int)

le= LabelEncoder()
y_train_enc = le.fit_transform(y)

ohe = OneHotEncoder()
y_train_oh = ohe.fit_transform(y_train_enc.reshape(-1,1))



print("Train labels:", np.unique(y_train_enc))
print("One-hot shape:", y_train_oh.shape)



Train labels: [0 1 2 3 4]
One-hot shape: (49999, 5)


In [18]:
model = Sequential([
(Dense(128, activation="relu",init= "he")),
(Dense(64, activation="relu",init="he")),
(Dense(n_classes, activation="softmax",init="xavier"))])

model.compile(optimizer="adam", loss= CategoricalCrossEntropy(),lr=0.005)

history = model.fit(X_trans, y_train_oh, epochs=1000)

Epoch 0 | Loss: 10.7264
Epoch 20 | Loss: 1.7995
Epoch 40 | Loss: 1.1702
Epoch 60 | Loss: 1.0217
Epoch 80 | Loss: 0.9343
Epoch 100 | Loss: 0.8720
Epoch 120 | Loss: 0.8242
Epoch 140 | Loss: 0.7861
Epoch 160 | Loss: 0.7552
Epoch 180 | Loss: 0.7294
Epoch 200 | Loss: 0.7083
Epoch 220 | Loss: 0.6898
Epoch 240 | Loss: 0.6728
Epoch 260 | Loss: 0.6576
Epoch 280 | Loss: 0.6434
Epoch 300 | Loss: 0.6302
Epoch 320 | Loss: 0.6180
Epoch 340 | Loss: 0.6067
Epoch 360 | Loss: 0.5966
Epoch 380 | Loss: 0.5878
Epoch 400 | Loss: 0.5800
Epoch 420 | Loss: 0.5730
Epoch 440 | Loss: 0.5667
Epoch 460 | Loss: 0.5609
Epoch 480 | Loss: 0.5556
Epoch 500 | Loss: 0.5507
Epoch 520 | Loss: 0.5462
Epoch 540 | Loss: 0.5419
Epoch 560 | Loss: 0.5379
Epoch 580 | Loss: 0.5342
Epoch 600 | Loss: 0.5308
Epoch 620 | Loss: 0.5276
Epoch 640 | Loss: 0.5246
Epoch 660 | Loss: 0.5217
Epoch 680 | Loss: 0.5191
Epoch 700 | Loss: 0.5165
Epoch 720 | Loss: 0.5141
Epoch 740 | Loss: 0.5118
Epoch 760 | Loss: 0.5096
Epoch 780 | Loss: 0.5075
Epoch

- check the accuracy percentage

In [19]:
y_val_pred = model.predict(X_val)
y_val_pred = np.argmax(y_val_pred, axis=1)
y_val_labels=le.inverse_transform(y_val_pred)

val_acc= accuracy(y_val,y_val_labels)*100
print(f"Validation Accuracy : ",val_acc,"%")

Validation Accuracy :  84.86848684868487 %


for test data

In [20]:
y_test_pred = model.predict(X_test_trans)
test_df["target"]= predictions
test_df.to_csv(r"C:/project/datasets/processed/multiclass_classification_NN.csv")
