<h1 style="color:#2192f1" align="center"> MULTI CLASS CLASSIFICATION</h1>


<h2 align="center" style="color:#f0e912">IMPORT LIBRARIES</h2>


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from LogisticRegression.src.multi_class_classification import *

<h2 align="center" style="color:#f0e912">DATA EXPLORATION</h2>


In [2]:
df = pd.read_csv("../data/multi_class_classification_dataset/dermatology.csv")

In [3]:
df.head()

Unnamed: 0,erythema,scaling,definite_borders,itching,koebner_phenomenon,polygonal_papules,follicular_papules,oral_mucosal_involvement,knee_and_elbow_involvement,scalp_involvement,...,disappearance_granular_layer,vacuolisation_damage_basal_layer,spongiosis,saw_tooth_appearance_retes,follicular_horn_plug,perifollicular_parakeratosis,inflammatory_mononuclear_infiltrate,band_like_infiltrate,age,class
0,2,2,0,3,0,0,0,0,1,0,...,0,0,3,0,0,0,1,0,55,2
1,3,3,3,2,1,0,0,0,1,1,...,0,0,0,0,0,0,1,0,8,1
2,2,1,2,3,1,3,0,3,0,0,...,0,2,3,2,0,0,2,3,26,3
3,2,2,2,0,0,0,0,0,3,2,...,3,0,0,0,0,0,3,0,40,1
4,2,3,2,2,2,2,0,2,0,0,...,2,3,2,3,0,0,2,3,45,3


In [4]:
df.describe()

Unnamed: 0,erythema,scaling,definite_borders,itching,koebner_phenomenon,polygonal_papules,follicular_papules,oral_mucosal_involvement,knee_and_elbow_involvement,scalp_involvement,...,focal_hypergranulosis,disappearance_granular_layer,vacuolisation_damage_basal_layer,spongiosis,saw_tooth_appearance_retes,follicular_horn_plug,perifollicular_parakeratosis,inflammatory_mononuclear_infiltrate,band_like_infiltrate,class
count,366.0,366.0,366.0,366.0,366.0,366.0,366.0,366.0,366.0,366.0,...,366.0,366.0,366.0,366.0,366.0,366.0,366.0,366.0,366.0,366.0
mean,2.068306,1.795082,1.54918,1.36612,0.63388,0.448087,0.166667,0.377049,0.614754,0.519126,...,0.393443,0.464481,0.456284,0.953552,0.453552,0.103825,0.114754,1.86612,0.554645,2.803279
std,0.664753,0.701527,0.907525,1.138299,0.908016,0.957327,0.570588,0.834147,0.982979,0.905639,...,0.849406,0.864899,0.954873,1.130172,0.954744,0.450433,0.488723,0.726108,1.105908,1.597803
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
25%,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
50%,2.0,2.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,3.0
75%,2.0,2.0,2.0,2.0,1.0,0.0,0.0,0.0,1.0,1.0,...,0.0,1.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0,4.0
max,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,6.0


<h2 align="center" style="color:#f0e912">DATA PREPROCESSING</h2>


In [5]:
df["age"] = df["age"].apply(lambda x: int(x) if x.isdigit() else None)

In [6]:
df.dropna(inplace=True)

In [7]:
# Classes start from 0
df["class"] = df["class"].apply(lambda x: x - 1)

In [8]:
class_counts = df["class"].value_counts()
df["weights"] = df["class"].apply(lambda x: 1/class_counts[x])

In [9]:
df_train = df.sample(random_state=42, frac=0.7, weights="weights")
df_test = df.drop(df_train.index)
df.drop("weights", axis=1, inplace=True)

In [10]:
nb_classes = df["class"].value_counts().count()

<h3 align="center" style="color:#5F75BF">CLASS BALANCE CHECK</h3>


In [11]:
df["class"].value_counts()

class
0    111
2     71
1     60
4     48
3     48
5     20
Name: count, dtype: int64

In [12]:
df_train["class"].value_counts()

class
0    57
1    47
2    47
4    40
3    40
5    20
Name: count, dtype: int64

<h3 align="center" style="color:#5F75BF">ADD INTERCEPT TERM</h3>


In [13]:
# Transform features and target into numpy
X = df_train.drop("class", axis=1)
y = df_train["class"].values.reshape(-1, 1)
# Intercept feature
i_term = np.ones((len(X), 1))
# Add intercept feature in other features
X_intercept = np.concatenate((i_term, X), axis=1)

In [14]:
# Transform features and target into numpy
X_test = df_test.drop("class", axis=1)
y_test = df_test["class"].values.reshape(-1, 1)
# Intercept feature
i_term_test = np.ones((len(X_test), 1))
# Add intercept feature in other features
X_test_intercept = np.concatenate((i_term_test, X_test), axis=1)

<h2 align="center" style="color:#f0e912">TRAINING</h2>


In [15]:
theta_list = [np.zeros((X_intercept.shape[1], 1)) for _ in range(6)]

In [16]:
proba_class_i(X_intercept[0], theta_list, 4) - 1 * (y[0]==4).item()

array([-0.83333333])

In [17]:
theta_list[0].shape

(36, 1)

In [18]:
grad = proba_class_i(X_intercept[0], theta_list, 4) - 1 * (y[0]==4).item() * X_intercept[0]
grad = grad.reshape(-1, 1)
grad.shape

(36, 1)

In [19]:
test = theta_list[4] - grad

In [20]:
test

array([[ 0.83333333],
       [ 1.83333333],
       [ 1.83333333],
       [ 0.83333333],
       [-0.16666667],
       [-0.16666667],
       [-0.16666667],
       [-0.16666667],
       [-0.16666667],
       [-0.16666667],
       [-0.16666667],
       [-0.16666667],
       [-0.16666667],
       [-0.16666667],
       [-0.16666667],
       [ 1.83333333],
       [-0.16666667],
       [ 2.83333333],
       [-0.16666667],
       [-0.16666667],
       [-0.16666667],
       [ 1.83333333],
       [-0.16666667],
       [-0.16666667],
       [-0.16666667],
       [-0.16666667],
       [-0.16666667],
       [-0.16666667],
       [-0.16666667],
       [-0.16666667],
       [-0.16666667],
       [-0.16666667],
       [ 2.83333333],
       [-0.16666667],
       [28.83333333],
       [-0.14583333]])

In [44]:
theta_list, final_loss, loss_history = gradient_descent(X_intercept, y, type="SGD", alpha=0.001)

In [45]:
print(loss_history)

[[449.73162678]
 [478.07008627]
 [487.94282083]
 [499.92765595]
 [516.00910731]
 [556.74994417]
 [559.04652605]
 [652.92249783]
 [611.0591996 ]
 [610.72685659]
 [669.43736383]
 [653.73829295]
 [652.29021378]
 [586.96224519]
 [613.88151858]
 [488.73033438]
 [484.61420575]
 [434.5846327 ]
 [432.7861208 ]
 [463.16285107]
 [476.74453447]
 [517.4903647 ]
 [515.75142134]
 [469.85386691]
 [473.14179698]
 [516.11200438]
 [519.18886299]
 [490.83349193]
 [481.91287319]
 [554.92002043]
 [630.79823728]
 [602.70502284]
 [641.70256844]
 [589.77033036]
 [560.68481491]
 [558.39280148]
 [495.05383341]
 [493.49266759]
 [448.37112011]
 [431.42681226]
 [528.11945903]
 [492.42989913]
 [438.16813918]
 [473.87926932]
 [544.84536998]
 [513.33988834]
 [442.55111212]
 [443.38783493]
 [458.9007511 ]
 [459.6707198 ]
 [485.5972089 ]
 [440.67641482]
 [511.82636049]
 [508.7602412 ]
 [492.79162445]
 [496.21983725]
 [469.18502188]
 [451.78449784]
 [423.02608586]
 [434.33706257]
 [439.24263931]
 [450.44456485]
 [431.96

<h2 align="center" style="color:#f0e912">TESTING</h2>
