In [28]:
# To make debugging of logistic_regression module easier we enable imported modules autoreloading feature.
# By doing this you may change the code of logistic_regression library and all these changes will be available here.
%load_ext autoreload
%autoreload 2

# Add project root folder to module loading paths.
import sys
sys.path.append('../..')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Import Dependencies

In [29]:
# Import 3rd party dependencies.
import pandas as pd
from sklearn.model_selection import train_test_split

# Import custom logistic regression implementation.
from homemade.utils.metrics import Metrics
from homemade.utils.array import Array
from homemade.logistic_regression.logistic_regression import LogisticRegression

### Load the Data

In [30]:
# Read the multiclass classification dataset (CSV) into a DataFrame.
data = pd.read_csv(filepath_or_buffer='../../data/multiclass_classification_data.csv')

# Display the first ten rows of the table.
data.head(n=10)

Unnamed: 0,X1,X2,X3,X4,X5,label
0,-2.25027,-1.646135,1.43657,-0.972041,1.643378,2
1,-1.785269,-0.565972,1.346475,-1.806297,-1.176173,2
2,1.589542,0.796631,-1.328454,-2.148804,-0.831955,1
3,1.832943,-0.284234,-1.907732,-1.425174,1.318302,0
4,1.012438,0.080761,-0.998305,-1.293794,-0.916274,0
5,1.164023,1.780825,-0.450353,-0.272858,-0.049029,1
6,1.881237,-0.825735,-2.143935,-1.297455,-0.361463,0
7,0.841014,-1.028548,-1.225741,-0.972856,1.276965,0
8,0.127641,-2.144851,-0.930062,-0.348827,-0.593811,0
9,0.855492,-0.663649,-1.105884,-0.985451,0.453509,0


In [31]:
# Separate the feature columns (X) from the target column (y).
X = data.drop(columns="label")
y = data["label"]

# Hold out 20% of the rows as the test set; the fixed random_state keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [40]:
# Training hyperparameters.
learning_rate = 0.01
iterations = 10000

# Convert the pandas train/test splits into the homemade Array type via plain Python lists.
X_train = Array(x_train.to_numpy().tolist())
Y_train = Array(y_train.to_numpy().tolist())
X_test = Array(x_test.to_numpy().tolist())
Y_test = Array(y_test.to_numpy().tolist())

# Create the logistic regression instance from the training features and labels.
logistic_regression = LogisticRegression(X_train, Y_train)

# Run training and keep the returned model parameters for later prediction.
model_params = logistic_regression.train(learning_rate, iterations)

Start training
Final parameters: Cost: 0.2574718051621523, model parameters: [(Array([0.9786221330622762, -1.7731010799595366, -1.8192728160335179, 0.3523868519661082, 0.0024734065420297198]), -1.2168045075500697), (Array([0.6279890565579888, 3.0154080838386386, 0.571290884666473, 0.5534205028915334, 0.1465800567208011]), -1.2833638725710985), (Array([-1.6287725779370237, -0.7926911530385427, 1.4563980837557797, -0.9636313235115346, -0.17315642899519632]), -1.1565830474933199)]


### Evaluate the accuracy of the trained model

In [37]:
# Predict labels for the held-out test set using the trained model parameters.
y_predictions = logistic_regression.predict(X_test, model_params)

# Compute the F1 score and the accuracy of the predictions against the true test labels, then print both.
# NOTE(review): how Metrics.f1_score aggregates over multiple classes is not visible from this notebook — confirm.
f1_score = Metrics.f1_score(Y_test, y_predictions)
accuracy_score = Metrics.accuracy_score(Y_test, y_predictions)
print(f"f1 score: {f1_score}, accuracy score: {accuracy_score}")


f1 score: 0.920353982300885, accuracy score: 0.84
