# Machine Learning Model: Multi-Class Classification

## Imports

In [37]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import numpy as np
import pandas as pd 


from sklearn import preprocessing
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression

## 1 Loading CSV

In [38]:
#Load csv
df = pd.read_csv("./dataset/tbpred.csv")

## 2 Data Preprocessing

In [39]:
#Label Encode 'Country of Birth' 
le = preprocessing.LabelEncoder()
df['encoded_country'] = le.fit_transform(df['CountryofBirth'])
X = df.drop(['Name','CountryofBirth'], axis=1)

In [40]:
X1 = X.drop('tb_pred', axis=1)
Y1 = X['tb_pred']

## 3 Split Data into Training and Testing Set

In [41]:
X_train, X_test, y_train, y_test = train_test_split(X1, Y1, test_size = 0.20)

## 4 Generate Classification Model

In [42]:
# Fitting Logistic Regression to the Training set
classifier = LogisticRegression(solver='liblinear', random_state = 0)
classifier.fit(X_train, y_train)

LogisticRegression(random_state=0, solver='liblinear')

## 5 Predict and Evaluate Model

In [43]:
y_pred=classifier.predict(X_test)

accuracy = metrics.accuracy_score(y_test, y_pred)
print("train accuracy: %.2f." %accuracy)

precision = metrics.precision_score(y_test, y_pred)
print("train precision: %.2f." %precision)

recall = metrics.recall_score(y_test, y_pred)
print("train recall: %.2f." %recall)

train accuracy: 0.86.
train precision: 0.79.
train recall: 0.56.


In [47]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.87      0.95      0.91       304
           1       0.79      0.56      0.66        96

    accuracy                           0.86       400
   macro avg       0.83      0.76      0.79       400
weighted avg       0.85      0.86      0.85       400



### Checking for Percentages

In [44]:
# Calculate the percentage of True and False predictions
percentage_true = (y_pred == 1).sum() / len(y_pred) * 100
percentage_false = (y_pred == 0).sum() / len(y_pred) * 100

# Display the percentages
print(f"Percentage of True Predictions: {percentage_true:.2f}%")
print(f"Percentage of False Predictions: {percentage_false:.2f}%")

Percentage of True Predictions: 17.00%
Percentage of False Predictions: 83.00%


In [45]:
df1 = pd.DataFrame()
df1['Original'] = y_test[:10]
df1['Predicted'] = y_pred[:10]

print(df1)

      Original  Predicted
555          0          0
908          0          0
1553         0          0
578          0          0
12           0          0
1974         1          1
1821         0          0
1209         1          0
812          0          0
1891         0          0


## 6 Saving Model

In [46]:
# To save an encoder
from joblib import dump
dump(le, 'label_encoder.joblib')

from joblib import dump
dump(classifier, 'classifier.joblib')

['classifier.joblib']