In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import numpy as np
import pandas as pd 

from sklearn import preprocessing
from sklearn import metrics
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression

In [2]:
# Read the dataset CSV into a DataFrame; the path is relative to the
# notebook's working directory.
DATASET_PATH = "./dataset/tbpred.csv"
df = pd.read_csv(DATASET_PATH)

In [3]:
# Integer-encode the 'CountryofBirth' column. The fitted encoder is kept in
# `le` so the same mapping can be reused after this cell.
le = preprocessing.LabelEncoder()
country_codes = le.fit_transform(df['CountryofBirth'])
df['encoded_country'] = country_codes

# Working frame: every column except the raw name and the un-encoded country.
X = df.drop(columns=['Name', 'CountryofBirth'])

In [None]:
def metrics_calculator(y_test, y_pred, model_name, average='macro'):
    '''
    Summarise a model's classification scores in a one-column DataFrame.

    Parameters
    ----------
    y_test : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned element-wise with ``y_test``.
    model_name : hashable
        Label used for the single result column.
    average : str, default 'macro'
        Averaging mode forwarded unchanged to ``precision_score``,
        ``recall_score`` and ``f1_score`` (for example 'macro', 'micro',
        'weighted' or 'binary'). The default keeps the original behaviour;
        pass 'binary' to get the same kind of scores as calling those
        functions with their default arguments.

    Returns
    -------
    pandas.DataFrame
        Rows 'Accuracy', 'Precision', 'Recall' and 'F1-score' (in that
        order) with one column named ``model_name``.
    '''
    # Accuracy takes no averaging mode; the other three share `average`.
    scores = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred, average=average),
        'Recall': recall_score(y_test, y_pred, average=average),
        'F1-score': f1_score(y_test, y_pred, average=average),
    }
    return pd.DataFrame(data=list(scores.values()),
                        index=list(scores),
                        columns=[model_name])


In [4]:
# Separate the 'tb_pred' target column from the remaining predictor columns.
Y1 = X['tb_pred']
X1 = X.drop(columns='tb_pred')

# Fit a logistic regression (liblinear solver, fixed seed) on the full data.
# The fit call stays as the cell's last expression so the notebook still
# displays the fitted estimator.
classifier = LogisticRegression(random_state=0, solver='liblinear')
classifier.fit(X1, Y1)

LogisticRegression(random_state=0, solver='liblinear')

In [7]:
# Predict on the same rows the classifier was fitted on, so every score
# printed below is a training-set score.
y_pred = classifier.predict(X1)

# Precision and recall use scikit-learn's default averaging mode here.
accuracy = accuracy_score(Y1, y_pred)
precision = precision_score(Y1, y_pred)
recall = recall_score(Y1, y_pred)

print("train accuracy: %.2f." % accuracy)
print("train precision: %.2f." % precision)
print("train recall: %.2f." % recall)

train accuracy: 0.84.
train precision: 0.73.
train recall: 0.52.


In [9]:
# Share of predictions equal to 1 and equal to 0, expressed as percentages
# of all predictions.
n_predictions = len(y_pred)
percentage_true = (y_pred == 1).sum() / n_predictions * 100
percentage_false = (y_pred == 0).sum() / n_predictions * 100

# Report both shares with two decimal places.
print(f"Percentage of True Predictions: {percentage_true:.2f}%")
print(f"Percentage of False Predictions: {percentage_false:.2f}%")

Percentage of True Predictions: 17.35%
Percentage of False Predictions: 82.65%


In [11]:
from joblib import dump, load

# Write the fitted label encoder to disk, then read it straight back so that
# `le` refers to the copy loaded from the file.
dump(le, 'label_encoder.joblib')
le = load('label_encoder.joblib')

# Write the trained classifier to disk. This stays as the cell's last
# expression so the list of written file names is displayed.
dump(classifier, 'classifier.joblib')

['classifier.joblib']