In [4]:
# ===============================
# Main Labs Thyroid Model Training
# ===============================

# -------------------------------
# 1. Import libraries
# -------------------------------
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score
from xgboost import XGBClassifier
import pickle

# -------------------------------
# 2. Load dataset
# -------------------------------
df = pd.read_csv("thyroid_clinical_balanced.csv")

# -------------------------------
# 3. Select main lab features + age and sex
# -------------------------------
main_features = ['age', 'sex', 'TSH', 'T3', 'TT4', 'T4U', 'FTI']
X = df[main_features]
y = df['target']  # multi-class target: 0=Normal, 1=Primary Hypothyroid, 2=Compensated, 3=Hyperthyroid

# -------------------------------
# 4. Train-test split (80-20)
# -------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# -------------------------------
# 5. Initialize XGBoost classifier
# -------------------------------
# Using multi-class objective
model = XGBClassifier(
    objective='multi:softprob',  # for multi-class classification
    eval_metric='mlogloss',
    use_label_encoder=False,
    n_estimators=200,
    max_depth=5,
    learning_rate=0.1,
    random_state=42
)

# -------------------------------
# 6. Train the model
# -------------------------------
model.fit(X_train, y_train)

# -------------------------------
# 7. Evaluate the model
# -------------------------------
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Optional: ROC-AUC for multi-class
try:
    roc_auc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr')
    print("\nROC-AUC Score:", roc_auc)
except Exception as e:
    print("ROC-AUC not calculated:", e)

# -------------------------------
# 8. Save the trained model
# -------------------------------
with open("thyroid_features.pkl", "wb") as f:
    pickle.dump(model, f)

print("\nModel saved as thyroid_features.pkl")


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Accuracy: 0.9761273209549072

Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.98      0.98       123
           1       0.96      0.84      0.90        62
           2       0.98      0.99      0.99       516
           3       0.96      0.98      0.97        53

    accuracy                           0.98       754
   macro avg       0.97      0.95      0.96       754
weighted avg       0.98      0.98      0.98       754


Confusion Matrix:
 [[121   0   0   2]
 [  0  52  10   0]
 [  3   2 511   0]
 [  1   0   0  52]]

ROC-AUC Score: 0.998406886504168

Model saved as thyroid_features.pkl


In [3]:
import pandas as pd
import pickle
import numpy as np

# Load the main-labs model
with open("main_labs_thyroid_model.pkl", "rb") as f:
    model = pickle.load(f)

# Features used in this model
features = ['age', 'sex', 'TSH', 'T3', 'TT4', 'T4U', 'FTI']

# Sample patient data
patients = [
    { 'age': 35, 'sex': 0, 'TSH': 2.5, 'T3': 1.8, 'TT4': 9.5, 'T4U': 1.0, 'FTI': 9.5 },   
    { 'age': 50, 'sex': 0, 'TSH': 7.5, 'T3': 1.2, 'TT4': 70,  'T4U': 0.9, 'FTI': 50 },    
    { 'age': 28, 'sex': 0, 'TSH': 0.1, 'T3': 4.2, 'TT4': 15.8, 'T4U': 1.6, 'FTI': 120 },  
    { 'age': 45, 'sex': 0, 'TSH': 4.5, 'T3': 2.0, 'TT4': 90,  'T4U': 1.0, 'FTI': 95 },    
]

df_patients = pd.DataFrame(patients)

# Make predictions
preds = model.predict(df_patients)
probas = model.predict_proba(df_patients)

# Assuming your model has classes [0,1,2,3] same as before
class_names = ['Normal', 'Primary Hypothyroid', 'Hyperthyroid', 'Compensated Hypothyroid']

for i, (pred, proba) in enumerate(zip(preds, probas)):
    condition = class_names[pred]
    confidence = np.max(proba) * 100
    print(f"Patient {i+1}: {condition} ({confidence:.1f}% confidence)")


Patient 1: Compensated Hypothyroid (99.4% confidence)
Patient 2: Compensated Hypothyroid (80.9% confidence)
Patient 3: Hyperthyroid (99.3% confidence)
Patient 4: Normal (59.1% confidence)
