In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.utils import class_weight

# --- Step 1: Load cleaned data ---
df = pd.read_csv('thyroid_final_3class_data.csv')

# --- Step 2: Select the model features (leaky columns are left out) ---
# NOTE(review): earlier wording, the log line and the saved-model filename all
# say "19 features", but this list holds 18 names. Either one column was
# dropped by accident or the count is stale -- confirm which. The filename is
# kept unchanged because the prediction cells load the model by that name.
feature_cols = [
    'age', 'sex', 'on_thyroxine', 'on_antithyroid_medication', 'sick', 'pregnant',
    'thyroid_surgery', 'I131_treatment', 'lithium', 'goitre', 'tumor',
    'hypopituitary', 'psych', 'TSH', 'T3', 'TT4', 'T4U', 'FTI'
]
X = df[feature_cols]
y = df['target']

# --- Step 3: Train-test split ---
# 80/20 split, stratified on the target so both parts keep similar class shares.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# --- Step 4: Compute class weights ---
classes = np.unique(y_train)
weights = class_weight.compute_class_weight(
    class_weight='balanced', classes=classes, y=y_train
)
# Look each weight up by label *value*. `weights` is ordered like `classes`,
# so indexing it with the raw label (weights[label]) is only correct when the
# labels happen to be exactly 0..K-1.
weight_by_class = dict(zip(classes, weights))
sample_weights = np.array([weight_by_class[label] for label in y_train])

# --- Step 5: Train weighted XGBoost ---
# 'multi:softprob' is the multiclass objective whose raw output is one
# probability per class, which is what the prediction cells ask for through
# predict_proba().
# NOTE(review): predict() should still return the same hard labels as with
# 'multi:softmax' (argmax of the same scores) -- confirm on the installed
# xgboost version.
model = xgb.XGBClassifier(
    objective='multi:softprob',
    num_class=3,
    eval_metric='mlogloss'
)

# Report the real number of columns instead of a hard-coded count.
print(f"\n--- Training weighted XGBoost on {len(feature_cols)} realistic features ---")
model.fit(X_train, y_train, sample_weight=sample_weights)
print("--- Training complete ---")

# --- Step 6: Evaluate ---
# Same 0/1/2 -> name encoding the prediction cells use in `diagnosis_map`.
# Passing `labels` explicitly pins each display name to its class id, so the
# report cannot silently shift names if a class is missing from the hold-out.
class_names = {0: 'Normal', 1: 'Hypothyroid', 2: 'Hyperthyroid'}
y_pred = model.predict(X_test)
print("Classification Report:")
print(classification_report(
    y_test,
    y_pred,
    labels=list(class_names),
    target_names=list(class_names.values()),
))
# NOTE(review): the previously recorded run shows 0.00 recall on Hyperthyroid
# (14 hold-out rows) despite 0.90 accuracy -- judge this model on the
# per-class rows / macro average, not on accuracy.

# --- Step 7: Save model ---
model_filename = 'thyroid_model_weighted_19features.joblib'
joblib.dump(model, model_filename)
print(f"\n--- Weighted model saved to {model_filename} ---")



--- Training weighted XGBoost on 19 realistic features ---
--- Training complete ---
Classification Report:
              precision    recall  f1-score   support

      Normal       0.96      0.93      0.95      1061
 Hypothyroid       0.22      0.36      0.28        44
Hyperthyroid       0.00      0.00      0.00        14

    accuracy                           0.90      1119
   macro avg       0.39      0.43      0.41      1119
weighted avg       0.92      0.90      0.91      1119


--- Weighted model saved to thyroid_model_weighted_19features.joblib ---


In [3]:
import pandas as pd
import joblib

# --- Step 1: Load the Weighted Model ---
model_filename = 'thyroid_model_weighted_19features.joblib'
model = joblib.load(model_filename)
print(f"Model '{model_filename}' loaded successfully.")

# --- Step 2: Define Sample Patients ---

# All eleven 0/1 indicator columns start at 0; each sample overrides only the
# ones it sets to 1. Key order matters: it becomes the DataFrame column order.
all_flags_off = dict.fromkeys(
    [
        'on_thyroxine', 'on_antithyroid_medication', 'sick', 'pregnant',
        'thyroid_surgery', 'I131_treatment', 'lithium', 'goitre', 'tumor',
        'hypopituitary', 'psych',
    ],
    0,
)

# Sample 1: Classic Hyperthyroid
sample1 = {
    'age': 32, 'sex': 0,
    **all_flags_off,
    'TSH': 0.005, 'T3': 4.5, 'TT4': 180.0, 'T4U': 1.15, 'FTI': 195.0,
}
# Sample 2: Classic Hypothyroid
sample2 = {
    'age': 45, 'sex': 1,
    **{**all_flags_off, 'on_thyroxine': 1, 'goitre': 1},
    'TSH': 15.0, 'T3': 0.8, 'TT4': 50.0, 'T4U': 0.7, 'FTI': 40.0,
}
# Sample 3: Normal
sample3 = {
    'age': 28, 'sex': 0,
    **all_flags_off,
    'TSH': 2.0, 'T3': 1.2, 'TT4': 90.0, 'T4U': 1.0, 'FTI': 100.0,
}

# One row per patient
input_df = pd.DataFrame([sample1, sample2, sample3])

# --- Step 3: Make Predictions ---
pred_numeric = model.predict(input_df)      # hard class id per row
pred_proba = model.predict_proba(input_df)  # one probability column per class

# --- Step 4: Interpret Predictions ---
# Class id -> display name (0, 1, 2 in this order).
diagnosis_map = dict(enumerate(['Normal', 'Hypothyroid', 'Hyperthyroid']))

for sample_no, (label, probs) in enumerate(zip(pred_numeric, pred_proba), start=1):
    print(f"\nSample {sample_no} Prediction: {label} ({diagnosis_map[label]})")
    print(f"Probabilities -> Normal: {probs[0]:.2%}, Hypothyroid: {probs[1]:.2%}, Hyperthyroid: {probs[2]:.2%}")


Model 'thyroid_model_weighted_19features.joblib' loaded successfully.

Sample 1 Prediction: 2 (Hyperthyroid)
Probabilities -> Normal: 17.70%, Hypothyroid: 0.02%, Hyperthyroid: 82.28%

Sample 2 Prediction: 1 (Hypothyroid)
Probabilities -> Normal: 3.08%, Hypothyroid: 96.92%, Hyperthyroid: 0.01%

Sample 3 Prediction: 0 (Normal)
Probabilities -> Normal: 99.99%, Hypothyroid: 0.00%, Hyperthyroid: 0.00%


In [4]:
import pandas as pd
import joblib

# --- Load the Weighted Model ---
model_filename = 'thyroid_model_weighted_19features.joblib'
model = joblib.load(model_filename)
print(f"Model '{model_filename}' loaded successfully.")

# --- Define New Sample Patients ---
# Keyword order below is the column order of the DataFrame built from them.

# Sample 1: Mild Hyperthyroid
sample1 = dict(
    age=40, sex=1,
    on_thyroxine=0, on_antithyroid_medication=0, sick=0, pregnant=0,
    thyroid_surgery=0, I131_treatment=0, lithium=0, goitre=1, tumor=0,
    hypopituitary=0, psych=0,
    TSH=0.2, T3=3.8, TT4=160.0, T4U=1.1, FTI=180.0,
)
# Sample 2: Moderate Hypothyroid
sample2 = dict(
    age=55, sex=0,
    on_thyroxine=1, on_antithyroid_medication=0, sick=1, pregnant=0,
    thyroid_surgery=0, I131_treatment=0, lithium=0, goitre=0, tumor=0,
    hypopituitary=0, psych=1,
    TSH=12.0, T3=0.7, TT4=55.0, T4U=0.6, FTI=45.0,
)
# Sample 3: Normal
sample3 = dict(
    age=35, sex=1,
    on_thyroxine=0, on_antithyroid_medication=0, sick=0, pregnant=0,
    thyroid_surgery=0, I131_treatment=0, lithium=0, goitre=0, tumor=0,
    hypopituitary=0, psych=0,
    TSH=1.8, T3=1.0, TT4=95.0, T4U=0.98, FTI=102.0,
)

# Stack the three records into a single frame, one row each.
input_df = pd.DataFrame([sample1, sample2, sample3])

# --- Make Predictions ---
pred_numeric = model.predict(input_df)
pred_proba = model.predict_proba(input_df)

# --- Interpret Predictions ---
diagnosis_map = {
    0: 'Normal',
    1: 'Hypothyroid',
    2: 'Hyperthyroid',
}

# Walk the rows by position; the printed sample number is 1-based.
for idx in range(len(pred_numeric)):
    num = pred_numeric[idx]
    proba = pred_proba[idx]
    print(f"\nSample {idx + 1} Prediction: {num} ({diagnosis_map[num]})")
    print(f"Probabilities -> Normal: {proba[0]:.2%}, Hypothyroid: {proba[1]:.2%}, Hyperthyroid: {proba[2]:.2%}")


Model 'thyroid_model_weighted_19features.joblib' loaded successfully.

Sample 1 Prediction: 2 (Hyperthyroid)
Probabilities -> Normal: 0.55%, Hypothyroid: 0.00%, Hyperthyroid: 99.45%

Sample 2 Prediction: 1 (Hypothyroid)
Probabilities -> Normal: 4.72%, Hypothyroid: 95.28%, Hyperthyroid: 0.01%

Sample 3 Prediction: 0 (Normal)
Probabilities -> Normal: 100.00%, Hypothyroid: 0.00%, Hyperthyroid: 0.00%


In [5]:
import pandas as pd
import joblib

# --- Load the Weighted Model ---
model_filename = 'thyroid_model_weighted_19features.joblib'
model = joblib.load(model_filename)
print(f"Model '{model_filename}' loaded successfully.")

# --- Define New Sample Patients ---
# Each record is filled in three stages (age/sex, the 0/1 indicator columns,
# then the five numeric readings). Insertion order is preserved, so the
# resulting columns come out in the same order for every sample.

# Sample 1: Severe Hyperthyroid
sample1 = {'age': 29, 'sex': 0}
sample1.update(
    on_thyroxine=0, on_antithyroid_medication=0, sick=0, pregnant=0,
    thyroid_surgery=0, I131_treatment=0, lithium=0, goitre=1, tumor=0,
    hypopituitary=0, psych=0,
)
sample1.update(TSH=0.01, T3=5.0, TT4=200.0, T4U=1.2, FTI=210.0)

# Sample 2: Mild Hypothyroid
sample2 = {'age': 50, 'sex': 1}
sample2.update(
    on_thyroxine=1, on_antithyroid_medication=0, sick=0, pregnant=0,
    thyroid_surgery=0, I131_treatment=0, lithium=0, goitre=0, tumor=0,
    hypopituitary=0, psych=0,
)
sample2.update(TSH=6.0, T3=1.0, TT4=70.0, T4U=0.8, FTI=60.0)

# Sample 3: Normal with slightly higher TT4
sample3 = {'age': 42, 'sex': 0}
sample3.update(
    on_thyroxine=0, on_antithyroid_medication=0, sick=0, pregnant=0,
    thyroid_surgery=0, I131_treatment=0, lithium=0, goitre=0, tumor=0,
    hypopituitary=0, psych=0,
)
sample3.update(TSH=2.2, T3=1.3, TT4=105.0, T4U=1.0, FTI=108.0)

# Assemble the records into one frame (one row per sample).
input_df = pd.DataFrame([sample1, sample2, sample3])

# --- Make Predictions ---
pred_numeric = model.predict(input_df)
pred_proba = model.predict_proba(input_df)

# --- Interpret Predictions ---
diagnosis_map = {0: 'Normal', 1: 'Hypothyroid', 2: 'Hyperthyroid'}

# Iterate over the predicted labels; fetch the matching probability row by
# position (sample numbers are 1-based, rows are 0-based).
for i, num in enumerate(pred_numeric, 1):
    proba = pred_proba[i - 1]
    print(f"\nSample {i} Prediction: {num} ({diagnosis_map[num]})")
    print(f"Probabilities -> Normal: {proba[0]:.2%}, Hypothyroid: {proba[1]:.2%}, Hyperthyroid: {proba[2]:.2%}")


Model 'thyroid_model_weighted_19features.joblib' loaded successfully.

Sample 1 Prediction: 2 (Hyperthyroid)
Probabilities -> Normal: 18.75%, Hypothyroid: 0.01%, Hyperthyroid: 81.24%

Sample 2 Prediction: 0 (Normal)
Probabilities -> Normal: 99.96%, Hypothyroid: 0.04%, Hyperthyroid: 0.00%

Sample 3 Prediction: 0 (Normal)
Probabilities -> Normal: 100.00%, Hypothyroid: 0.00%, Hyperthyroid: 0.00%
