In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from xgboost import XGBClassifier
import pickle

# -------------------------------
# Step 1: Load clinical balanced dataset
# -------------------------------
# Read the CSV into a DataFrame, then report its dimensions and how many
# rows carry each distinct value of the 'target' column.
dataset_path = "thyroid_clinical_balanced.csv"
df = pd.read_csv(dataset_path)
print(f"Loaded dataset: {df.shape}")

target_counts = df['target'].value_counts()
print(target_counts)

# -------------------------------
# Step 2: Prepare features and target
# -------------------------------
# Columns that are standardized below; every other column of X is passed
# on unchanged.
numeric_features = ['age', 'TSH', 'T3', 'TT4', 'T4U', 'FTI']
X = df.drop('target', axis=1)
y = df['target']

# Train-test split: hold out 20% of the rows, stratified on y so both
# splits keep the class proportions; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# The split frames are derived from X. Depending on the pandas /
# scikit-learn versions in use, assigning columns onto them in place can
# raise SettingWithCopyWarning, so work on explicit, independent copies.
X_train = X_train.copy()
X_test = X_test.copy()

# Scale numeric features: the scaler is fitted on the training split only
# and then applied as-is to the test split, so no test-set statistics leak
# into the fitted scaler.
scaler = StandardScaler()
X_train[numeric_features] = scaler.fit_transform(X_train[numeric_features])
X_test[numeric_features] = scaler.transform(X_test[numeric_features])

# -------------------------------
# Step 3: Build stacking ensemble
# -------------------------------
# Two base learners (a random forest and an XGBoost classifier) feed a
# logistic-regression final estimator. All three share random_state=42.
rf_base = RandomForestClassifier(
    n_estimators=400,
    max_depth=12,
    random_state=42,
)
xgb_base = XGBClassifier(
    n_estimators=400,
    max_depth=6,
    learning_rate=0.1,
    use_label_encoder=False,
    eval_metric='mlogloss',
    random_state=42,
)
estimators = [('rf', rf_base), ('xgb', xgb_base)]

# Final estimator that combines the base learners' outputs.
meta_learner = LogisticRegression(max_iter=1000, random_state=42)

ensemble = StackingClassifier(
    estimators=estimators,
    final_estimator=meta_learner,
    cv=5,
    n_jobs=-1,
)

# Train the stacking ensemble on the training split.
ensemble.fit(X_train, y_train)

# Predict on both splits so training and held-out accuracy can be compared.
y_pred = ensemble.predict(X_test)
train_pred = ensemble.predict(X_train)

train_accuracy = accuracy_score(y_train, train_pred)
print("Training Accuracy:", train_accuracy)

test_accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", test_accuracy)

# Per-class report on the test split.
test_report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", test_report)

# -------------------------------
# Step 4: Save model components
# -------------------------------
# Bundle the fitted ensemble with the fitted scaler, the list of scaled
# columns and the label-to-name mapping, and pickle the bundle as a single
# object.
class_names = dict(enumerate(
    ['Normal', 'Hypothyroid', 'Hyperthyroid', 'Compensated Hypothyroid']
))

model_components = dict(
    ensemble=ensemble,
    scaler=scaler,
    numeric_features=numeric_features,
    class_names=class_names,
)

model_path = "thyroid_model_clinical.pkl"
with open(model_path, "wb") as f:
    pickle.dump(model_components, f)

print(f"Model saved as '{model_path}'")
