In [6]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier,BaggingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.utils.class_weight import compute_class_weight
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
import joblib

# Load the advanced dataset

df = pd.read_csv(r"C:\Users\Srinu\Downloads\thyroid_disease_updated_with_normal.csv")

# Separate features and target variable
X = df.drop(columns=["Diagnosis"])
y = df["Diagnosis"]

# Encode categorical features using One-Hot Encoding
categorical_features = ["Weight Change", "Heart Rate", "Temperature Sensitivity", "Digestive Issues"]
encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
X_encoded = pd.DataFrame(encoder.fit_transform(X[categorical_features]))

# Rename encoded columns properly
encoded_feature_names = encoder.get_feature_names_out(categorical_features)
X_encoded.columns = encoded_feature_names

# Merge encoded features and drop originals
X = X.drop(columns=categorical_features).reset_index(drop=True)
X = pd.concat([X, X_encoded], axis=1)

# Scale numerical features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Encode target variable
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Compute class weights for balanced training
class_weights = compute_class_weight('balanced', classes=np.unique(y_encoded), y=y_encoded)
class_weight_dict = {i: weight for i, weight in enumerate(class_weights)}

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

# ------------------ Advanced Neural Network ------------------
nn_model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(len(np.unique(y_encoded)), activation='softmax')
])

# Compile the model
nn_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                 loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Early stopping to prevent overfitting
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the neural network with class weights
nn_model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test),
             class_weight=class_weight_dict, callbacks=[early_stopping])

# Extract deep features from the trained network
X_train_nn = nn_model.predict(X_train)
X_test_nn = nn_model.predict(X_test)

# ------------------ Ensemble Hybrid Classifiers ------------------
# Voting Classifier
voting_clf = VotingClassifier(estimators=[
    ('RF', RandomForestClassifier(n_estimators=150, random_state=42)),
    ('GB', GradientBoostingClassifier(n_estimators=100, random_state=42)),
    ('XGB', XGBClassifier(eval_metric="mlogloss", random_state=42))
], voting='soft')
voting_clf.fit(X_train_nn, y_train)

# Bagging Classifier
bagging_clf = BaggingClassifier(estimator=RandomForestClassifier(), n_estimators=10, random_state=42)
bagging_clf.fit(X_train_nn, y_train)

# Stacking Classifier (Meta-learning)
stacking_clf = StackingClassifier(estimators=[
    ('RF', RandomForestClassifier(n_estimators=150, random_state=42)),
    ('GB', GradientBoostingClassifier(n_estimators=100, random_state=42)),
    ('LGBM', LGBMClassifier(n_estimators=100, random_state=42))
], final_estimator=MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=2000, random_state=42))
stacking_clf.fit(X_train_nn, y_train)

# ------------------ Unified Final Model ------------------
# Combine hybrid predictions for the final model
nn_preds = nn_model.predict(X_test)
voting_preds = voting_clf.predict(X_test_nn).reshape(-1, 1)
bagging_preds = bagging_clf.predict(X_test_nn).reshape(-1, 1)
stacking_preds = stacking_clf.predict(X_test_nn).reshape(-1, 1)

combined_features = np.hstack([nn_preds, voting_preds, bagging_preds, stacking_preds])

# Final classifier
final_model = MLPClassifier(hidden_layer_sizes=(128, 64), activation='relu', max_iter=2000, random_state=42)
final_model.fit(combined_features, y_test)

# ------------------ Model Evaluation ------------------
final_predictions = final_model.predict(combined_features)
print("üîπ Accuracy:", accuracy_score(y_test, final_predictions))
print("üîπ Precision:", precision_score(y_test, final_predictions, average='weighted'))
print("üîπ Recall:", recall_score(y_test, final_predictions, average='weighted'))
print("üîπ F1 Score:", f1_score(y_test, final_predictions, average='weighted'))

# ------------------ Save Models and Preprocessors ------------------
nn_model.save("thyroid_nn_advanced.keras")
final_model_file = "final_unified_model_advanced.pkl"
joblib.dump(final_model, final_model_file)
joblib.dump(scaler, "scaler.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")
joblib.dump(encoder, "encoder.pkl")

# Save Hybrid Classifiers
joblib.dump(voting_clf, "voting_clf.pkl")
joblib.dump(bagging_clf, "bagging_clf.pkl")
joblib.dump(stacking_clf, "stacking_clf.pkl")

print("‚úÖ Advanced Unified Model and preprocessors saved successfully.")


Epoch 1/50
[1m55/55[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.3074 - loss: 1.9784 - val_accuracy: 0.6059 - val_loss: 1.4959
Epoch 2/50
[1m55/55[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5798 - loss: 1.0725 - val_accuracy: 0.6310 - val_loss: 1.1687
Epoch 3/50
[1m55/55[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6670 - loss: 0.8719 - val_accuracy: 0.7107 - val_loss: 0.9008
Epoch 4/50
[1m55/55[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6825 - loss: 0.7721 - val_accuracy: 0.6834 - val_loss: 0.7940
Epoch 5/50
[1m55/55[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6758 - loss: 0.7657 - val_accuracy: 0.7084 - val_loss: 0.6878


In [None]:
import pandas as pd
import numpy as np
import joblib
import tensorflow as tf

# Load models and preprocessors
scaler = joblib.load("scaler.pkl")
label_encoder = joblib.load("label_encoder.pkl")
encoder = joblib.load("encoder.pkl")
nn_model = tf.keras.models.load_model("thyroid_nn_advanced.keras")
final_model = joblib.load("final_unified_model_advanced.pkl")

# Define categorical features
categorical_features = ["Weight Change", "Heart Rate", "Temperature Sensitivity", "Digestive Issues"]

# Convert the sample input to a DataFrame
sample_input = {
    "Fatigue": 1,
    "Weight Change": "No change",
    "Heart Rate": "Slow",
    "Temperature Sensitivity": "Cold",
    "Mood Changes": 1,
    "Hair/Nail Changes": 1,
    "Neck Swelling": 1,
    "Digestive Issues": "Constipation",
    "Eye Changes": 1,
    "Goiter Presence": 0,
    "Menstrual Irregularities": 1,
    "Hoarseness": 1,
    "Family History": 1,
    "Previous Thyroid Surgery": 0,
    "Radiation Exposure": 1,
    "Slow Reflexes": 1,
    "Puffy Face": 1,
    "Joint Pain & Stiffness": 1,
    "Muscle Weakness": 1,
    "Memory Problems (Brain Fog)": 1,
    "Dry Skin & Brittle Nails": 1,
    "Sweating Excessively": 0
}


sample_df = pd.DataFrame([sample_input])

# Encode categorical features
encoded_sample = pd.DataFrame(encoder.transform(sample_df[categorical_features]))
encoded_sample.columns = encoder.get_feature_names_out(categorical_features)

# Merge encoded features with the rest
sample_df = sample_df.drop(columns=categorical_features).reset_index(drop=True)
sample_df = pd.concat([sample_df, encoded_sample], axis=1)

# Scale the features
sample_scaled = scaler.transform(sample_df)

# Neural Network prediction (feature extraction)
nn_features = nn_model.predict(sample_scaled)

# Ensure nn_features is 2D
if nn_features.ndim == 1:
    nn_features = nn_features.reshape(-1, 1)

# Get predictions from each hybrid classifier and ensure 2D shape
voting_pred = voting_clf.predict(nn_features).reshape(-1, 1)
bagging_pred = bagging_clf.predict(nn_features).reshape(-1, 1)
stacking_pred = stacking_clf.predict(nn_features).reshape(-1, 1)

# Ensure all arrays have the same number of samples
print("Shapes Before Stacking:")
print("NN Features:", nn_features.shape)
print("Voting Pred:", voting_pred.shape)
print("Bagging Pred:", bagging_pred.shape)
print("Stacking Pred:", stacking_pred.shape)

# Combine all features for the final meta-classifier
combined_features = np.hstack([nn_features, voting_pred, bagging_pred, stacking_pred])
print("Success - Combined Features Shape:", combined_features.shape)

# Ensure final_model receives correct input shape
final_prediction = final_model.predict(combined_features)

# Decode the predicted disease
predicted_disease = label_encoder.inverse_transform(final_prediction)

print("üîπ Sample Input for Testing:\n", sample_input)
print("\nüîπ Predicted Thyroid Disease:", predicted_disease[0])
