# In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.layers import Dense, Input, Dropout, LeakyReLU
from tensorflow.keras.models import Model

# Load your dataset
# NOTE: this is a machine-specific absolute path; point it at your local copy
# of the CSV before running.
data = pd.read_csv(r'C:\Users\BHARGAVI CHOWDARY\Desktop\PROJECT_ML\diabetes_012_health_indicators_BRFSS2015.csv')

# Separate features and labels
X = data.drop(columns=['Diabetes_012'])  # Replace 'Diabetes_012' with your label column name
y = data['Diabetes_012']

# Split the dataset into training (80%) and test (20%) sets.
# The split is stratified on the label so both partitions keep the same class
# proportions; the classes are treated as imbalanced below (SMOTE), and an
# unstratified split can under-represent the rare classes in the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize the features.
# The scaler is fitted on the training split only and then re-used on the test
# split, so no test-set statistics influence the transformation.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Apply SMOTE to balance the classes in the training set.
# Only the training data is resampled; the test split is left untouched.
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

# Convert labels to one-hot vectors with 3 classes for multi-class
# classification (required by the categorical cross-entropy loss used later).
# NOTE(review): assumes the label column only holds the values 0, 1 and 2.
y_train_res = tf.keras.utils.to_categorical(y_train_res, 3)
y_test = tf.keras.utils.to_categorical(y_test, 3)

# Define the Neural Network component with increased complexity
def neural_network(input_shape, *, hidden_units=(512, 256, 128, 64),
                   dropout_rate=0.1, leaky_alpha=0.1):
    """Build the fully connected trunk that feeds the classification heads.

    Each entry of ``hidden_units`` becomes a ``Dense`` layer followed by a
    ``LeakyReLU`` activation.  A ``Dropout`` layer follows every hidden block
    except the last one, so the returned model ends on the final activation.
    With the default arguments the architecture is 512 -> 256 -> 128 -> 64
    with dropout 0.1 after the first three blocks.

    Args:
        input_shape: Shape of one input sample (without the batch axis), as
            accepted by ``Input(shape=...)``.
        hidden_units: Widths of the hidden ``Dense`` layers, in order.
        dropout_rate: Rate passed to every ``Dropout`` layer.
        leaky_alpha: Negative slope passed to every ``LeakyReLU`` layer.

    Returns:
        A ``Model`` mapping the input tensor to the output of the last
        hidden block.

    Raises:
        ValueError: If ``hidden_units`` is empty.
    """
    hidden_units = tuple(hidden_units)
    # Validate before creating any layers so a bad call has no side effects.
    if not hidden_units:
        raise ValueError("hidden_units must contain at least one layer width")

    inputs = Input(shape=input_shape)
    x = inputs
    last_block = len(hidden_units) - 1
    for block_index, units in enumerate(hidden_units):
        x = Dense(units)(x)
        x = LeakyReLU(alpha=leaky_alpha)(x)
        # No dropout after the final block: its activations are the features
        # handed to the downstream heads.
        if block_index != last_block:
            x = Dropout(dropout_rate)(x)
    return Model(inputs, x)

# Define the Decision Forest layer
def decision_forest_layer(x, num_trees=5, num_classes=3):
    """Attach an averaged ensemble of softmax heads to a feature tensor.

    Each "tree" is a ``Dense`` layer with ``num_classes`` units and a softmax
    activation applied to the same input ``x``.  The heads are named
    ``tree_1`` ... ``tree_<num_trees>`` and their outputs are combined with a
    Keras ``Average`` layer.

    Args:
        x: Tensor the heads are applied to (here, the trunk's output).
        num_trees: Number of softmax heads to create; must be at least 1.
        num_classes: Number of units (output classes) per head.

    Returns:
        The tensor produced by averaging all head outputs.

    Raises:
        ValueError: If ``num_trees`` is smaller than 1.
    """
    # Fail early with a clear message instead of letting the merge layer
    # reject an empty input list.
    if num_trees < 1:
        raise ValueError(f"num_trees must be at least 1, got {num_trees}")

    tree_outputs = [
        Dense(num_classes, activation='softmax', name=f'tree_{i+1}')(x)
        for i in range(num_trees)
    ]
    return tf.keras.layers.Average()(tree_outputs)

# Build the Deep Neural Decision Forest Model:
# the dense trunk produces features, the averaged softmax heads classify them.
input_shape = (X_train.shape[1],)
nn_model = neural_network(input_shape)
decision_forest_output = decision_forest_layer(nn_model.output)

# Create the final model
model = Model(inputs=nn_model.input, outputs=decision_forest_output)

# Compile the model (one-hot labels -> categorical cross-entropy)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Split the hold-out data into a validation half and a final evaluation half.
# The callbacks below pick the learning-rate schedule and the restored weights
# from `val_loss`; if they monitored the same samples that are used for the
# reported metrics, those metrics would be optimistically biased.  The split
# is stratified on the class index so both halves keep the class proportions.
# `X_test` / `y_test` themselves are left untouched.
holdout_labels = np.argmax(y_test, axis=1)
X_val, X_eval, y_val, y_eval = train_test_split(
    X_test, y_test, test_size=0.5, random_state=42, stratify=holdout_labels
)

# Set up callbacks for learning rate reduction and early stopping
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1)

# Train on the SMOTE-balanced training data; validate on the validation half.
# Early stopping normally ends training well before the 200-epoch ceiling.
history = model.fit(
    X_train_res,
    y_train_res,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=32,
    callbacks=[reduce_lr, early_stopping],
)

# Evaluate the model on the evaluation half, which played no part in training
# or in checkpoint selection.
test_loss, test_accuracy = model.evaluate(X_eval, y_eval)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Generate predictions and convert probabilities / one-hot rows to class ids
y_pred = model.predict(X_eval)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_eval, axis=1)

class_names = ['Class 0', 'Class 1', 'Class 2']

# Classification report
report = classification_report(y_true, y_pred_classes, target_names=class_names)
print(report)

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(y_true, y_pred_classes)
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

# Plotting training and validation accuracy/loss

# Plot accuracy
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.title('Model Accuracy')
plt.show()

# Plot loss
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.title('Model Loss')
plt.show()