In [3]:
# Improved ANN (TensorFlow/Keras) for predicting PCI values: three
# L2-regularised ReLU layers, dropout after the first two, early stopping.
#
# Prerequisite: `pci_data` must already exist in the kernel (this cell reads it
# but never creates it); it is expected to be a pandas DataFrame that contains
# the PCI target column alongside the feature columns.

import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2

# One seed for everything stochastic in this cell. set_random_seed seeds
# Python's `random`, NumPy and TensorFlow, so weight initialisation, dropout
# masks and batch shuffling repeat from run to run.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Prepare the data.
# The notebook refers to the target both as 'PCI (%)' and as 'PCI'; take the
# first spelling that is actually present instead of failing on a bare KeyError.
TARGET_CANDIDATES = ('PCI (%)', 'PCI')
target_col = next((name for name in TARGET_CANDIDATES if name in pci_data.columns), None)
if target_col is None:
    raise KeyError(
        f"None of the expected target columns {TARGET_CANDIDATES} found in pci_data; "
        f"available columns: {list(pci_data.columns)}"
    )

X = pci_data.drop(columns=target_col)  # Features: every column except the target
y = pci_data[target_col]               # Target

# Split into training and testing sets (80 % / 20 %)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Scale the features; the scaler is fitted on the training split only so no
# test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build the improved ANN model
model = Sequential([
    Dense(units=128, activation='relu', input_shape=(X_train.shape[1],), kernel_regularizer=l2(0.001)),
    Dropout(0.3),  # Dropout layer to prevent overfitting
    Dense(units=64, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.3),  # Another Dropout layer
    Dense(units=32, activation='relu', kernel_regularizer=l2(0.001)),
    Dense(units=1, activation='linear')  # Single linear unit: regression output
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Early stopping: halt after 10 epochs without a val_loss improvement and roll
# back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model; validation_split holds out 20 % of the training data to
# produce the val_loss that early stopping monitors.
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=200,
    batch_size=16,
    validation_split=0.2,
    verbose=1,
    callbacks=[early_stopping],
)

# Predict on the test set
y_pred = model.predict(X_test_scaled)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error on Test Data: {mse}')
print(f'R^2 Score on Test Data: {r2}')

# Plot the actual vs predicted values; the red diagonal marks where
# predicted == actual.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, color='blue', label='Predicted vs Actual')
ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], color='red', lw=2, label='Perfect Prediction')
ax.set_title('Actual vs Predicted PCI Values')
ax.set_xlabel('Actual PCI')
ax.set_ylabel('Predicted PCI')
ax.legend()
ax.grid(True)
plt.show()


NameError: name 'pci_data' is not defined

In [None]:
# ANN with batch normalization and dropout for predicting PCI values.
#
# Prerequisite: `pci_data` must already exist in the kernel (this cell reads it
# but never creates it); it is expected to be a pandas DataFrame that contains
# the PCI target column alongside the feature columns.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# One seed for everything stochastic in this cell. set_random_seed seeds
# Python's `random`, NumPy and TensorFlow, so weight initialisation, dropout
# masks and batch shuffling repeat from run to run.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Prepare the data.
# The notebook refers to the target both as 'PCI' and as 'PCI (%)'; take the
# first spelling that is actually present instead of failing on a bare KeyError.
TARGET_CANDIDATES = ('PCI', 'PCI (%)')
target_col = next((name for name in TARGET_CANDIDATES if name in pci_data.columns), None)
if target_col is None:
    raise KeyError(
        f"None of the expected target columns {TARGET_CANDIDATES} found in pci_data; "
        f"available columns: {list(pci_data.columns)}"
    )

X = pci_data.drop(columns=target_col)  # Features: every column except the target
y = pci_data[target_col]               # Target

# Split into training and testing sets (80 % / 20 %)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Scale the features; the scaler is fitted on the training split only so no
# test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build the improved ANN model
model = Sequential()

# First hidden layer (receives the scaled features), followed by batch normalization
model.add(Dense(units=128, activation='relu', input_shape=(X_train.shape[1],)))
model.add(BatchNormalization())

# Further hidden layers, each followed by batch normalization and dropout;
# the dropout rate shrinks (0.4 -> 0.3 -> 0.2) as the layers narrow.
model.add(Dense(units=128, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(Dense(units=64, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.3))

model.add(Dense(units=32, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.2))

# Output layer: single linear unit for regression
model.add(Dense(units=1, activation='linear'))

# Compile the model. learning_rate=0.001 is Adam's default in Keras (not a
# reduced rate); it is spelled out here so it is easy to tune.
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mean_squared_error')

# Early stopping: halt after 15 epochs without a val_loss improvement and roll
# back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Train the model; validation_split holds out 20 % of the training data to
# produce the val_loss that early stopping monitors.
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=300,
    batch_size=32,
    validation_split=0.2,
    verbose=1,
    callbacks=[early_stopping],
)

# Predict on the test set
y_pred = model.predict(X_test_scaled)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error on Test Data: {mse}')
print(f'R^2 Score on Test Data: {r2}')

# Plot the actual vs predicted values; the red diagonal marks where
# predicted == actual.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, color='blue', label='Predicted vs Actual')
ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], color='red', lw=2, label='Perfect Prediction')
ax.set_title('Actual vs Predicted PCI Values')
ax.set_xlabel('Actual PCI')
ax.set_ylabel('Predicted PCI')
ax.legend()
ax.grid(True)
plt.show()
