In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# Load the dataset
data = pd.read_csv('heart_statlog_cleveland_hungary_final.csv')

# Checking for missing values
print("Missing values in each column:")
print(data.isnull().sum())

# Fill missing values with the mean of the column
data.fillna(data.mean(), inplace=True)

# Display the first few rows of the dataset to understand its structure
print(data.head())

# Assuming 'sex', 'chest pain type', 'fasting blood sugar', 'resting ecg', 'exercise angina', and 'ST slope' are categorical
categorical_columns = ['sex', 'chest pain type', 'fasting blood sugar', 'resting ecg', 'exercise angina', 'ST slope']

# Converting categorical columns to numerical format using one-hot encoding
data = pd.get_dummies(data, columns=categorical_columns, drop_first=True)

# Ensure all data is numeric
data = data.apply(pd.to_numeric)

# Detecting and handling outliers
# Determine the number of rows and columns for the subplots
num_columns = len(data.columns)
num_rows = (num_columns + 3) // 4  # Adjust for 4 columns per row

# Plotting boxplots to visualize outliers
plt.figure(figsize=(15, num_rows * 3))
for i, column in enumerate(data.columns, 1):
    plt.subplot(num_rows, 4, i)
    sns.boxplot(data[column])
    plt.title(column)
plt.tight_layout()
plt.show()

# Removing outliers using Z-score method
z_scores = np.abs(stats.zscore(data.select_dtypes(include=[np.number])))
data = data[(z_scores < 3).all(axis=1)]

# Separating features and target variable
X = data.drop('target', axis=1)
y = data['target']

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Confirm the shapes of the train and test sets
print(f"Training set shape: {X_train.shape}, Testing set shape: {X_test.shape}")

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Initialize the model
mlr = LinearRegression()

# Train the model
mlr.fit(X_train, y_train)

# Make predictions
y_pred_mlr = mlr.predict(X_test)

# Evaluate the model
mse_mlr = mean_squared_error(y_test, y_pred_mlr)
print(f'Mean Squared Error for MLR-F: {mse_mlr}')

In [31]:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

class NeuralNet:
    def __init__(self, layers, epochs=1000, learning_rate=0.01, momentum=0.9, activation='sigmoid', validation_split=0.2, patience=10, batch_size=32):
        """
        Initialize the neural network with the given parameters.
        
        Parameters:
        - layers: List of integers, the number of neurons in each layer.
        - epochs: Integer, the number of epochs for training.
        - learning_rate: Float, the learning rate for gradient descent.
        - momentum: Float, the momentum factor for gradient descent.
        - activation: String, the activation function to use ('sigmoid', 'relu', 'linear', 'tanh').
        - validation_split: Float, the proportion of data to use for validation.
        - patience: Integer, the number of epochs to wait for improvement before early stopping.
        - batch_size: Integer, the number of samples per batch.
        """
        self.L = len(layers)
        self.n = layers
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.fact = activation
        self.validation_split = validation_split
        self.patience = patience
        self.batch_size = batch_size

        self.h = [np.zeros((layer, 1)) for layer in layers]
        self.xi = [np.zeros((layer, 1)) for layer in layers]
        self.w = [np.random.randn(layers[i], layers[i-1]) * 0.01 if i > 0 else np.zeros((1, 1)) for i in range(self.L)]
        self.theta = [np.zeros((layer, 1)) for layer in layers]
        self.delta = [np.zeros((layer, 1)) for layer in layers]
        self.d_w = [np.zeros_like(w) for w in self.w]
        self.d_theta = [np.zeros_like(theta) for theta in self.theta]
        self.d_w_prev = [np.zeros_like(w) for w in self.w]
        self.d_theta_prev = [np.zeros_like(theta) for theta in self.theta]

        self.activation_function = self.get_activation_function(activation)
        self.activation_derivative = self.get_activation_derivative(activation)

        self.train_losses = []
        self.val_losses = []

    def get_activation_function(self, name):
        """Return the activation function based on the given name."""
        if name == 'sigmoid':
            return lambda z: 1 / (1 + np.exp(-np.clip(z, -500, 500)))
        elif name == 'relu':
            return lambda z: np.maximum(0, z)
        elif name == 'linear':
            return lambda z: z
        elif name == 'tanh':
            return lambda z: np.tanh(z)
        else:
            raise ValueError("Unsupported activation function")

    def get_activation_derivative(self, name):
        """Return the derivative of the activation function based on the given name."""
        if name == 'sigmoid':
            return lambda z: z * (1 - z)
        elif name == 'relu':
            return lambda z: np.where(z > 0, 1, 0)
        elif name == 'linear':
            return lambda z: np.ones_like(z)
        elif name == 'tanh':
            return lambda z: 1 - z**2
        else:
            raise ValueError("Unsupported activation function")

    def forward_propagation(self, X):
        """Perform forward propagation through the network."""
        self.xi[0] = X
        for lay in range(1, self.L):
            self.h[lay] = np.dot(self.w[lay], self.xi[lay - 1]) - self.theta[lay]
            self.xi[lay] = self.activation_function(self.h[lay])
        return self.xi[-1]

    def backward_propagation(self, X, y):
        """Perform backward propagation through the network."""
        m = y.shape[1]
        self.forward_propagation(X)
        self.delta[-1] = (self.xi[-1] - y) * self.activation_derivative(self.h[-1])

        for lay in range(self.L - 2, 0, -1):
            self.delta[lay] = self.activation_derivative(self.h[lay]) * np.dot(self.w[lay + 1].T, self.delta[lay + 1])

        for lay in range(1, self.L):
            self.d_w[lay] = np.dot(self.delta[lay], self.xi[lay - 1].T) / m
            self.d_theta[lay] = np.sum(self.delta[lay], axis=1, keepdims=True) / m

            self.d_w_prev[lay] = self.momentum * self.d_w_prev[lay] - self.learning_rate * self.d_w[lay]
            self.d_theta_prev[lay] = self.momentum * self.d_theta_prev[lay] - self.learning_rate * self.d_theta[lay]

            self.w[lay] += self.d_w_prev[lay]
            self.theta[lay] += self.d_theta_prev[lay]

    def fit(self, X, y):
        """Train the neural network using the training data."""
        kf = KFold(n_splits=4, shuffle=True, random_state=42)
        validation_errors = []

        best_val_loss = float('inf')
        patience_counter = 0

        for train_index, val_index in kf.split(X.T):
            X_train, X_val = X[:, train_index], X[:, val_index]
            y_train, y_val = y[:, train_index], y[:, val_index]

            for epoch in range(self.epochs):
                for i in range(0, X_train.shape[1], self.batch_size):
                    end = i + self.batch_size
                    self.backward_propagation(X_train[:, i:end], y_train[:, i:end])
                
                if epoch % 100 == 0:
                    train_loss = np.mean((self.forward_propagation(X_train) - y_train) ** 2)
                    val_loss = np.mean((self.forward_propagation(X_val) - y_val) ** 2)
                    self.train_losses.append(train_loss)
                    self.val_losses.append(val_loss)
                    print(f'Epoch {epoch}, Train Loss: {train_loss}, Validation Loss: {val_loss}')

                # Early stopping
                val_loss = np.mean((self.forward_propagation(X_val) - y_val) ** 2)
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    patience_counter = 0
                else:
                    patience_counter += 1

                if patience_counter >= self.patience:
                    print(f'Early stopping at epoch {epoch}')
                    break

            val_error = np.mean((self.forward_propagation(X_val) - y_val) ** 2)
            validation_errors.append(val_error)

        self.validation_error = np.mean(validation_errors)
        print(f'Validation Error: {self.validation_error}')

    def predict(self, X):
        """Make predictions using the trained neural network."""
        return self.forward_propagation(X)

    def loss_epochs(self):
        """Return the training and validation losses."""
        return np.array(self.train_losses), np.array(self.val_losses)

# Example usage
# Ensure you have the preprocessed data as numpy arrays
# X_train_np, y_train_np, X_test_np, y_test should be numpy arrays

# If you have pandas DataFrame/Series, convert them to numpy arrays
X_train_np = X_train.to_numpy().T if hasattr(X_train, 'to_numpy') else X_train.T
y_train_np = y_train.to_numpy().reshape(1, -1) if hasattr(y_train, 'to_numpy') else y_train.reshape(1, -1)
X_test_np = X_test.to_numpy().T if hasattr(X_test, 'to_numpy') else X_test.T

layers = [X_train_np.shape[0], 9, 5, 1]
nn = NeuralNet(layers, epochs=1000, learning_rate=0.01, activation='sigmoid', validation_split=0.2, patience=10, batch_size=32)

# Train the neural network
nn.fit(X_train_np, y_train_np)

# Make predictions
y_pred_nn = nn.predict(X_test_np)

# Evaluate the model
mse_nn = mean_squared_error(y_test, y_pred_nn.T)
print(f'Mean Squared Error for BP: {mse_nn}')

Epoch 0, Train Loss: 0.25002213468882206, Validation Loss: 0.25001223380457177
Early stopping at epoch 10
Epoch 0, Train Loss: 0.2500279690106443, Validation Loss: 0.2500082525178327
Early stopping at epoch 10
Epoch 0, Train Loss: 0.2500121556307631, Validation Loss: 0.2500720051405847
Early stopping at epoch 0
Epoch 0, Train Loss: 0.2500333104605538, Validation Loss: 0.25000993559250806
Early stopping at epoch 0
Validation Error: 0.25002643017062937
Mean Squared Error for BP: 0.2500123076115742


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from sklearn.metrics import mean_squared_error

# Ensure data is in numpy array format
X_train = X_train.to_numpy() if hasattr(X_train, 'to_numpy') else X_train
y_train = y_train.to_numpy() if hasattr(y_train, 'to_numpy') else y_train
X_test = X_test.to_numpy() if hasattr(X_test, 'to_numpy') else X_test
y_test = y_test.to_numpy() if hasattr(y_test, 'to_numpy') else y_test

# Convert data to torch tensors
X_train_tensor = torch.Tensor(X_train)
y_train_tensor = torch.Tensor(y_train).view(-1, 1)
X_test_tensor = torch.Tensor(X_test)
y_test_tensor = torch.Tensor(y_test).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Define the neural network
class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(X_train.shape[1], 9)
        self.fc2 = nn.Linear(9, 5)
        self.fc3 = nn.Linear(5, 1)
    
    def forward(self, x):
        x = torch.sigmoid(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize the model, loss function, and optimizer
model = SimpleNet()
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Train the model
epochs = 1000
for epoch in range(epochs):
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
    
    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

# Make predictions
y_pred_bp_f = model(X_test_tensor).detach().numpy()

# Evaluate the model
mse_bp_f = mean_squared_error(y_test, y_pred_bp_f)
print(f'Mean Squared Error for BP-F: {mse_bp_f}')