In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Loading the dataset
data = pd.read_csv('heart_statlog_cleveland_hungary_final.csv')

# Checking for missing values
print(data.isnull().sum())

# Fill missing values with the mean of the column
data.fillna(data.mean(), inplace=True)

# Display the first few rows of the dataset to understand its structure
print(data.head())

# Assuming 'sex', 'chest pain type', 'fasting blood sugar', 'resting ecg', 'exercise angina', and 'ST slope' are categorical
categorical_columns = ['sex', 'chest pain type', 'fasting blood sugar', 'resting ecg', 'exercise angina', 'ST slope']

# Converting categorical columns to numerical format using one-hot encoding
data = pd.get_dummies(data, columns=categorical_columns, drop_first=True)

# Separating features and target variable
X = data.drop('target', axis=1)
y = data['target']

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)



age                    0
sex                    0
chest pain type        0
resting bp s           0
cholesterol            0
fasting blood sugar    0
resting ecg            0
max heart rate         0
exercise angina        0
oldpeak                0
ST slope               0
target                 0
dtype: int64
   age  sex  chest pain type  resting bp s  cholesterol  fasting blood sugar  \
0   40    1                2           140          289                    0   
1   49    0                3           160          180                    0   
2   37    1                2           130          283                    0   
3   48    0                4           138          214                    0   
4   54    1                3           150          195                    0   

   resting ecg  max heart rate  exercise angina  oldpeak  ST slope  target  
0            0             172                0      0.0         1       0  
1            0             156                0     

In [2]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Initialize the model
mlr = LinearRegression()

# Train the model
mlr.fit(X_train, y_train)

# Make predictions
y_pred_mlr = mlr.predict(X_test)

# Evaluate the model
mse_mlr = mean_squared_error(y_test, y_pred_mlr)
print(f'Mean Squared Error for MLR-F: {mse_mlr}')

Mean Squared Error for MLR-F: 0.10639583725280026


In [5]:
import numpy as np

class NeuralNet:
    def __init__(self, layers, epochs=1000, learning_rate=0.01, momentum=0.9, activation='sigmoid', validation_split=0.2):
        self.L = len(layers)
        self.n = layers.copy()
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.activation = activation
        self.validation_split = validation_split

        self.xi = [np.zeros(layers[lay]) for lay in range(self.L)]
        self.w = [np.zeros((1, 1))]
        for lay in range(1, self.L):
            self.w.append(np.random.randn(layers[lay], layers[lay - 1]) * 0.01)
        self.velocity = [np.zeros_like(w) for w in self.w]

        self.activation_function = self.get_activation_function(activation)
        self.activation_derivative = self.get_activation_derivative(activation)

        self.train_losses = []
        self.val_losses = []

    def get_activation_function(self, name):
        if name == 'sigmoid':
            return lambda z: 1 / (1 + np.exp(-z))
        elif name == 'relu':
            return lambda z: np.maximum(0, z)
        elif name == 'linear':
            return lambda z: z
        elif name == 'tanh':
            return lambda z: np.tanh(z)
        else:
            raise ValueError("Unsupported activation function")

    def get_activation_derivative(self, name):
        if name == 'sigmoid':
            return lambda z: z * (1 - z)
        elif name == 'relu':
            return lambda z: np.where(z > 0, 1, 0)
        elif name == 'linear':
            return lambda z: np.ones_like(z)
        elif name == 'tanh':
            return lambda z: 1 - z**2
        else:
            raise ValueError("Unsupported activation function")

    def forward_propagation(self, X):
        self.xi[0] = X
        for lay in range(1, self.L):
            self.xi[lay] = self.activation_function(np.dot(self.w[lay], self.xi[lay - 1]))
        return self.xi[-1]

    def backward_propagation(self, X, y):
        m = y.shape[1]  # Number of samples
        self.forward_propagation(X)
        deltas = [0] * self.L
        deltas[-1] = (self.xi[-1] - y) * self.activation_derivative(self.xi[-1])

        for lay in range(self.L - 2, 0, -1):
            deltas[lay] = np.dot(self.w[lay + 1].T, deltas[lay + 1]) * self.activation_derivative(self.xi[lay])

        for lay in range(1, self.L):
            self.velocity[lay] = self.momentum * self.velocity[lay] - self.learning_rate * np.dot(deltas[lay], self.xi[lay - 1].T) / m
            self.w[lay] += self.velocity[lay]

    def fit(self, X, y):
        # Split the data into training and validation sets
        split = int((1 - self.validation_split) * X.shape[1])
        X_train, X_val = X[:, :split], X[:, split:]
        y_train, y_val = y[:, :split], y[:, split:]

        for epoch in range(self.epochs):
            self.backward_propagation(X_train, y_train)
            if epoch % 100 == 0:
                train_loss = np.mean((self.xi[-1] - y_train) ** 2)
                val_loss = np.mean((self.forward_propagation(X_val) - y_val) ** 2)
                self.train_losses.append(train_loss)
                self.val_losses.append(val_loss)
                print(f'Epoch {epoch}, Train Loss: {train_loss}, Validation Loss: {val_loss}')

    def predict(self, X):
        return self.forward_propagation(X)

    def loss_epochs(self):
        return np.array(self.train_losses), np.array(self.val_losses)

# Example usage
layers = [X_train.shape[1], 9, 5, 1]
nn = NeuralNet(layers, epochs=1000, learning_rate=0.01, activation='sigmoid', validation_split=0.2)

# Train the neural network
nn.fit(X_train.T, y_train.values.reshape(1, -1))

# Make predictions
y_pred_nn = nn.predict(X_test.T)

# Evaluate the model
mse_nn = mean_squared_error(y_test, y_pred_nn.T)
print(f'Mean Squared Error for BP: {mse_nn}')

Epoch 0, Train Loss: 0.2501240888908279, Validation Loss: 0.2499222391603952
Epoch 100, Train Loss: 0.24907978297505567, Validation Loss: 0.2511613845563069
Epoch 200, Train Loss: 0.24884141432225404, Validation Loss: 0.2520564440057993
Epoch 300, Train Loss: 0.24879746491477917, Validation Loss: 0.2524976075075723
Epoch 400, Train Loss: 0.24878929822937115, Validation Loss: 0.25269781328873675
Epoch 500, Train Loss: 0.24878774452427616, Validation Loss: 0.2527859426165189
Epoch 600, Train Loss: 0.2487874156013423, Validation Loss: 0.25282427040848593
Epoch 700, Train Loss: 0.24878731371389703, Validation Loss: 0.2528408645345218
Epoch 800, Train Loss: 0.24878725387772171, Validation Loss: 0.2528480449607429
Epoch 900, Train Loss: 0.24878720175478183, Validation Loss: 0.25285116100263966
Mean Squared Error for BP: 0.24770101220831583


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Convert data to torch tensors
X_train_tensor = torch.Tensor(X_train)
y_train_tensor = torch.Tensor(y_train).view(-1, 1)
X_test_tensor = torch.Tensor(X_test)
y_test_tensor = torch.Tensor(y_test).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Define the neural network
class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(X_train.shape[1], 9)
        self.fc2 = nn.Linear(9, 5)
        self.fc3 = nn.Linear(5, 1)
    
    def forward(self, x):
        x = torch.sigmoid(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize the model, loss function, and optimizer
model = SimpleNet()
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Train the model
epochs = 1000
for epoch in range(epochs):
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
    
    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

# Make predictions
y_pred_bp_f = model(X_test_tensor).detach().numpy()

# Evaluate the model
mse_bp_f = mean_squared_error(y_test, y_pred_bp_f)
print(f'Mean Squared Error for BP-F: {mse_bp_f}')

ValueError: could not determine the shape of object type 'Series'