In [7]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [5]:
class NeuralNetwork:
    def __init__(self, input_size, hidden_sizes, output_size, learning_rate=0.001, reg_lambda=0.01):
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.reg_lambda = reg_lambda
        self.caches = {}
        self.D = {}
        self.Z = {}
        self.A = {}
        self.parameters = {}
        self.dropout_rate = 0.5
        self.epsilon = 1e-8  # For Adam optimizer and batch norm

        layer_sizes = [input_size] + hidden_sizes + [output_size]
        for i in range(1, len(layer_sizes)):
            self.parameters[f'W{i}'] = np.random.randn(layer_sizes[i - 1], layer_sizes[i]) * np.sqrt(
                2. / layer_sizes[i - 1])
            self.parameters[f'b{i}'] = np.zeros((1, layer_sizes[i]))
            if i < len(layer_sizes) - 1:
                self.parameters[f'gamma{i}'] = np.ones((1, layer_sizes[i]))
                self.parameters[f'beta{i}'] = np.zeros((1, layer_sizes[i]))

            # Adam optimizer parameters
            self.parameters[f'm_W{i}'] = np.zeros_like(self.parameters[f'W{i}'])
            self.parameters[f'v_W{i}'] = np.zeros_like(self.parameters[f'W{i}'])
            self.parameters[f'm_b{i}'] = np.zeros_like(self.parameters[f'b{i}'])
            self.parameters[f'v_b{i}'] = np.zeros_like(self.parameters[f'b{i}'])

            if i < len(layer_sizes) - 1:
                self.parameters[f'm_gamma{i}'] = np.zeros_like(self.parameters[f'gamma{i}'])
                self.parameters[f'v_gamma{i}'] = np.zeros_like(self.parameters[f'gamma{i}'])
                self.parameters[f'm_beta{i}'] = np.zeros_like(self.parameters[f'beta{i}'])
                self.parameters[f'v_beta{i}'] = np.zeros_like(self.parameters[f'beta{i}'])

    def leaky_relu(self, Z, alpha=0.01):
        return np.maximum(alpha * Z, Z)

    def leaky_relu_derivative(self, Z, alpha=0.01):
        dZ = np.ones_like(Z)
        dZ[Z < 0] = alpha
        return dZ

    def batch_norm(self, Z, gamma, beta, cache=None):
        if cache is None:  # Training mode
            mean = np.mean(Z, axis=0, keepdims=True)
            var = np.var(Z, axis=0, keepdims=True)
            Z_norm = (Z - mean) / np.sqrt(var + self.epsilon)
            out = gamma * Z_norm + beta
            cache = (Z_norm, mean, var, gamma, beta)
            return out, cache
        else:  # Test mode
            Z_norm, mean, var, gamma, beta = cache
            return gamma * Z_norm + beta, cache

    def stable_softmax(self, Z):
        exp_Z = np.exp(Z - np.max(Z, axis=1, keepdims=True))
        return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)

    def forward(self, X, training=True):
        if isinstance(X, pd.DataFrame):
            X = X.values

        self.Z = {}
        self.A = {0: X}
        self.caches = {}
        self.D = {}  # Dropout masks

        # ... rest of the method remains the same

        for i in range(1, len(self.hidden_sizes) + 2):
            self.Z[i] = self.A[i - 1] @ self.parameters[f'W{i}'] + self.parameters[f'b{i}']

            if i < len(self.hidden_sizes) + 1:
                self.Z[i], self.caches[i] = self.batch_norm(self.Z[i], self.parameters[f'gamma{i}'],
                                                            self.parameters[f'beta{i}'])
                self.A[i] = self.leaky_relu(self.Z[i])
                if training:
                    self.D[i] = np.random.binomial(1, 1 - self.dropout_rate, size=self.A[i].shape) / (
                                1 - self.dropout_rate)
                    self.A[i] *= self.D[i]
            else:
                self.A[i] = self.stable_softmax(self.Z[i])

        return self.A[len(self.hidden_sizes) + 1]

    def backward(self, X, Y):
        m = X.shape[0]
        gradients = {}

        dZ = self.A[len(self.hidden_sizes) + 1] - Y

        for i in range(len(self.hidden_sizes) + 1, 0, -1):
            if i == len(self.hidden_sizes) + 1:
                gradients[f'dW{i}'] = (1 / m) * (self.A[i - 1].T @ dZ) + (self.reg_lambda / m) * self.parameters[
                    f'W{i}']
                gradients[f'db{i}'] = (1 / m) * np.sum(dZ, axis=0, keepdims=True)
            else:
                dZ = dZ @ self.parameters[f'W{i + 1}'].T
                dZ *= self.leaky_relu_derivative(self.Z[i])

                if i > 0:  # Apply dropout
                    dZ *= self.D[i]

                # Batch norm backward pass
                dZ_norm = dZ * self.parameters[f'gamma{i}']
                Z_norm, mean, var, gamma, beta = self.caches[i]

                gradients[f'dgamma{i}'] = np.sum(dZ * Z_norm, axis=0, keepdims=True)
                gradients[f'dbeta{i}'] = np.sum(dZ, axis=0, keepdims=True)

                dZ = (1. / m) * gamma * (var + self.epsilon) ** (-1. / 2.) * (
                            m * dZ - np.sum(dZ, axis=0) - Z_norm * np.sum(dZ * Z_norm, axis=0))

                gradients[f'dW{i}'] = (1 / m) * (self.A[i - 1].T @ dZ) + (self.reg_lambda / m) * self.parameters[
                    f'W{i}']
                gradients[f'db{i}'] = (1 / m) * np.sum(dZ, axis=0, keepdims=True)

        return gradients

    def update_parameters(self, gradients, t, beta1=0.9, beta2=0.999):
        for i in range(1, len(self.hidden_sizes) + 2):
            for param in ['W', 'b']:
                self.parameters[f'm_{param}{i}'] = beta1 * self.parameters[f'm_{param}{i}'] + (1 - beta1) * gradients[
                    f'd{param}{i}']
                self.parameters[f'v_{param}{i}'] = beta2 * self.parameters[f'v_{param}{i}'] + (1 - beta2) * (
                            gradients[f'd{param}{i}'] ** 2)

                m_hat = self.parameters[f'm_{param}{i}'] / (1 - beta1 ** t)
                v_hat = self.parameters[f'v_{param}{i}'] / (1 - beta2 ** t)

                self.parameters[f'{param}{i}'] -= self.learning_rate * m_hat / (np.sqrt(v_hat) + self.epsilon)

            if i < len(self.hidden_sizes) + 1:
                for param in ['gamma', 'beta']:
                    self.parameters[f'm_{param}{i}'] = beta1 * self.parameters[f'm_{param}{i}'] + (1 - beta1) * \
                                                       gradients[f'd{param}{i}']
                    self.parameters[f'v_{param}{i}'] = beta2 * self.parameters[f'v_{param}{i}'] + (1 - beta2) * (
                                gradients[f'd{param}{i}'] ** 2)

                    m_hat = self.parameters[f'm_{param}{i}'] / (1 - beta1 ** t)
                    v_hat = self.parameters[f'v_{param}{i}'] / (1 - beta2 ** t)

                    self.parameters[f'{param}{i}'] -= self.learning_rate * m_hat / (np.sqrt(v_hat) + self.epsilon)

    def train(self, X, y, epochs, batch_size=32, validation_data=None,
              early_stopping_patience=5, min_accuracy=0.8):
        if isinstance(X, pd.DataFrame):
            X = X.values
        if isinstance(y, pd.Series):
            y = y.values

        y_one_hot = np.eye(self.output_size)[y]
        best_val_loss = float('inf')
        patience_counter = 0
        t = 0  # For Adam optimizer

        for epoch in range(epochs):
            indices = np.arange(X.shape[0])
            np.random.shuffle(indices)
            X_shuffled = X[indices]
            y_shuffled = y_one_hot[indices]

            for i in range(0, X.shape[0], batch_size):
                t += 1
                X_batch = X_shuffled[i:i + batch_size]
                y_batch = y_shuffled[i:i + batch_size]

                self.forward(X_batch, training=True)
                gradients = self.backward(X_batch, y_batch)
                self.update_parameters(gradients, t)

            if epoch % 10 == 0:
                train_loss = self.compute_loss(X, y_one_hot)
                train_accuracy = self.score(X, y)

                print(f"Epoch {epoch}/{epochs}")
                print(f"Train Accuracy: {train_accuracy:.4f}")

                if validation_data:
                    X_val, y_val = validation_data
                    if hasattr(X_val, 'values'):
                        X_val = X_val.values
                    if hasattr(y_val, 'values'):
                        y_val = y_val.values
                    val_loss = self.compute_loss(X_val, np.eye(self.output_size)[y_val])
                    val_accuracy = self.score(X_val, y_val)
                    print(f"Val Accuracy: {val_accuracy:.4f}")

                    if val_accuracy >= min_accuracy:
                        print(f"Reached minimum accuracy of {min_accuracy}. Stopping training.")
                        return

                    if val_loss < best_val_loss:
                        best_val_loss = val_loss
                        patience_counter = 0
                    else:
                        patience_counter += 1

                    if patience_counter >= early_stopping_patience and val_accuracy >= min_accuracy:
                        print("Early stopping")
                        return

    def compute_loss(self, X, y):
        y_pred = self.forward(X, training=False)
        return -np.mean(y * np.log(y_pred + self.epsilon))

    def predict(self, X):
        if isinstance(X, pd.DataFrame):
            X = X.values
        return np.argmax(self.forward(X, training=False), axis=1)

    def score(self, X, y):
        if isinstance(X, pd.DataFrame):
            X = X.values
        if isinstance(y, pd.Series):
            y = y.values
        return np.mean(self.predict(X) == y)

In [8]:
def preprocess(filename):
    df = shuffle_dataframe(pd.read_csv(filename))
    y = df.iloc[:, 0]
    df.drop(columns='Unnamed: 0', inplace=True)
    y = convert_array(y)
    print(df, "\n", y)
    return df, y


def shuffle_dataframe(df):
    """
    Shuffle the rows of a DataFrame.

    Parameters:
    df (pandas.DataFrame): The input DataFrame to be shuffled.

    Returns:
    pandas.DataFrame: A new DataFrame with shuffled rows.
    """

    # Create a copy of the original DataFrame to avoid modifying it
    df_shuffled = df.copy()

    # Get the number of rows in the DataFrame
    n = df_shuffled.shape[0]

    # Create a random permutation of indices
    shuffled_indices = np.random.permutation(n)

    # Reindex the DataFrame with the shuffled indices
    df_shuffled = df_shuffled.iloc[shuffled_indices].reset_index(drop=True)

    return df_shuffled


def convert_array(input_array):
    return [1 if item.lower().startswith('pt_fibro') else 0 if item.lower().startswith('pt_ctrl') else item for item in
            input_array]


def plot_decision_boundary(X, y, model, title):
    # Reduce dimensionality to 2D using PCA
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X)

    # Create a mesh grid
    x_min, x_max = X_pca[:, 0].min() - 1, X_pca[:, 0].max() + 1
    y_min, y_max = X_pca[:, 1].min() - 1, X_pca[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                         np.arange(y_min, y_max, 0.1))

    # Make predictions on the mesh grid
    Z = model.predict(pca.inverse_transform(np.c_[xx.ravel(), yy.ravel()]))
    Z = Z.reshape(xx.shape)

    # Plot the decision boundary
    plt.figure(figsize=(10, 8))
    plt.contourf(xx, yy, Z, alpha=0.4)
    plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, alpha=0.8)
    plt.title(title)
    plt.xlabel('First Principal Component')
    plt.ylabel('Second Principal Component')
    plt.colorbar()
    plt.show()




In [9]:
X_train, y_train = preprocess('MB_data_train.csv')
# Normalize your input data
"""print("X_train std\n", X_train.std())
train_std = X_train.std()
train_std = np.where(train_std == 0, 1, train_std)
X_train_normalized = (X_train - X_train.mean()) / train_std
print("X_train after normalize - \n", X_train_normalized)"""
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_normalized = scaler.fit_transform(X_train)
X_val_normalized = scaler.transform(X_val)

# Create and train the network
input_size = X_train_normalized.shape[1]
hidden_sizes = [128, 64, 32]
output_size = 2
nn = NeuralNetwork(input_size, hidden_sizes, output_size, learning_rate=0.0005, reg_lambda=0.01)

# Train with early stopping and validation data
nn.train(X_train_normalized, y_train, epochs=10000, batch_size=64, validation_data=(X_val_normalized, y_val),
         early_stopping_patience=20)

# Evaluate the network
train_accuracy = nn.score(X_train_normalized, y_train)
val_accuracy = nn.score(X_val_normalized, y_val)
print(f"Final Train Accuracy: {train_accuracy:.4f}")
print(f"Final Validation Accuracy: {val_accuracy:.4f}")


      0  1  2  3    4     5    6    7  8  9  ...  1610  1611  1612  1613  \
0     0  0  6  0    0     0    0    0  0  4  ...     0     0     4     2   
1     0  0  6  0    0     0  887  817  7  0  ...     0   114     1     0   
2     0  0  0  0  358     0    0    0  0  0  ...     0     0    54     0   
3     0  0  0  0    0     0    0    0  0  1  ...    93     0    35     6   
4    22  0  3  1    0     1    0    0  0  0  ...     0     0    56     3   
..  ... .. .. ..  ...   ...  ...  ... .. ..  ...   ...   ...   ...   ...   
95    0  0  0  0    0     0    0    0  0  0  ...     0     0    22    17   
96  125  0  4  0    0  3263    0  712  0  0  ...     0     0    93     4   
97    0  0  0  0    0   229    0    0  0  0  ...     0     0    49     3   
98    0  0  0  0    0     0    0    0  5  0  ...     0     0    23   310   
99    1  0  0  0    0     0    0    0  1  0  ...     0    15     0     3   

    1614  1615  1616  1617  1618  1619  
0      2     0     0     0     0     0  
1    

Epoch 1080/10000
Train Accuracy: 1.0000
Val Accuracy: 0.5000
Epoch 1090/10000
Train Accuracy: 1.0000
Val Accuracy: 0.5500
Epoch 1100/10000
Train Accuracy: 1.0000
Val Accuracy: 0.5500
Epoch 1110/10000
Train Accuracy: 1.0000
Val Accuracy: 0.5500
Epoch 1120/10000
Train Accuracy: 1.0000
Val Accuracy: 0.5500
Epoch 1130/10000
Train Accuracy: 1.0000
Val Accuracy: 0.5500
Epoch 1140/10000
Train Accuracy: 1.0000
Val Accuracy: 0.5500
Epoch 1150/10000
Train Accuracy: 1.0000
Val Accuracy: 0.5500
Epoch 1160/10000
Train Accuracy: 1.0000
Val Accuracy: 0.5500
Epoch 1170/10000
Train Accuracy: 1.0000
Val Accuracy: 0.5500
Epoch 1180/10000
Train Accuracy: 1.0000
Val Accuracy: 0.5500
Epoch 1190/10000
Train Accuracy: 1.0000
Val Accuracy: 0.5500
Epoch 1200/10000
Train Accuracy: 1.0000
Val Accuracy: 0.5500
Epoch 1210/10000
Train Accuracy: 1.0000
Val Accuracy: 0.5500
Epoch 1220/10000
Train Accuracy: 1.0000
Val Accuracy: 0.5500
Epoch 1230/10000
Train Accuracy: 1.0000
Val Accuracy: 0.5500
Epoch 1240/10000
Train A

KeyboardInterrupt: 