In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import os
pip install 
import xlsxwriter

SyntaxError: invalid syntax (4169204426.py, line 8)

In [None]:
# Set a random seed for reproducibility
np.random.seed(7191)

# Define the Neural Network class
class Neural:

    def __init__(self, layers: list, epochs: int,
                 learning_rate: float = 0.001, batch_size: int = 32,
                 validation_split: float = 0.2, verbose: int = 0):
        self._layer_structure = layers
        self._batch_size = batch_size
        self._epochs = epochs
        self._learning_rate = learning_rate
        self._validation_split = validation_split
        self._verbose = verbose
        self._losses = {"train": [], "validation": []}
        self._is_fit = False
        self.__layers = None

    def fit(self, X: np.ndarray, y: np.ndarray):
        X, X_val, y, y_val = train_test_split(X, y, test_size=self._validation_split, random_state=42)
        self.__layers = self.__init_layers()

        for epoch in range(self._epochs):
            epoch_losses = []
            for i in range(0, len(X), self._batch_size):
                x_batch = X[i:i+self._batch_size]
                y_batch = y[i:i+self._batch_size]
                pred, hidden = self.__forward(x_batch)
                loss = self.__calculate_loss(y_batch, pred)
                epoch_losses.append(np.mean(loss ** 2))
                self.__backward(hidden, loss)

            valid_preds, _ = self.__forward(X_val)
            train_loss = np.mean(epoch_losses)
            valid_loss = np.mean(self.__calculate_mse(valid_preds, y_val))
            self._losses["train"].append(train_loss)
            self._losses["validation"].append(valid_loss)

            if self._verbose:
                print(f"Epoch {epoch}: Train MSE: {train_loss}, Valid MSE: {valid_loss}")

        self._is_fit = True

    def predict(self, X: np.ndarray):
        if not self._is_fit:
            raise Exception("Model has not been trained yet.")
        pred, _ = self.__forward(X)
        return pred

    def plot_learning(self, config_idx, run_idx):
        output_dir = 'learning_curves'
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        plt.figure()
        plt.plot(self._losses["train"], label="Train Loss")
        plt.plot(self._losses["validation"], label="Validation Loss")
        plt.title(f"Learning Curve for Config {config_idx}, Run {run_idx}")
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()
        file_name = f"learning_curve_config_{config_idx}_run_{run_idx}.png"
        plt.savefig(os.path.join(output_dir, file_name))
        plt.close()

    def __init_layers(self):
        layers = []
        for i in range(1, len(self._layer_structure)):
            layers.append([
                np.random.rand(self._layer_structure[i-1], self._layer_structure[i]) / 5 - .1,
                np.ones((1, self._layer_structure[i]))
            ])
        return layers

    def __forward(self, batch: np.ndarray):
        hidden = [batch]
        for i in range(len(self.__layers)):
            batch = np.matmul(batch, self.__layers[i][0]) + self.__layers[i][1]
            if i < len(self.__layers) - 1:
                batch = np.maximum(batch, 0)
            hidden.append(batch)
            
            # Check for NaN values
            if np.isnan(batch).any():
                print(f"NaN detected in layer {i+1}")
                print(f"Input to this layer: {hidden[i]}")
                print(f"Weights: {self.__layers[i][0]}")
                print(f"Biases: {self.__layers[i][1]}")
                raise ValueError(f"NaN values detected in layer {i+1}")
        
        return batch, hidden

    def __calculate_loss(self, actual: np.ndarray, predicted: np.ndarray):
        return predicted - actual

    def __calculate_mse(self, actual: np.ndarray, predicted: np.ndarray):
        return (actual - predicted) ** 2

    def __backward(self, hidden, grad):
        for i in range(len(self.__layers)-1, -1, -1):
            if i != len(self.__layers) - 1:
                grad = np.multiply(grad, np.heaviside(hidden[i+1], 0))

            w_grad = hidden[i].T @ grad
            b_grad = np.mean(grad, axis=0)

            self.__layers[i][0] -= w_grad * self._learning_rate
            self.__layers[i][1] -= b_grad * self._learning_rate

            grad = grad @ self.__layers[i][0].T


In [None]:
# Load the dataset (replace with your actual file path)
df = pd.read_csv('data.csv')

# Prepare the dataset
df['diagnosis'] = df['diagnosis'].map({'M': 1, 'B': 0})  # Assuming 'M' and 'B' are diagnosis values
df = df.drop(['id', 'Unnamed: 32'], axis=1)

# Select features and target
X = df[['radius_mean', 'perimeter_mean', 'area_mean', 'concave points_worst', 'perimeter_worst', 'radius_worst', 'concave points_mean']]
y = df['diagnosis']

scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
y_train = y_train.to_numpy().reshape(-1, 1)
y_test = y_test.to_numpy().reshape(-1, 1)


In [None]:
# Prepare the Excel file for logging
workbook = xlsxwriter.Workbook('results.xlsx')
worksheet = workbook.add_worksheet()

# Write headers in the Excel file
headers = ["Config", "Run", "Layer Structure", "Learning Rate", "Batch Size", "Validation Split", "Epochs", "Test Error", "Percent"]
for col, header in enumerate(headers):
    worksheet.write(0, col, header)

# Configuration list to test
configurations = [
    {"layers": [7, 8, 8, 1], "learning_rate": 0.001, "batch_size": 32, "validation_split": 0.3, "epochs": 100},
    {"layers": [7, 4, 4, 1], "learning_rate": 0.0003, "batch_size": 64, "validation_split": 0.25, "epochs": 150},
    {"layers": [7, 6, 6, 1], "learning_rate": 0.001, "batch_size": 32, "validation_split": 0.15, "epochs": 120},
    {"layers": [7, 10, 5, 1], "learning_rate": 0.001, "batch_size": 128, "validation_split": 0.2, "epochs": 200},
    {"layers": [7, 5, 5, 1], "learning_rate": 0.0003, "batch_size": 32, "validation_split": 0.25, "epochs": 200},
    {"layers": [7, 8, 4, 1], "learning_rate": 0.001, "batch_size": 64, "validation_split": 0.1, "epochs": 150},
    {"layers": [7, 6, 6, 1], "learning_rate": 0.001, "batch_size": 64, "validation_split": 0.3, "epochs": 200},
    {"layers": [7, 8, 8, 1], "learning_rate": 0.0003, "batch_size": 64, "validation_split": 0.25, "epochs": 100},
    {"layers": [7, 10, 10, 1], "learning_rate": 0.0003, "batch_size": 128, "validation_split": 0.3, "epochs": 200},
    {"layers": [7, 4, 4, 1], "learning_rate": 0.001, "batch_size": 64, "validation_split": 0.15, "epochs": 100}
]

# Number of runs per configuration
num_runs = 5

In [None]:
# Run each configuration multiple times and log the results
row_counter = 1
for config_idx, config in enumerate(configurations, start=1):
    print(f"Running Configuration {config_idx}...")
    
    for run_idx in range(1, num_runs+1):
        print(f"  Run {run_idx}...")
        
        try:
            nn = Neural(config["layers"], config["epochs"], config["learning_rate"], config["batch_size"], config["validation_split"], verbose=0)
            nn.fit(X_train, y_train)
            y_pred = nn.predict(X_test)
            
            # Check for NaN values in predictions
            if np.isnan(y_pred).any():
                print("NaN values detected in predictions")
                print(f"Number of NaN values: {np.isnan(y_pred).sum()}")
                print(f"Prediction shape: {y_pred.shape}")
                raise ValueError("NaN values in predictions")
            
            test_error = mean_squared_error(y_test, y_pred)
            
            # Log the results in the Excel sheet
            worksheet.write(row_counter, 0, config_idx)
            worksheet.write(row_counter, 1, run_idx)
            worksheet.write(row_counter, 2, str(config["layers"]))
            worksheet.write(row_counter, 3, config["learning_rate"])
            worksheet.write(row_counter, 4, config["batch_size"])
            worksheet.write(row_counter, 5, config["validation_split"])
            worksheet.write(row_counter, 6, config["epochs"])
            worksheet.write(row_counter, 7, test_error)
            # Calculate and log the Percent
            Percent = round(test_error * 100, 2)
            worksheet.write(row_counter, 8, Percent)
            
            row_counter += 1
            
            # Save the learning curve for this configuration and run
            nn.plot_learning(config_idx, run_idx)
        
        except ValueError as e:
            print(f"Error in configuration {config_idx}, run {run_idx}: {str(e)}")
            # Log the error in the Excel sheet
            worksheet.write(row_counter, 0, config_idx)
            worksheet.write(row_counter, 1, run_idx)
            worksheet.write(row_counter, 2, str(config["layers"]))
            worksheet.write(row_counter, 3, config["learning_rate"])
            worksheet.write(row_counter, 4, config["batch_size"])
            worksheet.write(row_counter, 5, config["validation_split"])
            worksheet.write(row_counter, 6, config["epochs"])
            worksheet.write(row_counter, 7, "Error: " + str(e))
            row_counter += 1
        
        except Exception as e:
            print(f"Unexpected error in configuration {config_idx}, run {run_idx}: {str(e)}")
            # Log the error in the Excel sheet
            worksheet.write(row_counter, 0, config_idx)
            worksheet.write(row_counter, 1, run_idx)
            worksheet.write(row_counter, 2, str(config["layers"]))
            worksheet.write(row_counter, 3, config["learning_rate"])
            worksheet.write(row_counter, 4, config["batch_size"])
            worksheet.write(row_counter, 5, config["validation_split"])
            worksheet.write(row_counter, 6, config["epochs"])
            worksheet.write(row_counter, 7, "Unexpected Error: " + str(e))
            row_counter += 1

# Close the workbook
workbook.close()

Running Configuration 1...
  Run 1...
  Run 2...
  Run 3...
  Run 4...
  Run 5...
Running Configuration 2...
  Run 1...
  Run 2...
  Run 3...
  Run 4...
  Run 5...
Running Configuration 3...
  Run 1...
  Run 2...
  Run 3...
  Run 4...
  Run 5...
Running Configuration 4...
  Run 1...
  Run 2...
  Run 3...
  Run 4...
  Run 5...
Running Configuration 5...
  Run 1...
  Run 2...
  Run 3...
  Run 4...
  Run 5...
Running Configuration 6...
  Run 1...
  Run 2...
  Run 3...
  Run 4...
  Run 5...
Running Configuration 7...
  Run 1...
  Run 2...
  Run 3...
  Run 4...
  Run 5...
Running Configuration 8...
  Run 1...
  Run 2...
  Run 3...
  Run 4...
  Run 5...
Running Configuration 9...
  Run 1...
  Run 2...
  Run 3...
  Run 4...
  Run 5...
Running Configuration 10...
  Run 1...
  Run 2...
  Run 3...
  Run 4...
  Run 5...
