### Part d): Classification  analysis using neural networks

With a well-written code it should now be easy to change the
activation function for the output layer.

Here we will change the cost function for our neural network code
developed in parts b) and c) in order to perform a classification analysis. 

We will here study the Wisconsin Breast Cancer  data set. This is a typical binary classification problem with just one single output, either True or Fale, $0$ or $1$ etc.
You find more information about this at the [Scikit-Learn
site](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_breast_cancer.html) or at the [University of California
at Irvine](https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(original)). 

To measure the performance of our classification problem we use the
so-called *accuracy* score.  The accuracy is as you would expect just
the number of correctly guessed targets $t_i$ divided by the total
number of targets, that is

$$
\text{Accuracy} = \frac{\sum_{i=1}^n I(t_i = y_i)}{n} ,
$$

where $I$ is the indicator function, $1$ if $t_i = y_i$ and $0$
otherwise if we have a binary classification problem. Here $t_i$
represents the target and $y_i$ the outputs of your FFNN code and $n$ is simply the number of targets $t_i$.

Discuss your results and give a critical analysis of the various parameters, including hyper-parameters like the learning rates and the regularization parameter $\lambda$ (as you did in Ridge Regression), various activation functions, number of hidden layers and nodes and activation functions.  

As stated in the introduction, it can also be useful to study other
datasets. 

Again, we strongly recommend that you compare your own neural Network
code for classification and pertinent results against a similar code using **Scikit-Learn**  or **tensorflow/keras** or **pytorch**.

PLAN:
1. last ned breast cancer data set
2. bruk ffnn til klassifikasjon (med ... som siste lag)
3. Bruk back propogation for å forbedre svaret
4. bruke accuracy til å teste resultatene mine 

In [None]:
## just downloading the dataset
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import autograd.numpy as np
from autograd import grad, elementwise_grad
import FFNN as fn

wisconsin = load_breast_cancer()
X = wisconsin.data
target = wisconsin.target
target = target.reshape(target.shape[0], 1)

X_train, X_val, t_train, t_val = train_test_split(X, target)

scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)

#print(X[0].size)

#""""
network_input_size = X[0].size
print(X.size)
print(network_input_size)
layer_output_sizes = [2, 3, 2]
activation_funcs = [fn.sigmoid, fn.sigmoid, fn.ReLU]
activation_ders = [fn.sigmoid_der, fn.sigmoid_der, fn.ReLU_der]

layers = [(np.random.randn(network_input_size,2), np.random.randn(network_input_size))]
batched_layers = [(layers[0][0].T, layers[0][1])]
##fn.create_layers(network_input_size, layer_output_sizes)

predict = fn.feed_forward_batch(X, layers, activation_funcs)

layer_grads = fn.backpropagation_batch(X, layers, activation_funcs, target, activation_ders)
print(layer_grads)

cost_grad = grad(fn.cost, 0)
print(cost_grad(layers, X, activation_funcs, target))

cost_grad = grad(fn.cost_batch, 0)
print(cost_grad(batched_layers, X, activation_funcs, target))
#"""

In [16]:
## https://gpt.uio.no/chat/810796
import autograd.numpy as np
from autograd import grad, elementwise_grad
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import FFNN as fn

# Defining some activation functions and their derivatives
def ReLU(z):
    return np.where(z > 0, z, 0)

def ReLU_der(z):
    return np.where(z > 0, 1, 0)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoid_der(z):
    return sigmoid(z) * (1 - sigmoid(z))

def mse(predict, target):
    return np.mean((predict - target) ** 2)

def mse_der(predict, target):
    return 2 / len(target) * (predict - target)

# Create the layers of the neural network
def create_layers_batch(network_input_size, layer_output_sizes):
    layers = []
    i_size = network_input_size
    for layer_output_size in layer_output_sizes:
        W = np.random.randn(i_size, layer_output_size) 
        b = np.random.randn(layer_output_size)
        layers.append((W, b))
        i_size = layer_output_size
    return layers

# Feed forward function for a batch of inputs
def feed_forward_batch(inputs, layers, activation_funcs):
    a = inputs
    for (W, b), activation_func in zip(layers, activation_funcs):
        z = a @ W + b 
        a = activation_func(z)
    return a

# Cost function for a batch of inputs
def cost_batch(layers, inputs, activation_funcs, target):
    predict = feed_forward_batch(inputs, layers, activation_funcs)
    return mse(predict, target)

# Function to save the input at each layer during feed forward for backpropagation
def feed_forward_saver_batch(inputs, layers, activation_funcs):
    layer_inputs = []
    zs = []
    a = inputs
    for (W, b), activation_func in zip(layers, activation_funcs):
        layer_inputs.append(a)
        z = a @ W + b
        a = activation_func(z)
        zs.append(z)
    return layer_inputs, zs, a

# Backpropagation function
def backpropagation_batch(inputs, layers, activation_funcs, target, activation_ders, cost_der=mse_der):
    layer_inputs, zs, predict = feed_forward_saver_batch(inputs, layers, activation_funcs)
    layer_grads = [() for layer in layers]
    
    for i in reversed(range(len(layers))):
        layer_input, z, activation_der = layer_inputs[i], zs[i], activation_ders[i]

        if i == len(layers) - 1:
            dC_da = cost_der(predict, target)
        else:
            (W, b) = layers[i + 1]
            dC_da = dC_dz @ W.T

        dC_dz = dC_da * activation_der(z)
        dC_dW = layer_input.T @ dC_dz
        dC_db = np.sum(dC_dz, axis=0)

        layer_grads[i] = (dC_dW, dC_db)

    return layer_grads


# Load and preprocess the Wisconsin Breast Cancer dataset
data = datasets.load_breast_cancer()
X = data.data
y = data.target
y = y.reshape(-1, 1)

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Network configuration
input_size = X_train.shape[1]
output_size = 1
hidden_layers = [10, 10]  # Two hidden layers with 10 neurons each
layers = fn.create_layers_batch(input_size, hidden_layers + [output_size])
activation_funcs = [fn.ReLU, fn.ReLU, fn.sigmoid]
activation_ders = [fn.ReLU_der, fn.ReLU_der, fn.sigmoid_der]

# Hyperparameters
lr = 0.01
epochs = 1000

# Training loop
for epoch in range(epochs):
    grads =fn.backpropagation_batch(X_train, layers, activation_funcs, y_train, activation_ders)
    for i, (W, b) in enumerate(layers):
        dW, db = grads[i]
        W -= lr * dW
        b -= lr * db
        layers[i] = (W, b)
    
    if epoch % 100 == 0:
        cost_value = fn.cost_batch(layers, X_train, activation_funcs, y_train)
        print(f"Epoch {epoch}, cost: {cost_value}")

# Evaluate the model
preds = fn.feed_forward_batch(X_test, layers, activation_funcs)
preds = np.round(preds)
accuracy = accuracy_score(y_test, preds)
print(f"Accuracy on test set: {accuracy}")


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 1 is different from 512)