# I. Algorithm

In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.preprocessing import StandardScaler

In [31]:
import numpy as np

class Neural_Networks:
    def __init__(self, layer_sizes, learning_rate=0.01):
        """
        layer_sizes: list of sizes like [input_size, h1, h2, ..., output_size]
        """
        self.lr = learning_rate
        self.num_layers = len(layer_sizes) - 1
        self.weights = []
        self.biases = []

        # Initialize weights and biases
        for i in range(self.num_layers):
            # in_size gives number of inputs at layer_i, out_size gives number of outputs after layer_i
            in_size = layer_sizes[i]
            out_size = layer_sizes[i+1]
            # Initialize weights according to the Kaiming He initialization rule
            self.weights.append(np.random.randn(in_size, out_size) * np.sqrt(2. / in_size))
            self.biases.append(np.zeros((1, out_size)))

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_deriv(self, x):
        s = self.sigmoid(x)
        return s * (1 - s)
       
    def forward(self, X):
        self.zs = []  # Linear outputs
        self.activations = [X]  # Store inputs and activations
        a = X
        for i in range(self.num_layers - 1):  # Hidden layers
            z = a @ self.weights[i] + self.biases[i]
            a = self.sigmoid(z)
            self.zs.append(z)
            self.activations.append(a)
        # Output layer 
        z = a @ self.weights[-1] + self.biases[-1]
        self.zs.append(z)
        self.activations.append(z)
        return z

    def backward(self, y_true, y_pred):
        m = y_true.shape[0]
        grads_w = [None] * self.num_layers
        grads_b = [None] * self.num_layers

        # Output layer gradients (MSE loss)
        delta = (y_pred - y_true) / m
        grads_w[-1] = self.activations[-2].T @ delta
        grads_b[-1] = np.sum(delta, axis=0, keepdims=True)

        # Hidden layers (backpropagation)
        for i in reversed(range(self.num_layers - 1)):
            delta = (delta @ self.weights[i+1].T) * self.sigmoid_deriv(self.zs[i])
            grads_w[i] = self.activations[i].T @ delta
            grads_b[i] = np.sum(delta, axis=0, keepdims=True)

        # Update weights and biases
        for i in range(self.num_layers):
            self.weights[i] -= self.lr * grads_w[i]
            self.biases[i] -= self.lr * grads_b[i]

    def train(self, X, y, epochs=1000, verbose=True):
        for epoch in range(epochs):
            y_pred = self.forward(X)
            loss = np.mean((y - y_pred) ** 2)
            self.backward(y, y_pred)
            if verbose and epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

    def predict(self, X):
        return self.forward(X)


# II. Application to Data Set

In [20]:
# Loading in Data Set and Cleaning
game_data = pd.read_csv("../bgg_dataset.csv",delimiter=";")

game_data['Complexity Average'] = game_data['Complexity Average'].str.replace(',', '.').astype(float)
game_data['Rating Average'] = game_data['Rating Average'].str.replace(',', '.').astype(float)

# Selecting Columns of Top 100 Games
game_cleaned = game_data[['Rating Average','Complexity Average','Min Age','Play Time','BGG Rank']]
game_cleaned = game_cleaned[game_cleaned['BGG Rank'] <= 100]
game_cleaned.dropna(inplace = True)

# Labelling short games as those where play time is less than or equal to 60 minutes
game_cleaned['short'] = np.where(game_cleaned['Play Time'] <= 60, 1, 0)

short = game_cleaned[game_cleaned['short'] == 1]
long = game_cleaned[game_cleaned['short'] == -1]

We will use ```'Rating Average'```, ```'Complexity Average'```, and ```'Min Age'``` to predict whether the game is short or long.

In [32]:
# Preparing data 
X = game_cleaned[['Rating Average', 'Complexity Average','Min Age']].to_numpy()
y = game_cleaned['short'].to_numpy()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Scale the features (important for neural networks)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 3. Define and fit the neural network
model = Neural_Networks(layer_sizes = [3, 2, 1])
model.train(X_train_scaled, y_train)

# 4. Evaluate the model
y_pred = model.predict(X_test_scaled)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))



ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 1 is different from 80)