In [3]:
import numpy as np
from neural_network_class import NeuralNetwork
from sklearn.metrics import accuracy_score

# Assuming the data preparation and scaling functions are included in 'dataset' module
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

def prepare_data(filepath):
    # Load the dataset
    data = pd.read_csv(filepath)
    
    # Convert 'Wage' from string to numeric by removing commas and converting to integer
    data['Wage'] = data['Wage'].str.replace(',', '').astype(int)
    
    # Select the first 600 entries
    data = data.iloc[:600]
    
    # Identify categorical columns
    categorical_columns = ['Club', 'League', 'Nation', 'Position']
    
    # One-hot encode the categorical data
    data = pd.get_dummies(data, columns=categorical_columns)
    
    # Split the data into features and target
    X = data.drop('Wage', axis=1)
    Y = data['Wage']
    
    # Split the data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=0)
    
    return X_train, X_test, y_train, y_test


# Load and prepare data
file_path = 'SalaryPrediction.csv'
X_train, X_test, y_train, y_test = prepare_data(file_path)

unique_classes = np.unique(y_train)
class_to_index = {label: index for index, label in enumerate(unique_classes)}
y_train_mapped = np.vectorize(class_to_index.get)(y_train)

# Repeat for y_test if necessary

y_train_onehot = np.eye(len(np.unique(y_train)))[y_train]
y_test_onehot = np.eye(len(np.unique(y_test)))[y_test]


input_size = X_train.shape[1]
hidden_size = 8  # Example size of hidden layer
output_size = y_train_onehot.shape[1]  # Single output node since we are predicting a single value
learning_rate = 0.01


nn = NeuralNetwork(input_size, hidden_size, output_size, learning_rate)
nn.train(X_train, y_train, epochs=1000)

# Predict on test set
y_pred = nn.predict(X_test)
y_pred_labels = np.argmax(y_pred, axis=1)

#accuracy evaluation
accuracy = accuracy_score(y_test, y_pred_labels)
print(f"Test accuracy: {accuracy * 100:.2f}%")


IndexError: index 11440000 is out of bounds for axis 0 with size 177

In [4]:
print("Max value in y_train:", y_train.max())
print("Unique values in y_train:", len(np.unique(y_train)))


Max value in y_train: 46427000
Unique values in y_train: 177
