In [22]:
# Import Statements 
import pandas as pd
import numpy
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [23]:
# Reading in the CSV file from Kaggle (Credits to Paola Mazza) into a Pandas Data Frame
players_df = pd.read_csv("players.csv")

In [5]:
# Split the dataset based on values in the 'Position' column
defenders_df = players_df[players_df['position'] == 'DEF']
midfielders_df = players_df[players_df['position'] == 'MID']
forwards_df = players_df[players_df['position'] == 'FWD']
keepers_df = players_df[players_df['position'] == 'GKP']

In [6]:
# Preprocess the data within the Pandas Data Frame
def preprocess(position_df):
    processed_df = position_df.copy()
    processed_df = processed_df.drop_duplicates()
    
    return processed_df

# Defenders Data
processed_defenders_df = preprocess(defenders_df)

# Midfielders Data
processed_midfielders_df = preprocess(midfielders_df)

# Forwards Data
processed_forwards_df = preprocess(forwards_df)

# Keepers Data
processed_keepers_df = preprocess(keepers_df)

In [25]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [36]:
def create_and_train_nn(df, player_type):
    # Extract features and target variable
    X = df.drop(['total_points'], axis=1).values
    y = df['total_points'].values

    # Standardize features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

    # Convert data to PyTorch tensors
    X_train = torch.FloatTensor(X_train)
    X_test = torch.FloatTensor(X_test)
    y_train = torch.FloatTensor(y_train)
    y_test = torch.FloatTensor(y_test)

    # Create the model
    input_size = X_train.shape[1]
    hidden_size1 = 64
    hidden_size2 = 32
    model = NeuralNetwork(input_size, hidden_size1, hidden_size2)

    # Define loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train the model
    epochs = 1000
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train.view(-1, 1))
        loss.backward()
        optimizer.step()

    # Evaluate the model on the test set
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test)
        mse = mean_squared_error(y_test.numpy(), y_pred.numpy().flatten())
        print(f'Mean Squared Error for {player_type}: {mse}')

    return model

In [39]:
create_and_train_nn(processed_defenders_df[processed_defenders_df.select_dtypes(include=['int']).columns], "Defenders")
create_and_train_nn(processed_midfielders_df[processed_midfielders_df.select_dtypes(include=['int']).columns], "Midfielders")
create_and_train_nn(processed_forwards_df[processed_forwards_df.select_dtypes(include=['int']).columns], "Forwards")
create_and_train_nn(processed_keepers_df[processed_keepers_df.select_dtypes(include=['int']).columns], "Keepers")

Mean Squared Error for Defenders: 7.910803318023682
Mean Squared Error for Midfielders: 8.129537582397461
Mean Squared Error for Forwards: 99.73937225341797
Mean Squared Error for Keepers: 8.17061996459961


NeuralNetwork(
  (fc1): Linear(in_features=42, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=1, bias=True)
)