In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.preprocessing import StandardScaler

In [4]:
data = pd.read_csv('/content/drive/MyDrive/ML Project/TeamStats.csv')

In [18]:
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

In [19]:
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

In [20]:
train_split = 0.8

X_train, X_test, y_train, y_test = train_test_split(X_std, y, train_size=train_split, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

In [21]:
class LinReg(nn.Module):
    def __init__(self, input_size, output_size):
        super(LinReg, self).__init__()
        self.fc = nn.Linear(input_size, output_size)

    def forward(self, x):
        return self.fc(x)

In [22]:
def train_and_evaluate(train_loader, test_loader, input_size, output_size, learning_rate, epochs):
    model = LinReg(input_size, output_size)
    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    # Train
    for epoch in range(epochs):
        model.train()
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            y_pred = model(X_batch)
            y_batch = y_batch.view_as(y_pred)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()

        if (epoch+1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

    # Evaluate
    model.eval()
    predictions = []
    true_values = []
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            y_pred = model(X_batch)
            predictions.extend(y_pred.numpy())
            true_values.extend(y_batch.numpy())

    predictions = np.array(predictions).reshape(-1)
    true_values = np.array(true_values).reshape(-1)
    mse = np.mean((predictions - true_values) ** 2)
    return mse

In [24]:
param_grid = {
    'learning_rate': [0.001, 0.005, 0.01, 0.05],
    'batch_size': [1, 2, 5, 10, 30],
    'epochs': [50, 100, 150, 200, 250]
}

best_mse = float('inf')
best_params = None

input_size = X_train_tensor.shape[1]
output_size = 1

for params in ParameterGrid(param_grid):
    train_loader = DataLoader(dataset=train_dataset, batch_size=params['batch_size'], shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=params['batch_size'], shuffle=False)

    print(f'Testing parameters: {params}')
    mse = train_and_evaluate(train_loader, test_loader, input_size, output_size, params['learning_rate'], params['epochs'])
    print(f'Means Squared Error: {mse}')

    if mse < best_mse:
        best_mse = mse
        best_params = params

print(f'Best parameters: {best_params}')
print(f'Best Mean Squared Error: {best_mse}')

Testing parameters: {'batch_size': 1, 'epochs': 50, 'learning_rate': 0.001}
Epoch [10/50], Loss: 1112.6698
Epoch [20/50], Loss: 36.9964
Epoch [30/50], Loss: 16.4918
Epoch [40/50], Loss: 12.6258
Epoch [50/50], Loss: 0.3503
Means Squared Error: 10.642772674560547
Testing parameters: {'batch_size': 1, 'epochs': 50, 'learning_rate': 0.005}
Epoch [10/50], Loss: 3.6461
Epoch [20/50], Loss: 0.3551
Epoch [30/50], Loss: 0.8911
Epoch [40/50], Loss: 0.0173
Epoch [50/50], Loss: 0.0499
Means Squared Error: 0.41347527503967285
Testing parameters: {'batch_size': 1, 'epochs': 50, 'learning_rate': 0.01}
Epoch [10/50], Loss: 0.2719
Epoch [20/50], Loss: 0.0215
Epoch [30/50], Loss: 0.0102
Epoch [40/50], Loss: 0.0045
Epoch [50/50], Loss: 0.3122
Means Squared Error: 0.4451717436313629
Testing parameters: {'batch_size': 1, 'epochs': 50, 'learning_rate': 0.05}
Epoch [10/50], Loss: 143495.0781
Epoch [20/50], Loss: 2538065887232.0000
Epoch [30/50], Loss: 5210138652553773056.0000
Epoch [40/50], Loss: 47369455355

  mse = np.mean((predictions - true_values) ** 2)


Epoch [10/150], Loss: 819.8314
Epoch [20/150], Loss: 42.7477
Epoch [30/150], Loss: 0.0553
Epoch [40/150], Loss: 0.3756
Epoch [50/150], Loss: 6.3128
Epoch [60/150], Loss: 2.3381
Epoch [70/150], Loss: 0.0554
Epoch [80/150], Loss: 3.7798
Epoch [90/150], Loss: 0.7357
Epoch [100/150], Loss: 0.8879
Epoch [110/150], Loss: 0.0047
Epoch [120/150], Loss: 0.0034
Epoch [130/150], Loss: 0.9031
Epoch [140/150], Loss: 0.5373
Epoch [150/150], Loss: 0.0054
Means Squared Error: 0.6035837531089783
Testing parameters: {'batch_size': 1, 'epochs': 150, 'learning_rate': 0.005}
Epoch [10/150], Loss: 0.8788
Epoch [20/150], Loss: 0.0032
Epoch [30/150], Loss: 0.0039
Epoch [40/150], Loss: 0.0323
Epoch [50/150], Loss: 0.0712
Epoch [60/150], Loss: 0.0280
Epoch [70/150], Loss: 0.3537
Epoch [80/150], Loss: 0.2358
Epoch [90/150], Loss: 0.0104
Epoch [100/150], Loss: 0.1042
Epoch [110/150], Loss: 0.2188
Epoch [120/150], Loss: 0.0014
Epoch [130/150], Loss: 0.0265
Epoch [140/150], Loss: 0.0340
Epoch [150/150], Loss: 0.024

In [None]:
# bs = 2
# epochs = 250
# lr = 0.05
# mse = 0.015224060975015163