In [29]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data_utils
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Read data from file
with open('data.txt', 'r') as f:
    data = f.readlines()

# Define player data
players = []
for i in range(0, len(data), 15):
    player = {
        'fantasy_scores': np.array([float(j) for j in data[i].split(',')], dtype=np.float32),
        'season_encodings': np.array([int(j) for j in data[i+1].split(',')], dtype=np.float32),
        'weeks_encodings': np.array([int(j) for j in data[i+2].split(',')], dtype=np.float32),
        'defense_ranks': np.array([int(j) for j in data[i+3].split(',')], dtype=np.float32),
        'defense_id': np.array([int(j) for j in data[i+4].split(',')], dtype=np.float32),       
        'skill_score': np.array([float(j) for j in data[i+5].split(',')], dtype=np.float32),
        'seasons_played': np.array([int(j) for j in data[i+6].split(',')], dtype=np.float32),
        'player_id': np.array([int(j) for j in data[i+7].split(',')], dtype=np.float32), 
        'three_game_average': np.array([float(j) for j in data[i+8].split(',')], dtype=np.float32),
        'four_game_average': np.array([float(j) for j in data[i+9].split(',')], dtype=np.float32),
        'five_game_average': np.array([float(j) for j in data[i+10].split(',')], dtype=np.float32),
        'six_game_average': np.array([float(j) for j in data[i+11].split(',')], dtype=np.float32),
        'seven_game_average': np.array([float(j) for j in data[i+12].split(',')], dtype=np.float32),
        'eight_game_average': np.array([float(j) for j in data[i+13].split(',')], dtype=np.float32),
        'nine_game_average': np.array([float(j) for j in data[i+14].split(',')], dtype=np.float32),
    }
    players.append(player)

# Process inputs and targets for each player
X_train, Y_train, X_test, Y_test = [], [], [], []
 
for player in players:   
    player_inputs = torch.from_numpy(player['season_encodings'][:-3]).view(1, 47, 1).float()
    player_inputs = torch.cat((player_inputs, torch.from_numpy(player['weeks_encodings'][:-3]).view(1, 47, 1).float()), dim=2)
    player_inputs = torch.cat((player_inputs, torch.from_numpy(player['defense_ranks'][:-3]).view(1, 47, 1).float()), dim=2)
    player_inputs = torch.cat((player_inputs, torch.from_numpy(player['defense_id'][:-3]).view(1, 47, 1).float()), dim=2)
    player_inputs = torch.cat((player_inputs, torch.from_numpy(player['skill_score'][:-3]).view(1, 47, 1).float()), dim=2)
    player_inputs = torch.cat((player_inputs, torch.from_numpy(player['seasons_played'][:-3]).view(1, 47, 1).float()), dim=2)
    player_inputs = torch.cat((player_inputs, torch.from_numpy(player['player_id'][:-3]).view(1, 47, 1).float()), dim=2)
    player_inputs = torch.cat((player_inputs, torch.from_numpy(player['three_game_average'][:-3]).view(1, 47, 1).float()), dim=2)
    player_inputs = torch.cat((player_inputs, torch.from_numpy(player['four_game_average'][:-3]).view(1, 47, 1).float()), dim=2)
    player_inputs = torch.cat((player_inputs, torch.from_numpy(player['five_game_average'][:-3]).view(1, 47, 1).float()), dim=2)
    player_inputs = torch.cat((player_inputs, torch.from_numpy(player['six_game_average'][:-3]).view(1, 47, 1).float()), dim=2)
    player_inputs = torch.cat((player_inputs, torch.from_numpy(player['seven_game_average'][:-3]).view(1, 47, 1).float()), dim=2)
    player_inputs = torch.cat((player_inputs, torch.from_numpy(player['eight_game_average'][:-3]).view(1, 47, 1).float()), dim=2)
    player_inputs = torch.cat((player_inputs, torch.from_numpy(player['nine_game_average'][:-3]).view(1, 47, 1).float()), dim=2)
    X_train.append(player_inputs)
    Y_train.append(torch.from_numpy(player['fantasy_scores'][:-3]).view(1, 47, 1).float())
 
for player in players:
    player_test_inputs = torch.from_numpy(player['season_encodings'][-3:-2]).view(1, 1, 1).float()
    player_test_inputs = torch.cat((player_test_inputs, torch.from_numpy(player['weeks_encodings'][-3:-2]).view(1, 1, 1).float()), dim=2)
    player_test_inputs = torch.cat((player_test_inputs, torch.from_numpy(player['defense_ranks'][-3:-2]).view(1, 1, 1).float()), dim=2)
    player_test_inputs = torch.cat((player_test_inputs, torch.from_numpy(player['defense_id'][-3:-2]).view(1, 1, 1).float()), dim=2)
    player_test_inputs = torch.cat((player_test_inputs, torch.from_numpy(player['skill_score'][-3:-2]).view(1, 1, 1).float()), dim=2)
    player_test_inputs = torch.cat((player_test_inputs, torch.from_numpy(player['seasons_played'][-3:-2]).view(1, 1, 1).float()), dim=2)
    player_test_inputs = torch.cat((player_test_inputs, torch.from_numpy(player['player_id'][-3:-2]).view(1, 1, 1).float()), dim=2)
    player_test_inputs = torch.cat((player_test_inputs, torch.from_numpy(player['three_game_average'][-3:-2]).view(1, 1, 1).float()), dim=2)
    player_test_inputs = torch.cat((player_test_inputs, torch.from_numpy(player['four_game_average'][-3:-2]).view(1, 1, 1).float()), dim=2)
    player_test_inputs = torch.cat((player_test_inputs, torch.from_numpy(player['five_game_average'][-3:-2]).view(1, 1, 1).float()), dim=2)
    player_test_inputs = torch.cat((player_test_inputs, torch.from_numpy(player['six_game_average'][-3:-2]).view(1, 1, 1).float()), dim=2)
    player_test_inputs = torch.cat((player_test_inputs, torch.from_numpy(player['seven_game_average'][-3:-2]).view(1, 1, 1).float()), dim=2)
    player_test_inputs = torch.cat((player_test_inputs, torch.from_numpy(player['eight_game_average'][-3:-2]).view(1, 1, 1).float()), dim=2)
    player_test_inputs = torch.cat((player_test_inputs, torch.from_numpy(player['nine_game_average'][-3:-2]).view(1, 1, 1).float()), dim=2)
    X_test.append(player_test_inputs)
    Y_test.append(torch.from_numpy(player['fantasy_scores'][-3:-2]).view(1, 1, 1).float())

# Combine inputs and targets for all players
X_train = torch.cat(X_train, dim=0).to(device)
Y_train = torch.cat(Y_train, dim=0).to(device)
X_test = torch.cat(X_test, dim=0).to(device)
Y_test = torch.cat(Y_test, dim=0).to(device)
X_train.shape, Y_train.shape, X_test.shape, Y_test.shape

(torch.Size([120, 47, 14]),
 torch.Size([120, 47, 1]),
 torch.Size([120, 1, 14]),
 torch.Size([120, 1, 1]))

In [30]:
# Define the neural network architecture
class RegressionModel(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(RegressionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, hidden_size)
        self.fc5 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        out = self.relu(out)
        out = self.fc4(out)
        out = self.relu(out)
        out = self.fc5(out)
        return out

# Set hyperparameters
initial_learning_rate = 0.002
batch_size = 60 
num_batches = len(X_train) // batch_size  # Only full batches

# Create the model
model = RegressionModel(14, 1000).to(device)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=initial_learning_rate)

train_dataset = data_utils.TensorDataset(X_train, Y_train)
train_loader = data_utils.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Set up a learning rate scheduler
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=500, gamma=0.5)

# prints the number of parameters in the model
total = 0
for i in list(model.parameters()):
    total += len(i)
total

8002

In [6]:
# Training loop for each player
epochs = 1200
 
# Training loop
for epoch in range(epochs):
    model.train()  # Set the model in training mode
    total_loss = 0.0

    for batch_X, batch_Y in train_loader:
        # Forward pass
        outputs = model(batch_X)
        loss = criterion(outputs, batch_Y)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Adjust learning rate using the scheduler
    scheduler.step()

    # Print the average loss for the epoch
    if epoch + 1 == epochs:
        print(f"Epoch {epoch+1}/{epochs}, Average Loss: {total_loss/num_batches}")

# Evaluation on the test set
model.eval()  # Set the model in evaluation mode
with torch.no_grad():
    test_outputs = model(X_test)
    test_loss = criterion(test_outputs, Y_test)
    print(f"Test Loss: {test_loss.item()}")

print() # blank line
# Print the predictions and original scores for the player
for i in range(len(X_test[0])):
    for j in range(1, 6):
        print(f"Player {j}: Skill Score: {round(X_test[-j][i][4].item(), 2)}, Defense Rank: {X_test[-j][i][2].item()} " + 
              f"Predicted Fantasy Score: {round(test_outputs[-j][i].item(), 2)}, Actual Fantasy Score: {round(Y_test[-j][i].item(), 2)}")
    print() # blank line

Epoch 1/1, Average Loss: 124.30219078063965
Test Loss: 43.12538528442383

Player 1: Skill Score: 13.9, Defense Rank: 26.0 Predicted Fantasy Score: 2.86, Actual Fantasy Score: 10.2
Player 2: Skill Score: 11.96, Defense Rank: 14.0 Predicted Fantasy Score: 2.87, Actual Fantasy Score: 19.3
Player 3: Skill Score: 10.01, Defense Rank: 7.0 Predicted Fantasy Score: 2.88, Actual Fantasy Score: 11.2
Player 4: Skill Score: 8.96, Defense Rank: 6.0 Predicted Fantasy Score: 2.86, Actual Fantasy Score: 11.7
Player 5: Skill Score: 13.73, Defense Rank: 0.0 Predicted Fantasy Score: 2.83, Actual Fantasy Score: 0.0



In [None]:
# Flatten the NumPy arrays
test_outputs_cpu = test_outputs.cpu().flatten()
Y_test_cpu = Y_test.cpu().flatten()

# Create a scatter plot
plt.figure(figsize=(8, 6))
plt.scatter(Y_test_cpu, test_outputs_cpu, alpha=0.5)  # alpha parameter controls transparency

# Add labels and title
plt.xlabel('Actual Values (Y_test)')
plt.ylabel('Predicted Values (test_outputs)')
plt.title('Scatter Plot of Predicted vs. Actual Values')

# Add a diagonal reference line (perfect predictions)
plt.plot([min(Y_test_cpu), max(Y_test_cpu)], [min(Y_test_cpu), max(Y_test_cpu)], 
         color='red', linestyle='--', linewidth=2)

# Show the plot
plt.grid(True)
plt.show()