In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as opt 
import torch.nn.functional as F
import tqdm
from torch.utils.data import DataLoader, Dataset

In [2]:
# Load the game log into a DataFrame. Later cells read the 'BoardState',
# 'ewma_reward' and 'action' columns from it.
df = pd.read_parquet('RandomBot_GreedyBot_10k_clean.parquet')

In [3]:
class MyRegression(nn.Module):
    """Fully connected network with two ReLU hidden layers and a linear head.

    Layer widths are ``k -> 2 * hidden -> hidden -> n_outputs``. The final
    layer has no activation, so outputs are unbounded real values.

    Args:
        k: Number of input features per sample.
        hidden: Base hidden width; the first hidden layer is twice this size.
        n_outputs: Width of the output layer. Defaults to 6, the value that
            used to be hard-coded, so existing ``MyRegression(k, hidden)``
            calls build exactly the same architecture.
    """

    def __init__(self, k, hidden, n_outputs=6):
        super().__init__()
        # Attribute names are kept stable so previously saved checkpoints and
        # state-dict keys continue to match.
        self.linear1 = nn.Linear(k, hidden * 2)
        self.linear2 = nn.Linear(hidden * 2, hidden)
        self.linear_last = nn.Linear(hidden, n_outputs)

    def forward(self, x_in):
        """Compute the raw network outputs for ``x_in``.

        Args:
            x_in: Float tensor whose last dimension has size ``k``.

        Returns:
            Tensor with the same leading dimensions as ``x_in`` and a last
            dimension of size ``n_outputs``.
        """
        hidden = F.relu(self.linear1(x_in))
        hidden = F.relu(self.linear2(hidden))
        return self.linear_last(hidden)

In [4]:
# Model with 14 input features and hidden widths 100 -> 50 (hidden * 2, hidden).
regressor = MyRegression(k=14, hidden=50)

In [5]:
class DataSet(Dataset):
    """Map-style dataset that serves one DataFrame row per sample.

    Args:
        df: DataFrame providing the columns ``'BoardState'``,
            ``'ewma_reward'`` and ``'action'``.
    """

    def __init__(self, df):
        self.df = df

    def __getitem__(self, idx):
        """Return the sample at positional index ``idx``.

        Returns:
            dict with keys ``'state'`` (float tensor built from the row's
            ``BoardState``), ``'reward'`` (the row's ``ewma_reward``) and
            ``'action'`` (the row's ``action``), the latter two passed
            through unchanged.

        Raises:
            IndexError: if ``idx`` is out of range for the wrapped frame.
        """
        # Read from the frame passed to the constructor. Looking up a
        # module-level `df` here would silently ignore the constructor
        # argument and fail wherever no such global exists.
        item = self.df.iloc[idx]
        return {
            'state': torch.FloatTensor(item['BoardState']),
            'reward': item['ewma_reward'],
            'action': item['action'],
        }

    def __len__(self):
        """Number of rows in the wrapped frame."""
        return len(self.df)

In [6]:
# Wrap the loaded frame in the Dataset subclass so a DataLoader can index it.
data = DataSet(df=df)

In [7]:
# Shuffled mini-batches of up to 1000 samples, fetched by 5 worker processes.
dataloader = DataLoader(data, batch_size=1000, shuffle=True, num_workers=5)

In [8]:
def transform_y(actions, y, y_hat):
    """Build a target equal to ``y_hat`` except at each row's taken action.

    For row ``i`` the entry in column ``actions[i] - 1`` is replaced by
    ``y[i]``; every other entry keeps the predicted value, so an element-wise
    loss against ``y_hat`` is zero everywhere except the taken action.

    Args:
        actions: 1-based action ids, one per row (tensor or sequence of ints).
        y: Observed values, one per row (tensor or sequence of numbers).
        y_hat: 2-D prediction tensor with one row per sample.

    Returns:
        A new tensor with the shape, dtype and device of ``y_hat``. It is
        detached from the autograd graph, so it acts as a constant target.
        ``y_hat`` itself is not modified.
    """
    # Detach before cloning: a loss target should not carry gradient history.
    vector_output = y_hat.detach().clone()
    device = vector_output.device

    # Match the pairing the row-by-row loop used: stop at the shorter input.
    count = min(len(y), len(actions))
    rows = torch.arange(count, device=device)
    cols = torch.as_tensor(actions, dtype=torch.long, device=device)[:count] - 1
    values = torch.as_tensor(y, dtype=vector_output.dtype, device=device)[:count]

    # One indexed assignment replaces the per-row Python loop.
    vector_output[rows, cols] = values
    return vector_output

In [9]:
# Training setup: mean absolute error, plain SGD, ten passes over the data.
loss_func = nn.L1Loss()
optimizer = opt.SGD(regressor.parameters(), lr=1)
epochs = tqdm.trange(10)

for i in epochs:
    losses = []
    for batch_idx, batch in enumerate(dataloader):
        x, y, actions = batch['state'], batch['reward'], batch['action']

        # Clear gradients left over from the previous step before the new pass.
        optimizer.zero_grad()

        # transform_y copies the predictions and overwrites only the column of
        # the action taken, so only that column contributes to the loss.
        y_hat = regressor(x)
        y_transformed = transform_y(actions, y, y_hat)
        loss = loss_func(y_hat, y_transformed)

        loss.backward()
        optimizer.step()
        losses.append(loss.item())

    # Show the mean batch loss of the epoch just finished on the progress bar.
    epochs.set_description(f'loss: {np.mean(losses)}')

loss: 2.7439489424874206: 100%|██████████| 10/10 [02:48<00:00, 16.83s/it]


In [13]:
# Persist the trained model. The module object itself is passed to torch.save,
# not its state_dict.
# NOTE(review): loading this file presumably requires the MyRegression class
# to be available at load time - confirm with consumers of the .pth file.
torch.save(regressor, 'model_Greedy_vs_Random.pth')

In [15]:
# Reload the checkpoint from disk into a separate name, `regr`.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source. Recent PyTorch releases may also require an explicit
# `weights_only=False` to load a whole-module checkpoint - TODO confirm
# against the installed version.
regr = torch.load('model_Greedy_vs_Random.pth')

In [23]:
# Smoke test: run the reloaded model on a single BoardState entry.
# NOTE(review): `df['BoardState'][1]` looks the entry up through the Series
# index, presumably the row labelled 1 - confirm that is the intended row.
regr(torch.FloatTensor(df['BoardState'][1]))

tensor([ 6.2078e+00,  4.8259e+00, -5.9199e-03,  5.6166e+00,  3.9525e+00,
        -2.5271e+00], grad_fn=<AddBackward0>)

In [16]:
# Index of the highest output per row, shifted by one.
# NOTE: `x` is not defined in this cell; it is whatever the training loop last
# assigned (its final batch), so this cell fails on a fresh kernel unless the
# training cell has been run first.
# The +1 mirrors the `action-1` column indexing in transform_y, presumably
# mapping output columns back to 1-based action ids.
np.argmax(regr(x).detach().numpy(), axis=1) + 1

array([6, 3, 1, 2, 6, 2, 6, 2, 6, 2, 2, 2, 2, 3, 3, 4, 3, 2, 5, 2, 6, 6,
       2, 2, 6, 1, 1, 6, 2, 2, 6, 5, 2, 2, 2, 2, 2, 3, 2, 5, 6, 2, 2, 1,
       2])