This is the main file to toy around with model training and selection!

# Imports

In [None]:
import glob
import chess_SL_E8_lib as lib
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import chess
import numpy as np

# Set these parameters according to what you would want the model to be named.
# Both values are interpolated as `E{MODEL_NUM}-{MODEL_VERSION}` into the
# loss-plot title and filename, the saved model filename, and the pickled
# loss-history filenames further down in this notebook.
MODEL_NUM = 'TEST'
MODEL_VERSION = 'TEST1'

#### Note to Grader: Please install [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit) to train and predict on GPU

In [None]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

# 1. Training (Optional. Skip to Section 2 - Prediction).

Before training, the dataset must be parsed and preprocessed via bash scripts. Please see the GitHub Repo's [README](https://github.com/kyleosung/DS3ML3_Final_Project/blob/main/README.md) for more instructions.

### Create a Subset of the Data

In [None]:
path = "../Data/DataTrain" # Change this to the path you installed the data

# Choose the files you want to train on and validate on, keyed by the letter
# that follows the "Chess_Jan_" filename prefix.
# Currently, we are training on all files beginning with a through p
# and validating on all files beginning with q through z.
letters_train = 'abcdefghijklmnop'
letters_valid = 'qrstuvwxyz'

# glob.glob returns matches in arbitrary (filesystem-dependent) order, so each
# letter's matches are sorted to keep the file order reproducible between runs.
csv_files_train = [
    csv_file
    for let in letters_train
    for csv_file in sorted(glob.glob(f'{path}/Chess_Jan_{let}*'))
]

csv_files_valid = [
    csv_file
    for let_ in letters_valid
    for csv_file in sorted(glob.glob(f'{path}/Chess_Jan_{let_}*'))
]

#### Training Loop (See [chess_SL_E8_lib.py](https://github.com/kyleosung/DS3ML3_Final_Project/blob/main/chess_SL_E8_lib.py) for implementation!)

In [None]:
# Build the training and validation datasets from the selected files
dataset1 = lib.ChessIterableDataset(csv_files_train)
dataset2 = lib.ChessIterableDataset(csv_files_valid)

# Wrap each dataset in a loader that yields batches of 15000 samples
train_data_loader = DataLoader(dataset1, batch_size=15000)
val_data_loader = DataLoader(dataset2, batch_size=15000)

# Instantiate the network and move it onto the selected device
model = lib.EvalNet().to(device)

# L1 (mean absolute error) loss, optimised with Adam
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.0065)

# Run training; lib.train hands back the training and validation loss histories
loss_histories = lib.train(
    model,
    train_data_loader,
    val_data_loader,
    criterion,
    optimizer,
    num_epochs=30,
)
train_loss_hist, valid_loss_hist = loss_histories

#### Plot Training and Validation Loss

In [None]:
import matplotlib.pyplot as plt

# Draw both loss histories on a single set of axes.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(train_loss_hist, label='Training Loss')
ax.plot(valid_loss_hist, label='Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
# ax.set_ylim(0, None)
ax.set_title(f'Loss for E{MODEL_NUM}-{MODEL_VERSION} Model')
ax.legend()

# Write the figure to disk first, then display it in the notebook.
fig.savefig(f'Loss_E{MODEL_NUM}-{MODEL_VERSION}.png')
plt.show()


#### Save model and histories

In [None]:
# Persist the whole trained model object (not only its state_dict) under the
# name derived from MODEL_NUM / MODEL_VERSION.
model_path = f'model_E{MODEL_NUM}-{MODEL_VERSION}.pth'
torch.save(model, model_path)

In [None]:
import os
import pickle

# The histories are written into a local "pickle" directory; create it if it
# is missing so a fresh checkout does not fail with FileNotFoundError.
os.makedirs('pickle', exist_ok=True)

# Use context managers so each file is flushed and closed as soon as its
# history has been written (the bare open(...) calls leaked the handles).
with open(f'pickle/training_loss_history_E{MODEL_NUM}-{MODEL_VERSION}.pkl', 'wb') as train_hist_file:
    pickle.dump(train_loss_hist, train_hist_file)

with open(f'pickle/validation_loss_history_E{MODEL_NUM}-{MODEL_VERSION}.pkl', 'wb') as valid_hist_file:
    pickle.dump(valid_loss_hist, valid_hist_file)

# 2. Prediction

In [None]:
import chess_SL_E8_lib_ensemble as libEns
import chess_SK_lib as SKlib
import chess_SL_E8_lib as lib

import chess
import torch

import joblib
from sklearn.neighbors import KNeighborsRegressor

Choose the Torch model to load (at most one)

**Important Note**: The Torch model loaded will change the parameters of the function and might necessitate slight modifications to the code. Please change the imported library to the appropriate library (e.g., if you choose to load in an E7 model, import the E7 library). Please also note that currently only the E8 model is compatible with the ensemble learning with SKLearn Models.

In [None]:
## LOAD TORCH MODEL

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

board = chess.Board()

# The training section of this notebook saves whole model objects with
# torch.save(model, ...); this checkpoint is presumably stored the same way.
# Since PyTorch 2.6 torch.load defaults to weights_only=True, which rejects
# such pickled modules, so full unpickling is requested explicitly.
# NOTE: full unpickling can execute arbitrary code -- only load checkpoint
# files that come from a source you trust.
modelDL = torch.load('../models_EL/model_E7-2.pth', map_location=device, weights_only=False)

Choose SKLearn Models to Load

**Important Note:** There is not enough RAM or storage on most computers to store the KNN model (as it would need to store the fitted data for the locations of each of the points). Thus, please instantiate and fit a new KNN model as desired.

In [None]:
## LOAD SKLEARN MODELS

models_to_load = ['RF_1', 'SVR_1', 'LR_1'] # do not put KNN here
models_list = []

# Load each pre-fitted scikit-learn model from disk. The original loop called
# enumerate() with no argument, which raises TypeError before anything loads.
for model_name in models_to_load:
    models_list.append(joblib.load(f'models_SK/model_{model_name}.joblib'))

X_train, X_test, y_train, y_test = SKlib.load_data_XY_a_to_d()

## Use this to add KNN
# The KNN model is not loaded from disk; it is fitted here on the training
# split and appended after the loaded models.
knn2 = KNeighborsRegressor(n_neighbors=2)
knn2.fit(X_train, y_train)
models_list.append(knn2)

In [None]:
# Call the ensemble library's test helper with the loaded Torch model and the
# list of scikit-learn models (the loaded models plus the freshly fitted KNN).
# The double-underscore name is not mangled here because this call is not
# inside a class body.
libEns.__test_ensemble_model(modelDL, models_list)