In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import setup
import parse
import utils
import train

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.activations import relu
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import TerminateOnNaN, EarlyStopping
from sklearn.model_selection import train_test_split, KFold

#### Load original dataset and plot score

In [None]:
# Load the raw dataset with both columns read as text; the evaluation column
# is converted by parse.stockfish_eval_to_int right below.
# `str` is used instead of `np.string_`: that alias was removed in NumPy 2.0,
# and pandas parses both dtypes into the same object/str column.
df = pd.read_csv(
    setup.DATASET_DIR + setup.DATASET,
    nrows=setup.N_ROWS,
    dtype={"Fen": str, "Evaluation": str},
)
# Pass the parser directly; wrapping it in a lambda adds nothing.
df["Evaluation"] = df["Evaluation"].apply(parse.stockfish_eval_to_int)
# Histogram of the parsed evaluation values.
df["Evaluation"].plot.hist(bins=100)

#### Load vectorized dataset and plot normalized score

In [None]:
# Read the pre-vectorized dataset (at most setup.N_ROWS rows) and draw a
# histogram of its "label" column.
vectorized_csv_path = setup.DATASET_DIR + setup.DATASET_VECTORIZED
df_vectorized = pd.read_csv(vectorized_csv_path, nrows=setup.N_ROWS)
label_column = df_vectorized["label"]
label_column.plot.hist(bins=100)

In [None]:
# Show the vectorized DataFrame as this cell's output.
df_vectorized

In [None]:
# Check for null values anywhere in the frame.
# (.values.any() states the intent directly instead of indexing the frame
# with a 2-D boolean mask and testing whether the result is empty.)
assert not df_vectorized.isnull().values.any(), "df_vectorized contains null values"
# Check shape (+1 is for the label)
expected_shape = (setup.N_ROWS, setup.N_FEATURES + 1)
assert df_vectorized.shape == expected_shape, (
    f"unexpected df_vectorized shape {df_vectorized.shape}, expected {expected_shape}"
)

### Train the model

In [None]:
# Build and train the model on the vectorized dataset. The call returns the
# model together with a (history, train_error, cv_error) triple.
model, training_outcome = train.create_model_and_train(df_vectorized)
history, train_error, cv_error = training_outcome

### Evaluate error and plot learning curve

In [None]:
# Plot the training history produced by the training cell.
utils.plot_learning_curve(history)
# NOTE: despite its name, y_cv holds the labels of the *whole* vectorized
# dataset, not only those of the cross-validation split.
y_cv = df_vectorized["label"]
print(f"Training set error: {train_error:.2}")
print(f"Cross-validation set error: {cv_error:.2}")
# Baseline: mean squared error of uniform random guesses in [0, 1) against the
# labels. A seeded local generator keeps the printed value reproducible across
# "Restart & Run All" instead of depending on the global NumPy RNG state.
baseline_rng = np.random.default_rng(0)
baseline_error = ((baseline_rng.random(len(y_cv)) - y_cv) ** 2).mean()
print(f"Random classifier error: {baseline_error:.2}")

In [None]:
# Save the trained model under setup.MODEL_NAME, but only when saving is
# switched on via setup.SAVE_MODEL.
if setup.SAVE_MODEL:
    model.save(setup.MODEL_NAME)

In [None]:
# Optional K-fold cross-validation, enabled through setup.K_FOLD: train a fresh
# model on every fold and report the per-fold and mean errors.
if setup.K_FOLD:
    features = [f"f_{x}" for x in range(1, setup.N_FEATURES + 1)]
    k_fold = KFold(n_splits=setup.N_FOLDS)
    train_error_all = []
    cv_error_all = []
    X = df_vectorized[features]
    y = df_vectorized["label"]

    # KFold.split yields *positional* row indices, so rows are selected with
    # .iloc; this is correct whatever the DataFrame index is (no reset needed).
    # The loop names deliberately avoid `train` (which would shadow the imported
    # module) and `model` / `train_error` / `cv_error` (which would clobber the
    # results of the main training run used by other cells).
    for fold, (train_idx, test_idx) in enumerate(k_fold.split(X, y), start=1):
        # NOTE(review): create_model / train_evaluate_model are not defined in
        # this notebook; they are assumed to live in the `train` module next to
        # create_model_and_train -- confirm.
        fold_model = train.create_model()
        _history, fold_train_error, fold_cv_error = train.train_evaluate_model(
            fold_model,
            X.iloc[train_idx],
            y.iloc[train_idx],
            X.iloc[test_idx],
            y.iloc[test_idx],
        )
        train_error_all.append(fold_train_error)
        cv_error_all.append(fold_cv_error)
        print(f"Model #{fold} done! CV error: {fold_cv_error:.2}")
        # Drop the reference so the fold's model can be garbage-collected.
        del fold_model

    print(f"Training sets error:{train_error_all}")
    print(f"Cross-validation sets errors:{cv_error_all}")
    print(f"Training sets mean error: {np.mean(train_error_all):.2}")
    print(f"Cross-validation sets mean error: {np.mean(cv_error_all):.2}")