In [None]:
# Enable IPython's autoreload extension in mode 2 (reload all modules before
# running each cell), so edits to imported project modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path
from collections import Counter
#
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
import torch
import torch.nn as nn
#
from problems import get_problems, filter_problems, filter_problems_ge
from metrics import *
from utils import coords_map, grade_maps, get_board_setup, draw_moves, plot_mat, count_parameters, x_coords, y_coords, draw_coords

In [None]:
# --- Paths (relative to the notebook's working directory) ---
P_ROOT = Path("../data")
P_PROB = P_ROOT / "problems"
P_BOARD = P_ROOT / "boards" / "moonboard.png"
P_HOLDS = P_ROOT / "boards" / "holds"

# --- Board setup handed to get_problems ---
setup_year = 2017
setup_angle = 45

# --- Problem filtering (all handed to filter_problems_ge) ---
# NOTE(review): presumably lower/upper bounds on the number of moves per
# problem -- confirm against filter_problems_ge.
minlen = 5
maxlen = 12

# Grade labels in ascending order; grade_maps turns these into numeric classes.
grade_names = [
    "6A+", "6B", "6B+", "6C", "6C+", "7A",
    "7A+", "7B", "7B+", "7C", "7C+", "8A",
]

# Per-grade threshold used by filter_problems_ge.
# NOTE(review): presumably the minimum repeat count a problem needs to be kept
# -- confirm. "8A+" has an entry here but is not listed in grade_names.
grade_rep_ge = {
    "6A+": 5,
    "6B": 5,
    "6B+": 5,
    "6C": 5,
    "6C+": 3,
    "7A": 3,
    "7A+": 2,
    "7B": 2,
    "7B+": 1,
    "7C": 1,
    "7C+": 1,
    "8A": 1,
    "8A+": 1,
}

In [None]:
# Load every problem for the configured board setup and report how many there are.
probs_raw = get_problems(P_PROB, setup_year, setup_angle)
print(len(probs_raw))

# Keep only the problems that pass the per-grade / length filter and report the new count.
probs = filter_problems_ge(probs_raw, grade_rep_ge, grade_names, minlen, maxlen)
print(len(probs))

# Lookup tables between grade labels and the numeric classes used as targets.
grade_to_num, num_to_grade = grade_maps(grade_names)

In [None]:
def extract_grades(probs, fromUser=False):
    """Collect one grade per problem, in the order of ``probs``.

    Args:
        probs: iterable of problem dicts. Each needs a "grade" key, plus a
            "userGrade" key when ``fromUser`` is true.
        fromUser: when true, prefer the problem's "userGrade" and fall back to
            "grade" if the user grade is None. When false (the default),
            always use "grade".

    Returns:
        list with one grade per problem.
    """
    # The flag used to be ignored (the user grade was always preferred);
    # it now actually selects the grade source.
    if not fromUser:
        return [p["grade"] for p in probs]
    return [p["grade"] if p["userGrade"] is None else p["userGrade"] for p in probs]

In [None]:
# Count problems per numeric grade; sorting the (grade, count) pairs keeps the
# grades in ascending order.
grade_counts = Counter(grade_to_num[p["grade"]] for p in probs)
grades, nprobs = zip(*sorted(grade_counts.items()))
nprobs = np.array(nprobs)

# Relative frequency of each grade, and its inverse (total / count).
grade_freqs = nprobs / nprobs.sum()
class_weights = nprobs.sum() / nprobs

# Bar chart of the number of problems per grade, ticks labelled with grade names.
fig, axes = plt.subplots(1, 1, figsize=(16, 8))
axes.bar(grades, nprobs)
axes.set_ylabel("#probs")
axes.set_xlabel("#grades")
axes.set_xticks(grades)
axes.set_xticklabels([num_to_grade[n] for n in grades])
plt.show()

# Create Data Matrices

In [None]:
# X: one row per problem, one column per entry of coords_map; a cell is 1 when
# the problem has a move on that hold. Y: numeric grade of each problem.
n_probs, n_holds = len(probs), len(coords_map)
X = np.zeros((n_probs, n_holds))
for row, prob in enumerate(probs):
    # Move descriptions are upper-cased before the coords_map lookup.
    hold_cols = [coords_map[move["description"].upper()] for move in prob["moves"]]
    X[row, hold_cols] = 1
Y = np.array([grade_to_num[prob["grade"]] for prob in probs])

In [None]:
# Fraction of the problems used for training; the remainder is held out for validation.
r_train = 0.80
n_train = int(r_train * len(probs))

# Draw a single permutation from a seeded generator. One shuffle is already
# uniform (repeating it adds nothing), and the fixed seed makes the split --
# and therefore every score reported below -- reproducible across kernel restarts.
split_seed = 0
idcs = np.random.default_rng(split_seed).permutation(len(probs))
idcs_train = idcs[:n_train]
idcs_valid = idcs[n_train:]

X_train, Y_train = X[idcs_train], Y[idcs_train]
X_valid, Y_valid = X[idcs_valid], Y[idcs_valid]

In [None]:
# Sanity check: shapes of the train / validation features and targets.
tuple(arr.shape for arr in (X_train, Y_train, X_valid, Y_valid))

# Logistic Regression

In [None]:
# Logistic regression on the hold-indicator features, with balanced class weights.
clf = LogisticRegression(
    solver="newton-cg",
    max_iter=1000,
    class_weight="balanced",
    random_state=0,
).fit(X_train, Y_train)

# Predictions are kept for the report cell that follows.
Y_train_pred = clf.predict(X_train)
Y_valid_pred = clf.predict(X_valid)

# Plain accuracy on both splits.
acc_train = clf.score(X_train, Y_train)
acc_valid = clf.score(X_valid, Y_valid)
print(acc_train, acc_valid)

In [None]:
# Validation report at tol = 0 and tol = 1.
# NOTE(review): presumably tol is how many grades a prediction may be off and
# still count as correct -- confirm in metrics.soft_classification_report.
y_true, y_pred = Y_valid, Y_valid_pred
for tol in (0, 1):
    rep = soft_classification_report(y_true, y_pred, tol, target_names=grade_names, digits=3)
    print(rep)

# KNN

In [None]:
# 5-nearest-neighbours classifier with distance-weighted votes.
clf = KNeighborsClassifier(weights="distance", n_neighbors=5).fit(X_train, Y_train)

# Predictions are kept for the report cell that follows.
Y_train_pred = clf.predict(X_train)
Y_valid_pred = clf.predict(X_valid)

# Plain accuracy on both splits.
acc_train = clf.score(X_train, Y_train)
acc_valid = clf.score(X_valid, Y_valid)
print(acc_train, acc_valid)

In [None]:
# Validation report at tol = 0 and tol = 1.
# NOTE(review): presumably tol is how many grades a prediction may be off and
# still count as correct -- confirm in metrics.soft_classification_report.
y_true, y_pred = Y_valid, Y_valid_pred
for tol in (0, 1):
    rep = soft_classification_report(y_true, y_pred, tol, target_names=grade_names, digits=3)
    print(rep)

# MLP Classifier

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
# scikit-learn MLP with default hyper-parameters. random_state is pinned
# because weight initialisation and batch sampling are random; without it the
# scores below change on every run.
clf = MLPClassifier(random_state=0)
clf.fit(X_train, Y_train)

# Plain accuracy on both splits.
acc_train = clf.score(X_train, Y_train)
acc_valid = clf.score(X_valid, Y_valid)
print(acc_train, acc_valid)

# Predictions are kept for the report cell that follows.
Y_valid_pred = clf.predict(X_valid)
Y_train_pred = clf.predict(X_train)

In [None]:
# Validation report at tol = 0 and tol = 1.
# NOTE(review): presumably tol is how many grades a prediction may be off and
# still count as correct -- confirm in metrics.soft_classification_report.
y_true, y_pred = Y_valid, Y_valid_pred
for tol in (0, 1):
    rep = soft_classification_report(y_true, y_pred, tol, target_names=grade_names, digits=3)
    print(rep)

# Decision Trees

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Decision tree with default hyper-parameters. random_state is pinned because
# scikit-learn permutes the candidate features at every split, so ties between
# equally good splits are otherwise broken differently on each run.
clf = DecisionTreeClassifier(random_state=0)
clf.fit(X_train, Y_train)

# Plain accuracy on both splits.
acc_train = clf.score(X_train, Y_train)
acc_valid = clf.score(X_valid, Y_valid)
print(acc_train, acc_valid)

# Predictions are kept for the report cell that follows.
Y_valid_pred = clf.predict(X_valid)
Y_train_pred = clf.predict(X_train)

In [None]:
# Validation report at tol = 0 and tol = 1.
# NOTE(review): presumably tol is how many grades a prediction may be off and
# still count as correct -- confirm in metrics.soft_classification_report.
y_true, y_pred = Y_valid, Y_valid_pred
for tol in (0, 1):
    rep = soft_classification_report(y_true, y_pred, tol, target_names=grade_names, digits=3)
    print(rep)

# Torch MLP

In [None]:
# Take the NumPy snapshots straight from X / Y and the split indices instead of
# from X_train / Y_train: the following cells rebind those names to (rescaled)
# torch tensors, so copying from them made this cell produce different data
# when re-run. Integer-array indexing already returns a fresh copy.
X_train_np = X[idcs_train]
Y_train_np = Y[idcs_train]
X_valid_np = X[idcs_valid]
Y_valid_np = Y[idcs_valid]

In [None]:
# Features: float32 copies of the hold indicators, rescaled from {0, 1} to {-1, +1}.
X_train = torch.tensor(X_train_np, dtype=torch.float32) * 2 - 1
X_valid = torch.tensor(X_valid_np, dtype=torch.float32) * 2 - 1

# Targets: class indices kept as int64 (the dtype CrossEntropyLoss expects for
# class-index targets) rather than being round-tripped through float32 by the
# legacy torch.Tensor constructor.
Y_train = torch.tensor(Y_train_np, dtype=torch.long)
Y_valid = torch.tensor(Y_valid_np, dtype=torch.long)

In [None]:
def count_parameters(model):
    """Return the total number of scalar elements in the model's trainable parameters.

    Only parameters with ``requires_grad`` set are counted.

    NOTE(review): this definition shadows the ``count_parameters`` imported
    from ``utils`` at the top of the notebook -- keep only one of them.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

class MoonModel(torch.nn.Module):
    """Fully connected classifier with two hidden layers.

    Each hidden layer is Linear -> BatchNorm1d -> ReLU; a final Linear layer
    maps to ``output_dim`` scores.

    Args:
        input_dim: number of input features per sample.
        output_dim: number of output scores (one per class).
        hidden_dim: width of both hidden layers (default 256).
    """

    def __init__(self, input_dim, output_dim, hidden_dim=256):
        super().__init__()
        # No softmax at the end: the model returns raw logits, which is what
        # torch.nn.CrossEntropyLoss expects (it applies log-softmax itself).
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        )

    def forward(self, x):
        """Return the logits produced by the network for the batch ``x``."""
        return self.net(x)

In [None]:
# Input width = number of columns of X; output width = number of distinct
# grades found in probs.
n_inputs, n_classes = X.shape[1], len(grades)
model = MoonModel(input_dim=n_inputs, output_dim=n_classes)
print(count_parameters(model))
model

In [None]:
# Fall back to the CPU when no CUDA device is available, so the notebook still
# runs end to end on machines without a GPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-6)
# class_weights (total / per-grade count) up-weights the rarer grades in the loss.
loss_fn = torch.nn.CrossEntropyLoss(weight=torch.Tensor(class_weights).to(device))

n_epochs = 600
log_every = 100

# Full-batch training: every step uses the whole training set. The splits are
# moved to the device once instead of on every evaluation.
x = X_train.to(device)
y = Y_train.to(device).long()
x_valid = X_valid.to(device)

for epoch_idx in range(n_epochs):
    model.train()
    optimizer.zero_grad()
    out = model(x)
    loss = loss_fn(out, y)
    loss.backward()
    optimizer.step()

    if epoch_idx % log_every == 0:
        # Evaluate in eval mode: in train mode BatchNorm normalises with the
        # statistics of the batch it is given and updates its running stats,
        # so the validation pass would both be scored with validation-set
        # statistics and leak them into the model.
        model.eval()
        with torch.no_grad():
            Y_train_pred = model(x).argmax(dim=-1).cpu()
            Y_valid_pred = model(x_valid).argmax(dim=-1).cpu()
        acc_train = (Y_train_pred == Y_train).float().mean()
        acc_valid = (Y_valid_pred == Y_valid).float().mean()
        print("{:.4f} acc_train: {:.3f} acc_valid: {:.3f}".format(loss.item(), acc_train.item(), acc_valid.item()))

In [None]:
# Switch to eval mode before inference: in train mode BatchNorm would normalise
# each split with that split's own batch statistics (and update its running
# stats) instead of using the statistics learned during training.
model.eval()
with torch.no_grad():
    Y_train_pred = model(X_train.to(device)).argmax(dim=-1).cpu().numpy()
    Y_valid_pred = model(X_valid.to(device)).argmax(dim=-1).cpu().numpy()

# The report cell works on NumPy arrays. The isinstance guards keep this cell
# re-runnable after the labels have already been converted.
if isinstance(Y_train, torch.Tensor):
    Y_train = Y_train.numpy()
if isinstance(Y_valid, torch.Tensor):
    Y_valid = Y_valid.numpy()

In [None]:
# Validation report at tol = 0 and tol = 1.
# NOTE(review): presumably tol is how many grades a prediction may be off and
# still count as correct -- confirm in metrics.soft_classification_report.
y_true, y_pred = Y_valid, Y_valid_pred
for tol in (0, 1):
    rep = soft_classification_report(y_true, y_pred, tol, target_names=grade_names, digits=3)
    print(rep)