In [1]:
import csv
import json
import math
import random
import statistics

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

## Dataset creation

In [35]:
def _blocked_sides(line, pos):
    """Return how many sides (0-2) of ``pos`` in ``line`` contain a non-'O' square."""
    before = line[:pos]
    after = line[pos + 1:]
    return (int(any(cell != 'O' for cell in before))
            + int(any(cell != 'O' for cell in after)))


def calc_heuristics_white(state_str):
    """Compute four heuristic features for a 9x9 board, each squashed by atan.

    Parameters
    ----------
    state_str : str
        Row-major board string of exactly 81 characters. 'W' and 'B' are
        counted as white and black pawns, 'K' marks the king, and 'O' squares
        are treated as empty by the open-line scan.

    Returns
    -------
    tuple of float
        ``(material, king_center_distance, king_open_files, dispersion)``,
        each passed through ``math.atan``:

        * material: ``2 * (#W) - (#B) / 16``.
        * king_center_distance: Euclidean distance of the king from the
          centre square (4, 4), divided by 7.
        * king_open_files: starts at 4 and loses 1 for every blocked side of
          the king along its row (only when the row is 1, 2, 6 or 7) and
          along its column (only when the column is 1, 2, 6 or 7).
        * dispersion: mean Manhattan distance of the 'W' pawns from the
          centre, or 0.0 when the board has no 'W' pawn.

    Raises
    ------
    ValueError
        If ``state_str`` does not contain exactly 81 squares, or has no 'K'.
    """
    if len(state_str) != 81:
        raise ValueError(
            f"expected an 81-character board string, got {len(state_str)}")

    white_pawns = state_str.count('W')
    black_pawns = state_str.count('B')
    material = white_pawns*2 - black_pawns/16

    # str.index raises ValueError when the king is missing.
    king_row, king_col = divmod(state_str.index('K'), 9)
    king_center_distance = (
        math.sqrt(math.pow(4 - king_row, 2) + math.pow(4 - king_col, 2)) / 7)

    # NOTE(review): the previous version assigned an unused local named `open`
    # (2, then 0) when the king's row/column was outside (1, 2, 6, 7). Those
    # assignments had no effect, so in that case both directions stay counted
    # as open. That behavior is preserved here; confirm whether a penalty was
    # actually intended before changing it, since it alters the feature values.
    scanned_lines = (1, 2, 6, 7)
    king_open_files = 4
    if king_row in scanned_lines:
        row = state_str[king_row * 9:(king_row + 1) * 9]
        king_open_files -= _blocked_sides(row, king_col)
    if king_col in scanned_lines:
        column = state_str[king_col::9]
        king_open_files -= _blocked_sides(column, king_row)

    distances = [abs(i // 9 - 4) + abs(i % 9 - 4)
                 for i, cell in enumerate(state_str) if cell == 'W']
    # Guard against boards without white pawns (previously ZeroDivisionError).
    dispersion = sum(distances) / len(distances) if distances else 0.0

    return (math.atan(material), math.atan(king_center_distance),
            math.atan(king_open_files), math.atan(dispersion))

In [44]:
# Load the raw dataset: board-state strings under 'x', target evaluations under 'y'.
with open('./dataset/dataset.json') as json_file:
    data = json.load(json_file)

states = data['x']
evals = np.array(data['y'])

# Fail fast if features and targets are misaligned, before printing any stats.
assert len(states) == len(evals)

# Optional rescaling of the targets from [0.4, 0.965] to [-1, 1] (disabled):
# evals -= 0.4
# evals /= 0.565
# evals *= 2
# evals -= 1

print(len(evals))

print(min(evals))
print(max(evals))

print(statistics.mean(evals))
print(statistics.median(evals))

# Write one row of heuristic features per board with at most 16 pawns in total.
# The context manager closes the file even if a board fails to parse, and
# newline='' lets the csv module control line endings (no blank rows on Windows).
j = 0
with open('./dataset/heuristics_white_late.csv', 'w', newline='') as data_file:
    csv_writer = csv.writer(data_file)
    csv_writer.writerow(['h1', 'h2', 'h3', 'h4', 'eval'])
    for state, target in zip(states, evals):
        if state.count('W') + state.count('B') <= 16:
            j += 1
            h1, h2, h3, h4 = calc_heuristics_white(state)
            csv_writer.writerow([h1, h2, h3, h4, target])

# Number of rows written to the CSV.
print(j)

22230
0.4
0.965
0.6709003598740441
0.67
4043


## Linear regression

In [45]:
# Read the heuristic feature table from the CSV written during dataset creation.
file_path = './dataset/heuristics_white_late.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows as the cell's displayed result.
df.head(n=5)

Unnamed: 0,h1,h2,h3,h4,eval
0,1.43373,0.0,1.325818,1.107149,0.71
1,1.382575,0.0,1.325818,1.107149,0.755
2,1.382575,0.0,1.325818,1.249046,0.745
3,1.382575,0.0,1.325818,1.249046,0.79
4,1.382575,0.0,1.325818,1.27934,0.7


In [46]:
# Features are every column except the last; the last column is the target.
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# 80% training, 20% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=333)

model = LinearRegression()
model.fit(X_train, y_train)

weights = model.coef_
intercept = model.intercept_

# Report one learned weight per heuristic column, then the bias term.
for i, weight in enumerate(weights):
    print(f"Peso per l'euristica {i+1}: {weight}")
print(f"Intercetta (bias): {intercept}")


y_pred = model.predict(X_test)

# MSE
mse = mean_squared_error(y_test, y_pred)
print(" ")
print(f"Errore quadratico medio (MSE) sul set di test: {mse}")

print(" ")
# Spot-check ten test rows. randrange excludes its upper bound, whereas
# randint(0, len(y_pred)) could return len(y_pred) and raise IndexError.
# A dedicated seeded generator keeps the printed sample reproducible.
sample_rng = random.Random(333)
for i in range(10):
    r = sample_rng.randrange(len(y_pred))
    print(f"Reale: {y_test[r]}, Predetto: {y_pred[r]}")

Peso per l'euristica 1: -0.12405967435692923
Peso per l'euristica 2: 0.03412131595789941
Peso per l'euristica 3: -0.004269589031221788
Peso per l'euristica 4: -0.04453968516831189
Intercetta (bias): 0.946260145191089
 
Errore quadratico medio (MSE) sul set di test: 0.004142498731817316
 
Reale: 0.665, Predetto: 0.7053756345372474
Reale: 0.77, Predetto: 0.7190107206627736
Reale: 0.71, Predetto: 0.7722895468007187
Reale: 0.745, Predetto: 0.7869250431554262
Reale: 0.695, Predetto: 0.7033481861821513
Reale: 0.645, Predetto: 0.7072794891938328
Reale: 0.81, Predetto: 0.780604993017046
Reale: 0.79, Predetto: 0.7283011325082137
Reale: 0.805, Predetto: 0.730032967671324
Reale: 0.77, Predetto: 0.7251273659505317
