In [35]:
import math
import json
import csv
import random
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

## Dataset creation

In [26]:
# Sanity check: atan(1) equals pi/4, so the atan(v) / (pi/2) squashing used by
# calc_heuristics_white maps v = 1 to 0.5.
print(math.atan(1))

0.7853981633974483


In [29]:
def calc_heuristics_white(state_str):
    """Compute four squashed heuristic features from a 9x9 board string.

    Parameters
    ----------
    state_str : str
        The board flattened row by row into 81 characters. The code counts
        'W' and 'B' cells (presumably white and black pieces), locates the
        single 'K' cell (the king) and treats 'O' as an empty square.

    Returns
    -------
    tuple of four floats
        ``(material, king_center_distance, king_open_files, dispersion)``,
        each passed through ``atan(v) / (pi / 2)``:

        * material: ``count('W') * 2 - count('B') / 16``;
        * king_center_distance: Euclidean distance of the king from cell
          (4, 4), divided by 7;
        * king_open_files: how many of the four straight lines from the king
          to the board edge contain only 'O' cells (0..4);
        * dispersion: mean Manhattan distance of the 'W' cells from (4, 4),
          or 0.0 when the board has no 'W' cell.

    Raises
    ------
    ValueError
        If ``state_str`` does not contain exactly 81 characters (reshape
        fails) or contains no 'K' (``str.index`` fails).
    """
    board = np.array(list(state_str)).reshape((9, 9))

    # NOTE(review): operator precedence makes this 2*W - (B/16), not
    # (2*W - B)/16. Kept as-is to preserve existing feature values; confirm
    # which normalisation was intended.
    material = state_str.count('W') * 2 - state_str.count('B') / 16

    # Row (x) and column (y) of the king in the row-major string.
    x, y = divmod(state_str.index('K'), 9)
    king_center_distance = math.sqrt((4 - x) ** 2 + (4 - y) ** 2) / 7

    # The four rays leaving the king: left, right, up, down. A ray is "open"
    # when every cell on it is empty; a zero-length ray (king on the edge)
    # counts as open because .all() of an empty array is True.
    rays = (board[x, :y], board[x, y + 1:], board[:x, y], board[x + 1:, y])
    king_open_files = sum(1 for ray in rays if (ray == 'O').all())

    distances = [
        abs(i // 9 - 4) + abs(i % 9 - 4)
        for i, cell in enumerate(state_str)
        if cell == 'W'
    ]
    # Guard against a board with no 'W' cells, which previously raised
    # ZeroDivisionError.
    dispersion = sum(distances) / len(distances) if distances else 0.0

    def squash(value):
        # Map any real value monotonically into (-1, 1).
        return math.atan(value) / (math.pi / 2)

    return (
        squash(material),
        squash(king_center_distance),
        squash(king_open_files),
        squash(dispersion),
    )

In [55]:
# Load the raw dataset from JSON.
with open('./dataset/dataset.json') as json_file:
    data = json.load(json_file)

# 'x' entries are presumably board-state strings (the disabled export below
# feeds them to calc_heuristics_white); 'y' holds the matching evaluations.
states = data['x']
evals = data['y']

# Preview the first 100 targets: raw, then after the linear rescaling
# v * 2 - 1 (which sends 0.5 to 0 and 1 to 1).
print(evals[:100])
print(np.array(evals[:100])*2-1)
# assert len(states) == len(evals)

# Disabled export: would write one CSV row per state containing the four
# heuristics and the rescaled evaluation (eval * 2 - 1).
# data_file = open('./dataset/heuristics_white.csv', 'w')

# csv_writer = csv.writer(data_file)

# csv_writer.writerow(['h1', 'h2', 'h3', 'h4', 'eval'])
# for i in range(len(states)):
#     h1, h2, h3, h4  = calc_heuristics_white(states[i])
#     csv_writer.writerow([h1, h2, h3, h4, evals[i]*2-1])

# data_file.close()

[0.59, 0.685, 0.575, 0.7, 0.74, 0.65, 0.645, 0.71, 0.605, 0.76, 0.73, 0.72, 0.675, 0.79, 0.75, 0.75, 0.625, 0.66, 0.645, 0.635, 0.645, 0.73, 0.705, 0.705, 0.75, 0.745, 0.7, 0.725, 0.69, 0.76, 0.71, 0.755, 0.745, 0.79, 0.7, 0.715, 0.815, 0.775, 0.87, 0.945, 0.61, 0.67, 0.72, 0.635, 0.61, 0.62, 0.595, 0.575, 0.54, 0.59, 0.6, 0.74, 0.54, 0.73, 0.65, 0.81, 0.705, 0.68, 0.65, 0.675, 0.645, 0.625, 0.645, 0.685, 0.69, 0.665, 0.635, 0.67, 0.645, 0.64, 0.82, 0.67, 0.745, 0.66, 0.685, 0.605, 0.705, 0.7, 0.665, 0.76, 0.78, 0.785, 0.79, 0.725, 0.685, 0.66, 0.665, 0.665, 0.67, 0.615, 0.685, 0.73, 0.62, 0.675, 0.725, 0.675, 0.73, 0.81, 0.755, 0.73]
[0.18 0.37 0.15 0.4  0.48 0.3  0.29 0.42 0.21 0.52 0.46 0.44 0.35 0.58
 0.5  0.5  0.25 0.32 0.29 0.27 0.29 0.46 0.41 0.41 0.5  0.49 0.4  0.45
 0.38 0.52 0.42 0.51 0.49 0.58 0.4  0.43 0.63 0.55 0.74 0.89 0.22 0.34
 0.44 0.27 0.22 0.24 0.19 0.15 0.08 0.18 0.2  0.48 0.08 0.46 0.3  0.62
 0.41 0.36 0.3  0.35 0.29 0.25 0.29 0.37 0.38 0.33 0.27 0.34 0.29 0.28
 0

## Linear regression

In [33]:
# Load the pre-computed heuristic table (columns h1..h4 plus the eval target).
file_path = './dataset/heuristics_white.csv'
df = pd.read_csv(file_path)
# Show the first rows as a quick check of the loaded frame.
df.head()

Unnamed: 0,h1,h2,h3,h4,eval
0,0.957621,0.0,0.0,0.625666,0.18
1,0.957621,0.0,0.0,0.688083,0.37
2,0.951125,0.0,0.0,0.685547,0.15
3,0.951125,0.0,0.0,0.737451,0.4
4,0.951125,0.0,0.0,0.737451,0.48


In [51]:
# Features are every column except the last; the target is the last column.
# NOTE(review): iloc[:, :-1] takes every non-final column as a feature, so a
# saved index column in the CSV would be fitted too; confirm the file has none.
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# 80% training, 20% testing (fixed random_state keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=333)

# Fit an ordinary least-squares model: one weight per heuristic plus a bias.
model = LinearRegression()
model.fit(X_train, y_train)

weights = model.coef_
intercept = model.intercept_

for i, weight in enumerate(weights):
    print(f"Peso per l'euristica {i+1}: {weight}")
print(f"Intercetta (bias): {intercept}")


y_pred = model.predict(X_test)

# MSE on the held-out test split
mse = mean_squared_error(y_test, y_pred)
print(" ")
print(f"Errore quadratico medio (MSE) sul set di test: {mse}")

# Print 10 randomly chosen test samples, actual value next to prediction.
print(" ")
for i in range(10):
    # randrange excludes the upper bound; random.randint(0, n) is inclusive
    # and could return n, indexing one past the end of the arrays.
    r = random.randrange(len(y_pred))
    print(f"Reale: {y_test[r]}, Predetto: {y_pred[r]}")

Peso per l'euristica 1: -0.785484889465902
Peso per l'euristica 2: 0.2662737563271873
Peso per l'euristica 3: -0.020088909707991355
Peso per l'euristica 4: -0.12993587292599992
Intercetta (bias): 1.144743893280369
 
Errore quadratico medio (MSE) sul set di test: 0.015562544169923975
 
Reale: 0.3600000000000001, Predetto: 0.3895731202698999
Reale: 0.29, Predetto: 0.2785598131125089
Reale: 0.07, Predetto: 0.4292903259711658
Reale: 0.4099999999999999, Predetto: 0.3009637004845197
Reale: 0.31, Predetto: 0.3191067033924265
Reale: 0.24, Predetto: 0.3032410653869617
Reale: 0.4499999999999999, Predetto: 0.35770922798552807
Reale: 0.4399999999999999, Predetto: 0.33596473087059797
Reale: 0.3, Predetto: 0.3607702255300056
Reale: 0.1899999999999999, Predetto: 0.2785598131125089
