In [2]:
import csv
import json
import math
import random
import statistics

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

## Dataset creation

In [None]:
def calc_heuristics_white(state_str):
    """Compute four squashed heuristic features for a 9x9 board state.

    Args:
        state_str: 81-character string read as a row-major 9x9 board. Only the
            symbols 'W', 'B', 'K' and 'O' are inspected here; 'O' is the only
            symbol the king's ray scans pass through (presumably an empty
            square -- confirm against the dataset encoding).

    Returns:
        A 4-tuple ``(material, king_center_distance, king_open_files,
        dispersion)``, each passed through ``atan(v) / (pi / 2)``:
          * material: ``count('W') * 2 - count('B') / 16``.
          * king_center_distance: Euclidean distance of 'K' from cell (4, 4),
            divided by 7.
          * king_open_files: number of the four orthogonal directions in which
            every cell between the king and the board edge is 'O'.
          * dispersion: mean Manhattan distance of the 'W' cells from (4, 4),
            or 0.0 when the board contains no 'W'.

    Raises:
        ValueError: if ``state_str`` has no 'K' or does not hold exactly 81
            characters.
    """
    board = np.array(list(state_str)).reshape((9, 9))

    material = state_str.count('W') * 2 - state_str.count('B') / 16

    king_row, king_col = divmod(state_str.index('K'), 9)
    king_center_distance = math.sqrt((4 - king_row) ** 2 + (4 - king_col) ** 2) / 7

    # Walk outwards from the king along each orthogonal direction. A direction
    # counts as open only when the walk leaves the board without meeting a
    # non-'O' cell.
    king_open_files = 0
    for d_row, d_col in ((0, -1), (0, 1), (-1, 0), (1, 0)):
        row, col = king_row + d_row, king_col + d_col
        while 0 <= row < 9 and 0 <= col < 9 and board[row, col] == 'O':
            row += d_row
            col += d_col
        if not (0 <= row < 9 and 0 <= col < 9):
            king_open_files += 1

    distances = [
        abs(index // 9 - 4) + abs(index % 9 - 4)
        for index, cell in enumerate(state_str)
        if cell == 'W'
    ]
    # A board without any 'W' used to raise ZeroDivisionError here.
    dispersion = sum(distances) / len(distances) if distances else 0.0

    return math.atan(material)/(math.pi/2), math.atan(king_center_distance)/(math.pi/2), math.atan(king_open_files)/(math.pi/2), math.atan(dispersion)/(math.pi/2)

In [51]:
# Load the raw dataset: data['x'] holds the board strings and data['y'] the
# matching evaluations.
with open('./dataset/dataset.json') as json_file:
    data = json.load(json_file)

states = data['x']
evals = np.array(data['y'])
# Disabled rescaling of evals from [0.4, 0.965] to [-1, 1]:
# evals -= 0.4
# evals /= 0.565
# evals *= 2
# evals -= 1

print(min(evals))
print(max(evals))

print(statistics.mean(evals))
print(statistics.median(evals))

assert len(states) == len(evals)

# `with` closes the file even if a row fails; newline='' is what the csv module
# requires so that no blank rows appear on platforms that translate '\n'.
with open('./dataset/heuristics_white.csv', 'w', newline='') as data_file:
    csv_writer = csv.writer(data_file)
    csv_writer.writerow(['h1', 'h2', 'h3', 'h4', 'eval'])
    for state, evaluation in zip(states, evals):
        h1, h2, h3, h4 = calc_heuristics_white(state)
        csv_writer.writerow([h1, h2, h3, h4, evaluation])

0.4
0.965
0.6709003598740441
0.67


## Linear regression

In [52]:
# Read back the heuristics table from disk.
file_path = './dataset/heuristics_white.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Show the first rows as a quick sanity check of the loaded columns.
df.head()

Unnamed: 0,h1,h2,h3,h4,eval
0,0.957621,0.0,0.0,0.625666,0.59
1,0.957621,0.0,0.0,0.688083,0.685
2,0.951125,0.0,0.0,0.685547,0.575
3,0.951125,0.0,0.0,0.737451,0.7
4,0.951125,0.0,0.0,0.737451,0.74


In [53]:
# Features are every column except the last; the target is the last column.
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# 80% training, 20% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=333)

model = LinearRegression()
model.fit(X_train, y_train)

weights = model.coef_
intercept = model.intercept_

# Report one learned weight per feature column, then the bias term.
for i, weight in enumerate(weights):
    print(f"Peso per l'euristica {i+1}: {weight}")
print(f"Intercetta (bias): {intercept}")


y_pred = model.predict(X_test)

# MSE
mse = mean_squared_error(y_test, y_pred)
print(" ")
print(f"Errore quadratico medio (MSE) sul set di test: {mse}")

# Spot-check ten randomly chosen test samples (repeats are possible).
print(" ")
for _ in range(10):
    # randrange excludes the upper bound; randint(0, len(y_pred)) could return
    # len(y_pred) itself and index one past the end of both arrays.
    r = random.randrange(len(y_pred))
    print(f"Reale: {y_test[r]}, Predetto: {y_pred[r]}")

Peso per l'euristica 1: -0.39274244473295117
Peso per l'euristica 2: 0.1331368781635933
Peso per l'euristica 3: -0.010044454853995709
Peso per l'euristica 4: -0.06496793646299999
Intercetta (bias): 1.0723719466401846
 
Errore quadratico medio (MSE) sul set di test: 0.0038906360424809925
 
Reale: 0.785, Predetto: 0.6935355760059283
Reale: 0.75, Predetto: 0.6836678023957776
Reale: 0.735, Predetto: 0.640988098092569
Reale: 0.705, Predetto: 0.6861304536201932
Reale: 0.615, Predetto: 0.6420209120297553
Reale: 0.73, Predetto: 0.6808781896797365
Reale: 0.72, Predetto: 0.6754766319610677
Reale: 0.545, Predetto: 0.6392799065562543
Reale: 0.775, Predetto: 0.669418092727487
Reale: 0.785, Predetto: 0.7256337881808592
