In [1]:
'''
Worked on by: Meena Hari and Tarini Singh.

We perform data preprocessing using KNearestNeighbors.
66 new features are generated.

Trained a 1 layer ANN with transformed, higher dimensional 
dataset (each input consists of the raw board representaion 
(list of integers from 1 - 16) plus 66 newly generated features).

In prog.

'''

import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from tqdm import tqdm
import keras.backend as K
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Conv2D, Flatten, Input
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from keras.models import load_model
import keras.losses

from constants import * 
from heuristic import *
from io_help import *
from solver import *

def load_data(file_name):
	"""
	This function reads in training data from a file and returns 
	the boards in X and their labels in Y as a tuple. 
	"""
	file = open(file_name, "r")

	X = []
	Y = []

	for string in file: 
		(board, dist) = string_to_board_and_dist(string)
		X.append(np.asarray(board).flatten())
		Y.append(dist)
    

	file.close()

	X_train = np.asarray(X)
	Y_train = np.asarray(Y)
    
	return(X_train, Y_train)

Using TensorFlow backend.


In [2]:
# Load dataset. 
# X: board inputs, Y: true output.
(X_train,Y_train) = load_data('Uncombined Data Files/meena_5_19_2020_93844.txt')

In [3]:
knn_model = NearestNeighbors(n_neighbors=50, n_jobs = -1).fit(X_train,Y_train)

In [4]:
def gen_features (X, X_train):
    data_arr = np.zeros([len(X), 16*2*2 + 2])
    pred = knn_model.kneighbors(X)
    
    for i in tqdm(range(len(X))):
        row = X[i]
        # Grabs the rows in X corresponding to 50 nearest neighbors of X[i].
        # pred[1][i] contains a list of the indices of the 50 nearest neighbors.
        data = X_train[pred[1][i]]
        # Divide X[i] by each of its neighbors. div should be a 
        # 50 x 16 matrix, i.e. div[j] = X[i] / X[j].
        div = (row / data)
        # Subtract X[i] by each of its neighbors. diff should be a 
        # 50 x 16 dimension matrix.
        diff = (row - data)
        # concat is a 50 x 32 matrix.
        concat = np.concatenate([div, diff], axis = 1)
        # means is a 50 x 32 matrix.
        # std is a 50 x 32 matrix.
        means, stds = np.nanmean(concat, axis = 0), np.nanstd(concat, axis = 0)
        # Populate data_arr with newly generated features.
        data_arr[i, :len(means)] = means
        data_arr[i, len(means):len(means) + len(stds)] = stds
        data_arr[i, -1] = np.nanmean(pred[0][i])
        data_arr[i, -2] = np.nanstd(pred[0][i])
    # Concatenate generated features to the original dataset.
    return np.concatenate([X, data_arr], axis=1)

In [5]:
X_train_2 = gen_features(X_train, X_train)
# Following output makes sense because the first couple of rows represent boards 
# close to the solution (based on the way we ordered our training sets... first data points in X
# are 1, 2, and 3 moves away from solution), so div will be 1 for most entries, diff will be 0 for most
# entries, and so on. The last couple rows will represent boards farther from the solution, so the generated 
# features are not clean numbers..

100%|██████████| 93844/93844 [00:31<00:00, 2977.33it/s]


In [6]:
def shift_mse(y_true, y_pred):
    """custom loss functions"""
    loss = (1 + 1/ (1 + K.exp(-(y_pred - y_true)))) * K.square(y_pred - y_true)
    loss = K.mean(loss, axis = 1)
    return loss
keras.losses.shift_mse = shift_mse

In [7]:
# Build Model
model = Sequential()

# Input Layer
model.add(Dense(units=(16*2*2+2+16), input_dim=(16*2*2+2+16), activation='relu'))
model.add(Dropout(0.1))

# Hidden Layers
model.add(Dense(units=66+16, activation='relu'))

# Output Layer
model.add(Dense(units=1, activation='linear'))

# Define the optimizer and loss function
#model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
model.compile(optimizer='adam', loss=shift_mse, metrics=['accuracy'])

# You can also define a custom loss function
# model.compile(optimizer='adam', loss=custom_loss)

# Train 
model.fit(X_train_2, Y_train, epochs=20)

# Test
#score = model.evaluate(X_test, Y_test)

#print(score)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.callbacks.History at 0x104b58c88>

In [8]:
# Rough Testing

# Load test dataset. 
# X_test: board inputs, Y_test: true output.
(X_test,Y_test) = load_data('Uncombined Data Files/Yasmin_5_19_10048.txt')

# Transform X_test to higher dimension.
X_test_2 = gen_features (X_test, X_train)

100%|██████████| 10047/10047 [00:03<00:00, 2544.81it/s]


In [10]:
dist_over_i = []
misclass_i = 0
dist_under_i = []

dist_over_r = []
misclass_r = 0
dist_under_r = []

for i in tqdm(range(len(X_test))):
    nn_heur_i = int(model.predict(X_test_2[i:(i+1),:]))
    nn_heur_r = np.around(model.predict(X_test_2[i:(i+1),:]))
    man_heur = manhattan(X_test[i].reshape(SIZE,SIZE), model)
    y = Y_test[i]
    
    ### TRUNCATE ###
    if (nn_heur_i > y):
        dist_over_i.append(nn_heur_i - y)
    
    if (nn_heur_i <= y):
        dist_under_i.append(y - nn_heur_i)
    
    if (nn_heur_i != y):
        misclass_i += 1
        
    ##### ROUND ##### 
    if (nn_heur_r > y):
        dist_over_r.append(nn_heur_r - y)
    
    if (nn_heur_r <= y):
        dist_under_r.append(y - nn_heur_r)
    
    if (nn_heur_r != y):
        misclass_r += 1
    
avg_dist_over_i = np.mean(np.asarray(dist_over_i))
avg_dist_under_i = np.mean(np.asarray(dist_under_i))
out_sample_error_i = misclass_i / len(X_test)

avg_dist_over_r = np.mean(np.asarray(dist_over_r))
avg_dist_under_r = np.mean(np.asarray(dist_under_r))
out_sample_error_r = misclass_r / len(X_test)
 
print("------ TRUCATION: ------")
print("Avg distance overestimated: ", avg_dist_over_i)
print("Avg distance underestimated: ", avg_dist_under_i)
print("E_admiss: ", len(dist_over_i)/len(X_test))
print("E_out: ", out_sample_error_i)

print("------ ROUNDED: ------")
print("Avg distance overestimated: ", avg_dist_over_r)
print("Avg distance underestimated: ", avg_dist_under_r)
print("E_admiss: ", len(dist_over_r)/len(X_test))
print("E_out: ", out_sample_error_r)

100%|██████████| 10047/10047 [00:17<00:00, 558.42it/s]

------ TRUCATION: ------ 

Avg distance overestimated:  2.732376530181511
Avg distance underestimated:  2.1070591299817663
E_admiss:  0.23579177864039016
E_out:  0.7764506817955609
------ ROUNDED: ------ 

Avg distance overestimated:  2.6403682
Avg distance underestimated:  1.8386867
E_admiss:  0.3027769483427889
E_out:  0.6852791878172588



