In [None]:
import pandas as pd
import numpy as np

# Number of leading rows used for training; everything after that is held out.
# 2048000 = 16000 batches of 128 samples, which is what the training loop consumes.
TRAIN_SIZE = 2048000
# Fixed seed so that "Restart Kernel -> Run All" reproduces the same training order.
SHUFFLE_SEED = 42

# The file has no header row; each line is read into a single column named "FEN".
df = pd.read_csv("fics_fen_2_2M_rnn.csv", header=None, sep=";", names=["FEN"])
X_data = df["FEN"]

X_train = X_data[:TRAIN_SIZE]
X_test = X_data[TRAIN_SIZE:]

# Shuffle the training rows once. A local RandomState keeps the shuffle
# reproducible without touching numpy's global random state, and .iloc makes
# the reordering explicitly positional (plain [] with an integer array is
# label-based on a Series and only works by accident of the default index).
idx = np.random.RandomState(SHUFFLE_SEED).permutation(X_train.shape[0])
X_train = X_train.iloc[idx]

In [None]:
from keras.layers import Input, Dense, Conv2D, UpSampling2D, AveragePooling2D
from keras.models import Model

# Input placeholder: an 8x8 grid with 7 channels per cell.
input_pos = Input(shape=(8, 8, 7))

# Encoder: three conv + average-pooling stages. Each pooling step halves the
# spatial size (8 -> 4 -> 2 -> 1) while the filter count shrinks.
encoded = input_pos
for n_filters in (64 * 7, 32 * 7, 16 * 7):
    encoded = Conv2D(n_filters, (3, 3), activation='tanh', padding='same')(encoded)
    encoded = AveragePooling2D((2, 2), padding='same')(encoded)

# Decoder: mirror image of the encoder. Each upsampling step doubles the
# spatial size (1 -> 2 -> 4 -> 8) while the filter count grows back.
decoded = encoded
for n_filters in (16 * 7, 32 * 7, 64 * 7):
    decoded = Conv2D(n_filters, (3, 3), activation='tanh', padding='same')(decoded)
    decoded = UpSampling2D((2, 2))(decoded)

# Final convolution brings the channel count back to 7 so the output has the
# same shape as the input.
decoded = Conv2D(7, (3, 3), activation='tanh', padding='same')(decoded)

# The full model maps an input to its lossy reconstruction.
autoencoder = Model(input_pos, decoded)
autoencoder.summary()

In [None]:
def batchtotensor(inputbatch):
    """Encode a batch of FEN strings as a ``(len(inputbatch), 8, 8, 7)`` tensor.

    Channels 0-5 correspond to the piece letters P, N, B, R, Q, K in that
    order. A square holds +1 for an uppercase letter (white in FEN) and -1
    for a lowercase letter (black in FEN) in the channel of that piece type;
    all other entries stay 0. Channel 6 is constant over the whole board:
    +1 when the second FEN field is ``"w"``, otherwise -1.

    Parameters
    ----------
    inputbatch : sized iterable of str
        FEN strings. Each must contain at least the piece-placement field
        and the side-to-move field, separated by whitespace.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(inputbatch), 8, 8, 7)``.

    Raises
    ------
    ValueError
        If the piece-placement field contains a character that is neither a
        piece letter, ``'/'``, nor one of the digits 1-8.
    """
    pieces_str = "PNBRQK"
    # Signed piece code: +k for the k-th uppercase letter, -k for its lowercase.
    pieces_dict = {}
    for code, letter in enumerate(pieces_str, start=1):
        pieces_dict[letter] = code
        pieces_dict[letter.lower()] = -code
    # Compare against digit *characters* so that an invalid non-digit character
    # reaches the descriptive ValueError below instead of failing inside int().
    valid_spaces = set("12345678")

    maxnum = len(inputbatch)
    boardtensor = np.zeros((maxnum, 8, 8, 7))

    for num, inputstr in enumerate(inputbatch):
        inputliste = inputstr.split()
        rownr = 0
        colnr = 0
        for i, c in enumerate(inputliste[0]):
            if c in pieces_dict:
                code = pieces_dict[c]
                boardtensor[num, rownr, colnr, abs(code) - 1] = 1 if code > 0 else -1
                colnr += 1
            elif c == '/':  # new row
                rownr += 1
                colnr = 0
            elif c in valid_spaces:  # run of empty squares
                colnr += int(c)
            else:
                raise ValueError("invalid fenstr at index: {} char: {}".format(i, c))

        # Side-to-move plane: one constant value across the whole board.
        boardtensor[num, :, :, 6] = 1 if inputliste[1] == "w" else -1

    return boardtensor


In [None]:
from keras import optimizers
opt = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=True)
autoencoder.compile(optimizer=opt, loss='mean_squared_error')

def myGenerator(batch_size=128, steps=16000):
    """Endlessly yield ``(input, target)`` batches for autoencoder training.

    Each pass walks over ``steps`` consecutive slices of ``X_train`` of
    ``batch_size`` rows each (defaults: 16000 * 128 = 2048000 training
    samples) and encodes them with ``batchtotensor``. Input and target are the
    same tensor because the model is trained to reconstruct its input.

    The tensors are yielded in the ``(batch, 8, 8, 7)`` layout produced by
    ``batchtotensor``. They must not be flattened: the convolutional model
    takes ``(8, 8, 7)`` inputs and produces ``(8, 8, 7)`` outputs, so flat
    ``(batch, 448)`` arrays are rejected by Keras.
    """
    while 1:
        for i in range(steps):
            ret = batchtotensor(X_train[i * batch_size:(i + 1) * batch_size])
            yield (ret, ret)

my_generator = myGenerator()

# Validation data keeps the same (N, 8, 8, 7) layout as the training batches.
testtensor = batchtotensor(X_test)
validdata = testtensor

history = autoencoder.fit_generator(my_generator, steps_per_epoch = 16000, epochs = 100, verbose=1, 
              validation_data=(validdata,validdata), workers=1)
autoencoder.save('dense_100.h5')