# Preparing Data
----

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!ls drive/MyDrive/Chess

In [None]:
!if [[ -e train ]]; then true; else unzip drive/MyDrive/Chess/train.zip > /dev/null ; fi;
!if [[ -e val ]]; then true; else unzip drive/MyDrive/Chess/val.zip > /dev/null ; fi;
!if [[ -e test ]]; then true; else unzip drive/MyDrive/Chess/test.zip > /dev/null ; fi;


In [None]:
!cp drive/MyDrive/Chess/train.csv ./
!cp drive/MyDrive/Chess/val.csv ./
!cp drive/MyDrive/Chess/sample_submission.csv ./

In [None]:
!pip install chess

# Importing Packages

In [None]:
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf

from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [None]:
train_df = pd.read_csv('train.csv')
train_df

In [None]:
val_df = pd.read_csv('val.csv')
val_df

## Sample images

In [None]:
# Preview the first rows*columns training images in a grid.
fig = plt.figure(figsize=(15, 15))
columns = 4
rows = 5
for i in range(columns * rows):
    img = cv2.imread('train/' + str(train_df.iloc[i]['ImageID']) + '.jpg')
    # OpenCV decodes to BGR while matplotlib expects RGB; convert so the
    # colours are not swapped in the preview.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Subplot positions are 1-based while dataframe rows are 0-based, so the
    # very first image (row 0) is no longer skipped.
    fig.add_subplot(rows, columns, i + 1)
    plt.imshow(img)
plt.show()

In [None]:
train_df.iloc[0].label

# Parsing FEN notation

In [None]:
import chess
import numpy as np

In [None]:
# Load the first training image as grayscale and apply the same resize/blur
# steps that preprocess_image() performs further down.
img = cv2.imread('train/'+str(train_df.iloc[0]['ImageID'])+'.jpg', cv2.IMREAD_GRAYSCALE)
img = cv2.resize(img, (256, 256))
# NOTE(review): the third positional argument of cv2.GaussianBlur is sigmaX, not
# a border mode, so the numeric value of cv2.BORDER_DEFAULT is used as the blur
# sigma here -- confirm that this is intended.
img = cv2.GaussianBlur(img,(3,3),cv2.BORDER_DEFAULT)
plt.imshow(img)

In [None]:
plt.imshow(img[:32, 32:64])

In [None]:
plt.imshow(img[-32:, -32:])


In [None]:
board = chess.Board(train_df.iloc[0].label)
print(board)

In [None]:
print(board.piece_at(chess.parse_square('a3')))

In [None]:
for square in chess.SquareSet(chess.BB_ALL):
  print(str(board.piece_at(square)))

In [None]:
def encode_piece(piece: str):
  """Map a piece symbol to its integer class id.

  ``'None'`` (the string form of an empty square) maps to 0, the upper-case
  symbols ``K Q R B N P`` map to 1-6 and the lower-case symbols ``k q r b n``
  map to 7-11.  Every other value, including ``'p'``, maps to 12.
  """
  class_ids = {
      'None': 0,
      'K': 1, 'Q': 2, 'R': 3, 'B': 4, 'N': 5, 'P': 6,
      'k': 7, 'q': 8, 'r': 9, 'b': 10, 'n': 11,
  }
  # 12 doubles as the id for 'p' and as the fallback for anything unlisted.
  return class_ids.get(piece, 12)
  
def preprocess_image(path: str):
  """Split a board image into its 64 square crops.

  The image is read as grayscale, resized to 256x256, blurred with a 3x3
  Gaussian kernel and cut into 32x32 cells.  Cells are emitted row by row
  starting from the bottom row of the image, left to right within a row.

  Args:
    path: Path of the image file to load.

  Returns:
    ``np.ndarray`` of shape ``(64, 32, 32)`` holding the cell crops.

  Raises:
    FileNotFoundError: If OpenCV cannot read an image from ``path``.
  """
  img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
  if img is None:
    # cv2.imread reports a missing/unreadable file by returning None; fail
    # here with the offending path instead of a cryptic error in cv2.resize.
    raise FileNotFoundError('Could not read image: ' + str(path))
  img = cv2.resize(img, (256, 256))
  # NOTE: GaussianBlur's third positional argument is sigmaX, so the numeric
  # value of cv2.BORDER_DEFAULT acts as the blur sigma.  Left unchanged so the
  # crops stay identical to the ones the model is trained on.
  img = cv2.GaussianBlur(img,(3,3),cv2.BORDER_DEFAULT)
  cells = [
      img[top: top+32, left: left+32]
      for top in range(256-32, -1, -32)   # bottom row first
      for left in range(0, 256-31, 32)    # left to right
  ]
  return np.array(cells)

In [None]:
preprocess_image('train/'+str(train_df.iloc[0]['ImageID'])+'.jpg').shape

In [None]:
# Per-square training set: X gathers the 32x32 crops of every board image and
# y the class id of the piece on each square.
X = []
y = []

for image_id, fen in tqdm(zip(train_df['ImageID'], train_df['label']), total=len(train_df)):
  imgID = str(image_id)
  img_path = 'train/'+imgID+'.jpg'
  X.extend(preprocess_image(img_path))

  # One label per square, in chess.SquareSet(chess.BB_ALL) iteration order.
  board = chess.Board(fen)
  y.extend(encode_piece(str(board.piece_at(square)))
           for square in chess.SquareSet(chess.BB_ALL))

X = np.array(X)
y = np.array(y)
print('X is: ', X.shape)
print('Y is: ', y.shape)

In [None]:
# Same per-square layout as the training arrays, built from the images in val/
# and the labels in val_df.
X_test = []
y_test = []

for image_id, fen in tqdm(zip(val_df['ImageID'], val_df['label']), total=len(val_df)):
  imgID = str(image_id)
  img_path = 'val/'+imgID+'.jpg'
  X_test.extend(preprocess_image(img_path))

  # One label per square, in chess.SquareSet(chess.BB_ALL) iteration order.
  board = chess.Board(fen)
  y_test.extend(encode_piece(str(board.piece_at(square)))
                for square in chess.SquareSet(chess.BB_ALL))

X_test = np.array(X_test)
y_test = np.array(y_test)
print('X is: ', X_test.shape)
print('Y is: ', y_test.shape)

# Undersampling Dataset

In [None]:
from collections import Counter
from sklearn.datasets import make_classification
from imblearn.under_sampling import RandomUnderSampler 

In [None]:
Counter(y)

#### Pie chart of Class Distribution

In [None]:
counts = Counter(y)
plt.pie([float(v) for v in counts.values()], labels=[float(k) for k in counts],
           autopct=None, radius=2, shadow=True,explode=(0, 0.2, 0, 0, 0, 0.2, 0, 0.2, 0, 0, 0, 0, 0))
plt.show()

In [None]:
X_reshaped = X.reshape(X.shape[0], -1)
print('Reshaped X is: ', X_reshaped.shape)


In [None]:
rus = RandomUnderSampler(random_state=42)
X_res, y_res = rus.fit_resample(X_reshaped, y)
print('Resampled dataset shape %s' % Counter(y_res))
X_back = X_res.reshape(X_res.shape[0], 32, 32)
print('X_back is: ', X_back.shape)
print('y is: ', y_res.shape)

In [None]:
Counter(y_res)

#### Pie Chart after under sampling the dataset

In [None]:
counts = Counter(y_res)
plt.pie([float(v) for v in counts.values()], labels=[float(k) for k in counts],
           autopct=None, radius=2, shadow=True,)
plt.show()

In [None]:
# Import from tensorflow.keras like the rest of the notebook; the
# keras.utils.np_utils module is not available in newer Keras releases.
from tensorflow.keras.utils import to_categorical

In [None]:
y_res_one_hot = to_categorical(y_res, num_classes=13)
print('Shape of [y_res_one_hot]: ', y_res_one_hot.shape)
y_test_one_hot=to_categorical(y_test,num_classes=13)
print('Shape of [y_test_one_hot]: ', y_test_one_hot.shape)

In [None]:
X = X.reshape(X.shape[0], 32,32, 1)

In [None]:
y_one_hot = to_categorical(y, num_classes=13)

In [None]:
X_back=X_res.reshape(X_res.shape[0],32,32,1)
X_test=X_test.reshape(X_test.shape[0],32,32,1)


In [None]:
print('Shape of [X_back] is: ', X_back.shape)

# Model



In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D,Flatten

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
es = EarlyStopping(monitor='val_loss', patience=3)

## Training model


In [None]:
model=Sequential()
model.add(Conv2D(16, (3,3), padding='same', activation="relu",input_shape=(32,32,1)))
model.add(MaxPool2D(2,2))
model.add(Conv2D(32, (3,3), padding='same', activation="relu"))
model.add(MaxPool2D(2,2))
model.add(Conv2D(64, (3,3), padding='same', activation="relu"))
model.add(MaxPool2D(2,2))
model.add(Flatten())
model.add(Dense(13,activation='softmax'))


In [None]:
from tensorflow.keras.optimizers import Adam

In [None]:
# `lr` is a deprecated alias that newer Keras releases reject; `learning_rate`
# is the supported keyword for the same setting.
optimizer = Adam(learning_rate=1e-6)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
model.summary()

In [None]:
history = model.fit(X_back, y_res_one_hot,validation_data=(X_test,y_test_one_hot), epochs=50, batch_size=1024, callbacks=[es])

### Plotting model learning characteristics

#### Accuracy Plot

In [None]:
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()


#### Loss Plot

In [None]:
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

#### Classification Report

In [None]:
y_pred=model.predict(X_test)

In [None]:
y_pred=np.argmax(y_pred,axis=1)

In [None]:
y_pred.shape


In [None]:
from sklearn.metrics import classification_report, confusion_matrix
print("Classification report:\n", classification_report(y_pred=y_pred,y_true=y_test))

In [None]:
model.save('/content/drive/MyDrive/Chess/Checkpoint_Final.h5')

# Generating FEN on test set

## Loading saved model from checkpoint

In [None]:
# Reload the checkpoint written by model.save() in the training section so the
# FEN evaluation uses the model this notebook produced and a fresh
# "Restart & Run All" does not depend on a file saved by an earlier session.
model = tf.keras.models.load_model('/content/drive/MyDrive/Chess/Checkpoint_Final.h5')

In [None]:
def decode_piece(piece: int):
  """Map an integer class id back to a piece symbol.

  0 yields ``None`` (empty square), 1-6 yield ``K Q R B N P``, 7-11 yield
  ``k q r b n`` and every other value yields ``'p'``.
  """
  symbols = {
      0: None,
      1: 'K', 2: 'Q', 3: 'R', 4: 'B', 5: 'N', 6: 'P',
      7: 'k', 8: 'q', 9: 'r', 10: 'b', 11: 'n',
  }
  # 'p' is both the symbol for id 12 and the fallback for unlisted ids.
  return symbols.get(piece, 'p')

## Loading Validation Data

In [None]:
# Keep the validation data grouped per board: X_val stacks the crops returned
# by preprocess_image for each image, y_val keeps the matching FEN label.
X_val = []
y_val = []

for image_id, fen in tqdm(zip(val_df['ImageID'], val_df['label']), total=len(val_df)):
  imgID = str(image_id)
  img_path = 'val/'+imgID+'.jpg'

  x = preprocess_image(img_path)
  X_val.append(x)
  y_val.append(fen)

X_val = np.array(X_val)
print('X is: ', X_val.shape)
print('Y is: ', len(y_val))

In [None]:
def get_fen_image(cells):
  """Predict the piece-placement FEN field for a single board.

  Args:
    cells: Array with the 64 square crops of one board, reshapeable to
      ``(64, 32, 32, 1)``.  Crop ``k`` is assigned to the ``k``-th square
      yielded by ``chess.SquareSet(chess.BB_ALL)``.

  Returns:
    The first space-separated field of ``board.fen()`` for the predicted
    position.

  Raises:
    ValueError: If ``cells`` does not contain exactly 64 crops.
  """
  cells = cells.reshape(-1, 32, 32, 1)
  if cells.shape[0] != 64:
    # Previously extra crops were silently ignored and missing ones surfaced
    # as an IndexError; reject both explicitly.
    raise ValueError('Expected 64 cells for one board, got %d' % cells.shape[0])

  # The module-level `model` is only read, so no `global` statement is needed.
  # verbose=0 stops Keras from printing a progress bar for every board when
  # this function is called inside the tqdm loops.
  y_pred = np.argmax(model.predict(cells, verbose=0), axis=1)

  board = chess.Board()
  board.clear_board()
  for square, class_id in zip(chess.SquareSet(chess.BB_ALL), y_pred):
    symbol = decode_piece(class_id)
    if symbol is not None:
      board.set_piece_at(square, chess.Piece.from_symbol(symbol))
  return board.fen().split(' ')[0]

In [None]:
predicted_fen_array = []
for example in tqdm(X_val):
  predicted_fen = get_fen_image(example)
  predicted_fen_array.append(predicted_fen)

In [None]:
val_df['PredictedFEN'] = predicted_fen_array

In [None]:
val_df

In [None]:
!pip install jiwer

In [None]:
import jiwer

In [None]:
print("Word Error Rate[WER]: ", jiwer.wer(list(val_df['label']), list(val_df['PredictedFEN'])))

# Confusion matrix

In [None]:
#confusion matrix
cm = confusion_matrix(y_true=y_test, y_pred=y_pred, labels=[0,1,2,3,4,5,6,7,8,9,10,11,12], sample_weight=None, normalize=None)

In [None]:
#confusion matrix plot
from mlxtend.plotting import plot_confusion_matrix

plot_confusion_matrix(cm, figsize=(15,15), cmap=plt.cm.Dark2 )

plt.show()

# Testing with sample image not in either dataset

In [None]:
# Image generated using lichess
img = cv2.imread('/content/drive/MyDrive/Chess/test.png', cv2.IMREAD_COLOR)
# OpenCV returns BGR channel order; convert to RGB so matplotlib shows the
# true colours.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.show()

In [None]:
print('FEN is: r1bq1rk1/p1p2ppp/8/3N4/2B5/8/PP1Q1PPP/R4RK1')

In [None]:
data = preprocess_image('/content/drive/MyDrive/Chess/test.png')
print("Predicted FEN: ", get_fen_image(data))

# AICrowd Submission

In [None]:
unseen_df = pd.read_csv('sample_submission.csv')
unseen_df.head()

In [None]:
# Image ids 0..9999 form the ImageID column of the submission frame.
ids = list(range(10000))
submission_df = pd.DataFrame({'ImageID': ids})

In [None]:
submission_df

In [None]:
submission_fen_array = []
for i in tqdm(range(len(submission_df))):
  imgID = str(submission_df.iloc[i]['ImageID'])
  img_path = 'test/'+imgID+'.jpg'
  example = preprocess_image(img_path)
  predicted_fen = get_fen_image(example)
  submission_fen_array.append(predicted_fen)


In [None]:
submission_df['label'] = submission_fen_array

In [None]:
# index=False keeps the file to the ImageID and label columns; the default
# would also write the dataframe's row index as an extra unnamed first column.
submission_df.to_csv('submission.csv', index=False)