diff --git a/Architecture/Architecture.JPG b/Architecture/Architecture.JPG
new file mode 100644
index 0000000..9f4864f
Binary files /dev/null and b/Architecture/Architecture.JPG differ
diff --git a/GamePlay.ipynb b/GamePlay.ipynb
new file mode 100644
index 0000000..6fbabfa
--- /dev/null
+++ b/GamePlay.ipynb
@@ -0,0 +1,530 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": true
+ },
+ "source": [
+ "# Play 2048 using Trained Weights"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Import Required Libraries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import tensorflow as tf\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from copy import deepcopy\n",
+ "import random \n",
+ "import math\n",
+ "import time\n",
+ "from tkinter import *"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Restore Architecture"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#conv layer1 depth\n",
+ "depth1 = 128\n",
+ "\n",
+ "#conv layer2 depth\n",
+ "depth2 = 128\n",
+ "\n",
+ "#input depth\n",
+ "input_depth = 16\n",
+ "\n",
+    "#fully connected hidden layer\n",
+ "hidden_units = 256\n",
+ "\n",
+ "#output layer\n",
+ "output_units = 4\n",
+ "\n",
+ "#shape of weights\n",
+ "conv1_layer1_shape = [2,1,input_depth,depth1]\n",
+ "conv1_layer2_shape = [2,1,depth1,depth2]\n",
+ "conv2_layer1_shape = [1,2,input_depth,depth1]\n",
+ "conv2_layer2_shape = [1,2,depth1,depth2]\n",
+ "\n",
+ "fc_layer1_w_shape = [3*4*depth1*2+ 4*2*depth2*2 + 3*3*depth2*2,hidden_units]\n",
+ "fc_layer1_b_shape = [hidden_units]\n",
+ "fc_layer2_w_shape = [hidden_units,output_units]\n",
+ "fc_layer2_b_shape = [output_units]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "### Load the Parameters"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "parameters = dict()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path = r'E:\\Study n Work\\Projects\\2048\\Final Weights'\n",
+ "parameters['conv1_layer1'] = np.array(pd.read_csv(path + r'\\conv1_layer1_weights.csv')['Weight']).reshape(conv1_layer1_shape)\n",
+ "parameters['conv1_layer2'] = np.array(pd.read_csv(path + r'\\conv1_layer2_weights.csv')['Weight']).reshape(conv1_layer2_shape)\n",
+ "parameters['conv2_layer1'] = np.array(pd.read_csv(path + r'\\conv2_layer1_weights.csv')['Weight']).reshape(conv2_layer1_shape)\n",
+ "parameters['conv2_layer2'] = np.array(pd.read_csv(path + r'\\conv2_layer2_weights.csv')['Weight']).reshape(conv2_layer2_shape)\n",
+ "parameters['fc_layer1_w'] = np.array(pd.read_csv(path + r'\\fc_layer1_weights.csv')['Weight']).reshape(fc_layer1_w_shape)\n",
+ "parameters['fc_layer1_b'] = np.array(pd.read_csv(path + r'\\fc_layer1_biases.csv')['Weight']).reshape(fc_layer1_b_shape)\n",
+ "parameters['fc_layer2_w'] = np.array(pd.read_csv(path + r'\\fc_layer2_weights.csv')['Weight']).reshape(fc_layer2_w_shape)\n",
+ "parameters['fc_layer2_b'] = np.array(pd.read_csv(path + r'\\fc_layer2_biases.csv')['Weight']).reshape(fc_layer2_b_shape)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Game Logic"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "def new_game(n):\n",
+ " matrix = []\n",
+ "\n",
+ " for i in range(n):\n",
+ " matrix.append([0] * n)\n",
+ " return matrix\n",
+ "\n",
+ "def add_two(mat):\n",
+ " empty_cells = []\n",
+ " for i in range(len(mat)):\n",
+ " for j in range(len(mat)):\n",
+ " if(mat[i][j]==0):\n",
+ " empty_cells.append((i,j))\n",
+ " if(len(empty_cells)==0):\n",
+ " return mat\n",
+ " index_pair = empty_cells[random.randint(0,len(empty_cells)-1)]\n",
+ " \n",
+ " num = np.random.random(1)\n",
+ " if(num>0.9):\n",
+ " mat[index_pair[0]][index_pair[1]]=4\n",
+ " else:\n",
+ " mat[index_pair[0]][index_pair[1]]=2\n",
+ " return mat\n",
+ "\n",
+ "def game_state(mat):\n",
+ " for i in range(len(mat)-1): #intentionally reduced to check the row on the right and below\n",
+ " for j in range(len(mat[0])-1): #more elegant to use exceptions but most likely this will be their solution\n",
+ " if mat[i][j]==mat[i+1][j] or mat[i][j+1]==mat[i][j]:\n",
+ " return 'not over'\n",
+ " for i in range(len(mat)): #check for any zero entries\n",
+ " for j in range(len(mat[0])):\n",
+ " if mat[i][j]==0:\n",
+ " return 'not over'\n",
+ " for k in range(len(mat)-1): #to check the left/right entries on the last row\n",
+ " if mat[len(mat)-1][k]==mat[len(mat)-1][k+1]:\n",
+ " return 'not over'\n",
+ " for j in range(len(mat)-1): #check up/down entries on last column\n",
+ " if mat[j][len(mat)-1]==mat[j+1][len(mat)-1]:\n",
+ " return 'not over'\n",
+ " return 'lose'\n",
+ "\n",
+ "def reverse(mat):\n",
+ " new=[]\n",
+ " for i in range(len(mat)):\n",
+ " new.append([])\n",
+ " for j in range(len(mat[0])):\n",
+ " new[i].append(mat[i][len(mat[0])-j-1])\n",
+ " return new\n",
+ "\n",
+ "def transpose(mat):\n",
+ " new=[]\n",
+ " for i in range(len(mat[0])):\n",
+ " new.append([])\n",
+ " for j in range(len(mat)):\n",
+ " new[i].append(mat[j][i])\n",
+ " return new\n",
+ "\n",
+ "def cover_up(mat):\n",
+ " new=[[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0]]\n",
+ " done=False\n",
+ " for i in range(4):\n",
+ " count=0\n",
+ " for j in range(4):\n",
+ " if mat[i][j]!=0:\n",
+ " new[i][count]=mat[i][j]\n",
+ " if j!=count:\n",
+ " done=True\n",
+ " count+=1\n",
+ " return (new,done)\n",
+ "\n",
+ "def merge(mat):\n",
+ " done=False\n",
+ " score = 0\n",
+ " for i in range(4):\n",
+ " for j in range(3):\n",
+ " if mat[i][j]==mat[i][j+1] and mat[i][j]!=0:\n",
+ " mat[i][j]*=2\n",
+ " score += mat[i][j] \n",
+ " mat[i][j+1]=0\n",
+ " done=True\n",
+ " return (mat,done,score)\n",
+ "\n",
+ "\n",
+ "def up(game):\n",
+ " game=transpose(game)\n",
+ " game,done=cover_up(game)\n",
+ " temp=merge(game)\n",
+ " game=temp[0]\n",
+ " done=done or temp[1]\n",
+ " game=cover_up(game)[0]\n",
+ " game=transpose(game)\n",
+ " return (game,done,temp[2])\n",
+ "\n",
+ "def down(game):\n",
+ " game=reverse(transpose(game))\n",
+ " game,done=cover_up(game)\n",
+ " temp=merge(game)\n",
+ " game=temp[0]\n",
+ " done=done or temp[1]\n",
+ " game=cover_up(game)[0]\n",
+ " game=transpose(reverse(game))\n",
+ " return (game,done,temp[2])\n",
+ "\n",
+ "def left(game):\n",
+ " game,done=cover_up(game)\n",
+ " temp=merge(game)\n",
+ " game=temp[0]\n",
+ " done=done or temp[1]\n",
+ " game=cover_up(game)[0]\n",
+ " return (game,done,temp[2])\n",
+ "\n",
+ "def right(game):\n",
+ " game=reverse(game)\n",
+ " game,done=cover_up(game)\n",
+ " temp=merge(game)\n",
+ " game=temp[0]\n",
+ " done=done or temp[1]\n",
+ " game=cover_up(game)[0]\n",
+ " game=reverse(game)\n",
+ " return (game,done,temp[2])\n",
+ " \n",
+ "\n",
+ "def findemptyCell(mat):\n",
+ " count = 0\n",
+ " for i in range(len(mat)):\n",
+ " for j in range(len(mat)):\n",
+ " if(mat[i][j]==0):\n",
+ " count+=1\n",
+ " return count\n",
+ "\n",
+ "def change_values(X):\n",
+ " power_mat = np.zeros(shape=(1,4,4,16),dtype=np.float32)\n",
+ " for i in range(4):\n",
+ " for j in range(4):\n",
+ " if(X[i][j]==0):\n",
+ " power_mat[0][i][j][0] = 1.0\n",
+ " else:\n",
+ " power = int(math.log(X[i][j],2))\n",
+ " power_mat[0][i][j][power] = 1.0\n",
+ " return power_mat"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Controls"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "controls = {0:up,1:left,2:right,3:down}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "### Restore the Graph"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "learned_graph = tf.Graph()\n",
+ "\n",
+ "with learned_graph.as_default():\n",
+ " \n",
+ " #input data\n",
+ " single_dataset = tf.placeholder(tf.float32,shape=(1,4,4,16))\n",
+ " \n",
+ " #weights\n",
+ " \n",
+ " #conv layer1 weights\n",
+ " conv1_layer1_weights = tf.constant(parameters['conv1_layer1'],dtype=tf.float32)\n",
+ " conv1_layer2_weights = tf.constant(parameters['conv1_layer2'],dtype=tf.float32)\n",
+ " \n",
+ " #conv layer2 weights\n",
+ " conv2_layer1_weights = tf.constant(parameters['conv2_layer1'],dtype=tf.float32)\n",
+ " conv2_layer2_weights = tf.constant(parameters['conv2_layer2'],dtype=tf.float32)\n",
+ " \n",
+ " #fully connected parameters\n",
+ " fc_layer1_weights = tf.constant(parameters['fc_layer1_w'],dtype=tf.float32)\n",
+ " fc_layer1_biases = tf.constant(parameters['fc_layer1_b'],dtype=tf.float32)\n",
+ " fc_layer2_weights = tf.constant(parameters['fc_layer2_w'],dtype=tf.float32)\n",
+ " fc_layer2_biases = tf.constant(parameters['fc_layer2_b'],dtype=tf.float32)\n",
+ " \n",
+ " \n",
+ " #model\n",
+ " def model(dataset):\n",
+ " #layer1\n",
+ " conv1 = tf.nn.conv2d(dataset,conv1_layer1_weights,[1,1,1,1],padding='VALID') \n",
+ " conv2 = tf.nn.conv2d(dataset,conv2_layer1_weights,[1,1,1,1],padding='VALID') \n",
+ "\n",
+ " #layer1 relu activation\n",
+ " relu1 = tf.nn.relu(conv1)\n",
+ " relu2 = tf.nn.relu(conv2)\n",
+ "\n",
+ " #layer2\n",
+ " conv11 = tf.nn.conv2d(relu1,conv1_layer2_weights,[1,1,1,1],padding='VALID') \n",
+ " conv12 = tf.nn.conv2d(relu1,conv2_layer2_weights,[1,1,1,1],padding='VALID') \n",
+ "\n",
+ " conv21 = tf.nn.conv2d(relu2,conv1_layer2_weights,[1,1,1,1],padding='VALID') \n",
+ " conv22 = tf.nn.conv2d(relu2,conv2_layer2_weights,[1,1,1,1],padding='VALID') \n",
+ "\n",
+ " #layer2 relu activation\n",
+ " relu11 = tf.nn.relu(conv11)\n",
+ " relu12 = tf.nn.relu(conv12)\n",
+ " relu21 = tf.nn.relu(conv21)\n",
+ " relu22 = tf.nn.relu(conv22)\n",
+ "\n",
+ " #get shapes of all activations\n",
+ " shape1 = relu1.get_shape().as_list()\n",
+ " shape2 = relu2.get_shape().as_list()\n",
+ "\n",
+ " shape11 = relu11.get_shape().as_list()\n",
+ " shape12 = relu12.get_shape().as_list()\n",
+ " shape21 = relu21.get_shape().as_list()\n",
+ " shape22 = relu22.get_shape().as_list()\n",
+ "\n",
+ " #expansion\n",
+ " hidden1 = tf.reshape(relu1,[shape1[0],shape1[1]*shape1[2]*shape1[3]])\n",
+ " hidden2 = tf.reshape(relu2,[shape2[0],shape2[1]*shape2[2]*shape2[3]])\n",
+ "\n",
+ " hidden11 = tf.reshape(relu11,[shape11[0],shape11[1]*shape11[2]*shape11[3]])\n",
+ " hidden12 = tf.reshape(relu12,[shape12[0],shape12[1]*shape12[2]*shape12[3]])\n",
+ " hidden21 = tf.reshape(relu21,[shape21[0],shape21[1]*shape21[2]*shape21[3]])\n",
+ " hidden22 = tf.reshape(relu22,[shape22[0],shape22[1]*shape22[2]*shape22[3]])\n",
+ "\n",
+ " #concatenation\n",
+ " hidden = tf.concat([hidden1,hidden2,hidden11,hidden12,hidden21,hidden22],axis=1)\n",
+ "\n",
+ " #full connected layers\n",
+ " hidden = tf.matmul(hidden,fc_layer1_weights) + fc_layer1_biases\n",
+ " hidden = tf.nn.relu(hidden)\n",
+ "\n",
+ " #output layer\n",
+ " output = tf.matmul(hidden,fc_layer2_weights) + fc_layer2_biases\n",
+ "\n",
+ " #return output\n",
+ " return output\n",
+ "\n",
+ " #for single example\n",
+ " single_output = model(single_dataset)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Enjoy the Game!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "SIZE = 500\n",
+ "GRID_LEN = 4\n",
+ "GRID_PADDING = 10\n",
+ "\n",
+ "BACKGROUND_COLOR_GAME = \"#92877d\"\n",
+ "BACKGROUND_COLOR_CELL_EMPTY = \"#9e948a\"\n",
+ "BACKGROUND_COLOR_DICT = { 2:\"#eee4da\", 4:\"#ede0c8\", 8:\"#f2b179\", 16:\"#f59563\", \\\n",
+ " 32:\"#f67c5f\", 64:\"#f65e3b\", 128:\"#edcf72\", 256:\"#edcc61\", \\\n",
+ " 512:\"#edc850\", 1024:\"#edc53f\", 2048:\"#edc22e\" }\n",
+ "\n",
+ "CELL_COLOR_DICT = { 2:\"#776e65\", 4:\"#776e65\", 8:\"#f9f6f2\", 16:\"#f9f6f2\", \\\n",
+ " 32:\"#f9f6f2\", 64:\"#f9f6f2\", 128:\"#f9f6f2\", 256:\"#f9f6f2\", \\\n",
+ " 512:\"#f9f6f2\", 1024:\"#f9f6f2\", 2048:\"#f9f6f2\" }\n",
+ "\n",
+ "FONT = (\"Verdana\", 40, \"bold\")\n",
+ "\n",
+ "learned_sess = tf.Session(graph=learned_graph)\n",
+ "\n",
+ "class GameGrid(Frame):\n",
+ " def __init__(self):\n",
+ " Frame.__init__(self)\n",
+ "\n",
+ " self.grid()\n",
+ " self.master.title('2048')\n",
+ "\n",
+ " self.grid_cells = []\n",
+ " self.init_grid()\n",
+ " self.init_matrix()\n",
+ " self.update_grid_cells()\n",
+ " \n",
+ " self.wait_visibility()\n",
+ " self.after(10,self.make_move)\n",
+ " \n",
+ " def init_grid(self):\n",
+ " background = Frame(self, bg=BACKGROUND_COLOR_GAME, width=SIZE, height=SIZE)\n",
+ " background.grid()\n",
+ " for i in range(GRID_LEN):\n",
+ " grid_row = []\n",
+ " for j in range(GRID_LEN):\n",
+ " cell = Frame(background, bg=BACKGROUND_COLOR_CELL_EMPTY, width=SIZE/GRID_LEN, height=SIZE/GRID_LEN)\n",
+ " cell.grid(row=i, column=j, padx=GRID_PADDING, pady=GRID_PADDING)\n",
+ " # font = Font(size=FONT_SIZE, family=FONT_FAMILY, weight=FONT_WEIGHT)\n",
+ " t = Label(master=cell, text=\"\", bg=BACKGROUND_COLOR_CELL_EMPTY, justify=CENTER, font=FONT, width=4, height=2)\n",
+ " t.grid()\n",
+ " grid_row.append(t)\n",
+ "\n",
+ " self.grid_cells.append(grid_row)\n",
+ "\n",
+ " def gen(self):\n",
+ " return randint(0, GRID_LEN - 1)\n",
+ "\n",
+ " def init_matrix(self):\n",
+ " self.matrix = new_game(4)\n",
+ "\n",
+ " self.matrix=add_two(self.matrix)\n",
+ " self.matrix=add_two(self.matrix)\n",
+ "\n",
+ " def update_grid_cells(self):\n",
+ " for i in range(GRID_LEN):\n",
+ " for j in range(GRID_LEN):\n",
+ " new_number = self.matrix[i][j]\n",
+ " if new_number == 0:\n",
+ " self.grid_cells[i][j].configure(text=\"\", bg=BACKGROUND_COLOR_CELL_EMPTY)\n",
+ " else:\n",
+ " self.grid_cells[i][j].configure(text=str(new_number), bg=BACKGROUND_COLOR_DICT[new_number], fg=CELL_COLOR_DICT[new_number])\n",
+ " self.update_idletasks()\n",
+ " \n",
+ " def make_move(self):\n",
+ " output = learned_sess.run([single_output],feed_dict = {single_dataset:change_values(self.matrix)})\n",
+ " move = np.argmax(output[0])\n",
+ " self.matrix,done,tempo = controls[move](self.matrix)\n",
+ " done=True\n",
+ " \n",
+ " #if game_state(self.matrix)=='win':\n",
+ " # self.grid_cells[1][1].configure(text=\"You\",bg=BACKGROUND_COLOR_CELL_EMPTY)\n",
+ " # self.grid_cells[1][2].configure(text=\"Win!\",bg=BACKGROUND_COLOR_CELL_EMPTY)\n",
+ " # done=False\n",
+ " if game_state(self.matrix)=='lose':\n",
+ " self.grid_cells[1][1].configure(text=\"You\",bg=BACKGROUND_COLOR_CELL_EMPTY)\n",
+ " self.grid_cells[1][2].configure(text=\"Lose!\",bg=BACKGROUND_COLOR_CELL_EMPTY)\n",
+ " done=False\n",
+ " \n",
+ " self.matrix = add_two(self.matrix)\n",
+ " self.update_grid_cells()\n",
+ " \n",
+ " \n",
+ " if(done==True):\n",
+ " self.after(7,self.make_move)\n",
+ " else:\n",
+ " time.sleep(3)\n",
+ " self.init_matrix()\n",
+ " self.update_grid_cells()\n",
+ " self.after(7,self.make_move)\n",
+ "\n",
+ " def generate_next(self):\n",
+ " empty_cells = []\n",
+ " for i in range(len(mat)):\n",
+ " for j in range(len(mat)):\n",
+ " if(mat[i][j]==0):\n",
+ " empty_cells.append((i,j))\n",
+ " if(len(empty_cells)==0):\n",
+ " return 0,false\n",
+ " index_pair = empty_cells[random.randint(0,len(empty_cells)-1)]\n",
+ " index = index_pair\n",
+ " self.matrix[index[0]][index[1]] = 2\n",
+ "\n",
+ "root = Tk()\n",
+ "gamegrid = GameGrid()\n",
+ "root.mainloop()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.1"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Loss/Loss.png b/Loss/Loss.png
new file mode 100644
index 0000000..8281f73
Binary files /dev/null and b/Loss/Loss.png differ
diff --git a/Loss/Loss1.png b/Loss/Loss1.png
new file mode 100644
index 0000000..38eda78
Binary files /dev/null and b/Loss/Loss1.png differ
diff --git a/Score/Score.png b/Score/Score.png
new file mode 100644
index 0000000..f55e9e5
Binary files /dev/null and b/Score/Score.png differ
diff --git a/Score/Scores.png b/Score/Scores.png
new file mode 100644
index 0000000..fd3c782
Binary files /dev/null and b/Score/Scores.png differ
diff --git a/Train-2048.ipynb b/Train-2048.ipynb
new file mode 100644
index 0000000..61d614b
--- /dev/null
+++ b/Train-2048.ipynb
@@ -0,0 +1,764 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": true
+ },
+ "source": [
+ "# 2048 Deep Reinforcement Learning"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Import Required Libraries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import tensorflow as tf\n",
+ "import numpy as np\n",
+ "from copy import deepcopy\n",
+ "import random \n",
+ "import math\n",
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Game Logic"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "#initialize a new game\n",
+ "def new_game(n):\n",
+ " matrix = np.zeros([n,n])\n",
+ " return matrix\n",
+ "\n",
+ "#add 2 or 4 in the matrix\n",
+ "def add_two(mat):\n",
+ " empty_cells = []\n",
+ " for i in range(len(mat)):\n",
+ " for j in range(len(mat[0])):\n",
+ " if(mat[i][j]==0):\n",
+ " empty_cells.append((i,j))\n",
+ " if(len(empty_cells)==0):\n",
+ " return mat\n",
+ " \n",
+ " index_pair = empty_cells[random.randint(0,len(empty_cells)-1)]\n",
+ " \n",
+ " prob = random.random()\n",
+ " if(prob>=0.9):\n",
+ " mat[index_pair[0]][index_pair[1]]=4\n",
+ " else:\n",
+ " mat[index_pair[0]][index_pair[1]]=2\n",
+ " return mat\n",
+ "\n",
+ "#to check state of the game\n",
+ "def game_state(mat):\n",
+ " #if 2048 in mat:\n",
+ " # return 'win'\n",
+ " \n",
+ " for i in range(len(mat)-1): #intentionally reduced to check the row on the right and below\n",
+ " for j in range(len(mat[0])-1): #more elegant to use exceptions but most likely this will be their solution\n",
+ " if mat[i][j]==mat[i+1][j] or mat[i][j+1]==mat[i][j]:\n",
+ " return 'not over'\n",
+ " \n",
+ " for i in range(len(mat)): #check for any zero entries\n",
+ " for j in range(len(mat[0])):\n",
+ " if mat[i][j]==0:\n",
+ " return 'not over'\n",
+ " \n",
+ " for k in range(len(mat)-1): #to check the left/right entries on the last row\n",
+ " if mat[len(mat)-1][k]==mat[len(mat)-1][k+1]:\n",
+ " return 'not over'\n",
+ " \n",
+ " for j in range(len(mat)-1): #check up/down entries on last column\n",
+ " if mat[j][len(mat)-1]==mat[j+1][len(mat)-1]:\n",
+ " return 'not over'\n",
+ " \n",
+ " return 'lose'\n",
+ "\n",
+ "\n",
+ "def reverse(mat):\n",
+ " new=[]\n",
+ " for i in range(len(mat)):\n",
+ " new.append([])\n",
+ " for j in range(len(mat[0])):\n",
+ " new[i].append(mat[i][len(mat[0])-j-1])\n",
+ " return new\n",
+ "\n",
+ "def transpose(mat):\n",
+ " new=[]\n",
+ " for i in range(len(mat[0])):\n",
+ " new.append([])\n",
+ " for j in range(len(mat)):\n",
+ " new[i].append(mat[j][i])\n",
+ " \n",
+ " return np.transpose(mat)\n",
+ "\n",
+ "def cover_up(mat):\n",
+ " new = [[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0]]\n",
+ " done = False\n",
+ " for i in range(4):\n",
+ " count = 0\n",
+ " for j in range(4):\n",
+ " if mat[i][j]!=0:\n",
+ " new[i][count] = mat[i][j]\n",
+ " if j!=count:\n",
+ " done=True\n",
+ " count+=1\n",
+ " return (new,done)\n",
+ "\n",
+ "def merge(mat):\n",
+ " done=False\n",
+ " score = 0\n",
+ " for i in range(4):\n",
+ " for j in range(3):\n",
+ " if mat[i][j]==mat[i][j+1] and mat[i][j]!=0:\n",
+ " mat[i][j]*=2\n",
+ " score += mat[i][j] \n",
+ " mat[i][j+1]=0\n",
+ " done=True\n",
+ " return (mat,done,score)\n",
+ "\n",
+ "#up move\n",
+ "def up(game):\n",
+ " game = transpose(game)\n",
+ " game,done = cover_up(game)\n",
+ " temp = merge(game)\n",
+ " game = temp[0]\n",
+ " done = done or temp[1]\n",
+ " game = cover_up(game)[0]\n",
+ " game = transpose(game)\n",
+ " return (game,done,temp[2])\n",
+ "\n",
+ "#down move\n",
+ "def down(game):\n",
+ " game=reverse(transpose(game))\n",
+ " game,done=cover_up(game)\n",
+ " temp=merge(game)\n",
+ " game=temp[0]\n",
+ " done=done or temp[1]\n",
+ " game=cover_up(game)[0]\n",
+ " game=transpose(reverse(game))\n",
+ " return (game,done,temp[2])\n",
+ "\n",
+ "#left move\n",
+ "def left(game):\n",
+ " game,done=cover_up(game)\n",
+ " temp=merge(game)\n",
+ " game=temp[0]\n",
+ " done=done or temp[1]\n",
+ " game=cover_up(game)[0]\n",
+ " return (game,done,temp[2])\n",
+ "\n",
+ "#right move\n",
+ "def right(game):\n",
+ " game=reverse(game)\n",
+ " game,done=cover_up(game)\n",
+ " temp=merge(game)\n",
+ " game=temp[0]\n",
+ " done=done or temp[1]\n",
+ " game=cover_up(game)[0]\n",
+ " game=reverse(game)\n",
+ " return (game,done,temp[2])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Controls"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "controls = {0:up,1:left,2:right,3:down}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Important Functions\n",
+ "* Find Empty Cell Function (Used in Reward)\n",
+ "* Convert Input Values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "#convert the input game matrix into corresponding power of 2 matrix.\n",
+ "def change_values(X):\n",
+ " power_mat = np.zeros(shape=(1,4,4,16),dtype=np.float32)\n",
+ " for i in range(4):\n",
+ " for j in range(4):\n",
+ " if(X[i][j]==0):\n",
+ " power_mat[0][i][j][0] = 1.0\n",
+ " else:\n",
+ " power = int(math.log(X[i][j],2))\n",
+ " power_mat[0][i][j][power] = 1.0\n",
+ " return power_mat \n",
+ "\n",
+ "#find the number of empty cells in the game matrix.\n",
+ "def findemptyCell(mat):\n",
+ " count = 0\n",
+ " for i in range(len(mat)):\n",
+ " for j in range(len(mat)):\n",
+ " if(mat[i][j]==0):\n",
+ " count+=1\n",
+ " return count"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Hyper Parameters"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "#hyper parameters\n",
+ "start_learning_rate = 0.0005\n",
+ "\n",
+ "#gamma for Q-learning\n",
+ "gamma = 0.9\n",
+ "\n",
+ "#epsilon greedy approach\n",
+ "epsilon = 0.9\n",
+ "\n",
+    "#to store states and labels of the game for training\n",
+ "#states of the game\n",
+ "replay_memory = list()\n",
+ "\n",
+ "#labels of the states\n",
+ "replay_labels = list()\n",
+ "\n",
+ "#capacity of memory\n",
+ "mem_capacity = 6000"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Network Architecture\n",
+ "\n",
+    "![Architecture](Architecture/Architecture.JPG)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "#first convolution layer depth\n",
+ "depth1 = 128\n",
+ "\n",
+ "#second convolution layer depth\n",
+ "depth2 = 128\n",
+ "\n",
+ "#batch size for batch gradient descent\n",
+ "batch_size = 512\n",
+ "\n",
+ "#input units\n",
+ "input_units = 16\n",
+ "\n",
+ "#fully connected layer neurons\n",
+ "hidden_units = 256\n",
+ "\n",
+ "#output neurons = number of moves\n",
+ "output_units = 4"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Let's make the Tensorflow Graph\n",
+ "* Loss = mean ( square( Q(st,at) - (r + gamma x max(Q(st+1,a))) ) )\n",
+ "* Activation = RELU\n",
+ "* Optimizer = RMSProp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#input data\n",
+ "tf_batch_dataset = tf.placeholder(tf.float32,shape=(batch_size,4,4,16))\n",
+ "tf_batch_labels = tf.placeholder(tf.float32,shape=(batch_size,output_units))\n",
+ "\n",
+ "single_dataset = tf.placeholder(tf.float32,shape=(1,4,4,16))\n",
+ "\n",
+ "\n",
+ "#CONV LAYERS\n",
+ "#conv layer1 weights\n",
+ "conv1_layer1_weights = tf.Variable(tf.truncated_normal([1,2,input_units,depth1],mean=0,stddev=0.01))\n",
+ "conv2_layer1_weights = tf.Variable(tf.truncated_normal([2,1,input_units,depth1],mean=0,stddev=0.01))\n",
+ "\n",
+ "#conv layer2 weights\n",
+ "conv1_layer2_weights = tf.Variable(tf.truncated_normal([1,2,depth1,depth2],mean=0,stddev=0.01))\n",
+ "conv2_layer2_weights = tf.Variable(tf.truncated_normal([2,1,depth1,depth2],mean=0,stddev=0.01))\n",
+ "\n",
+ "\n",
+ "\n",
+ "#FUllY CONNECTED LAYERS\n",
+ "expand_size = 2*4*depth2*2 + 3*3*depth2*2 + 4*3*depth1*2\n",
+ "fc_layer1_weights = tf.Variable(tf.truncated_normal([expand_size,hidden_units],mean=0,stddev=0.01))\n",
+ "fc_layer1_biases = tf.Variable(tf.truncated_normal([1,hidden_units],mean=0,stddev=0.01))\n",
+ "fc_layer2_weights = tf.Variable(tf.truncated_normal([hidden_units,output_units],mean=0,stddev=0.01))\n",
+ "fc_layer2_biases = tf.Variable(tf.truncated_normal([1,output_units],mean=0,stddev=0.01))\n",
+ "\n",
+ "\n",
+ "#model\n",
+ "def model(dataset):\n",
+ " #layer1\n",
+ " conv1 = tf.nn.conv2d(dataset,conv1_layer1_weights,[1,1,1,1],padding='VALID') \n",
+ " conv2 = tf.nn.conv2d(dataset,conv2_layer1_weights,[1,1,1,1],padding='VALID') \n",
+ " \n",
+ " #layer1 relu activation\n",
+ " relu1 = tf.nn.relu(conv1)\n",
+ " relu2 = tf.nn.relu(conv2)\n",
+ " \n",
+ " #layer2\n",
+ " conv11 = tf.nn.conv2d(relu1,conv1_layer2_weights,[1,1,1,1],padding='VALID') \n",
+ " conv12 = tf.nn.conv2d(relu1,conv2_layer2_weights,[1,1,1,1],padding='VALID') \n",
+ "\n",
+ " conv21 = tf.nn.conv2d(relu2,conv1_layer2_weights,[1,1,1,1],padding='VALID') \n",
+ " conv22 = tf.nn.conv2d(relu2,conv2_layer2_weights,[1,1,1,1],padding='VALID') \n",
+ "\n",
+ " #layer2 relu activation\n",
+ " relu11 = tf.nn.relu(conv11)\n",
+ " relu12 = tf.nn.relu(conv12)\n",
+ " relu21 = tf.nn.relu(conv21)\n",
+ " relu22 = tf.nn.relu(conv22)\n",
+ " \n",
+ " #get shapes of all activations\n",
+ " shape1 = relu1.get_shape().as_list()\n",
+ " shape2 = relu2.get_shape().as_list()\n",
+ " \n",
+ " shape11 = relu11.get_shape().as_list()\n",
+ " shape12 = relu12.get_shape().as_list()\n",
+ " shape21 = relu21.get_shape().as_list()\n",
+ " shape22 = relu22.get_shape().as_list()\n",
+ "\n",
+ " #expansion\n",
+ " hidden1 = tf.reshape(relu1,[shape1[0],shape1[1]*shape1[2]*shape1[3]])\n",
+ " hidden2 = tf.reshape(relu2,[shape2[0],shape2[1]*shape2[2]*shape2[3]])\n",
+ " \n",
+ " hidden11 = tf.reshape(relu11,[shape11[0],shape11[1]*shape11[2]*shape11[3]])\n",
+ " hidden12 = tf.reshape(relu12,[shape12[0],shape12[1]*shape12[2]*shape12[3]])\n",
+ " hidden21 = tf.reshape(relu21,[shape21[0],shape21[1]*shape21[2]*shape21[3]])\n",
+ " hidden22 = tf.reshape(relu22,[shape22[0],shape22[1]*shape22[2]*shape22[3]])\n",
+ "\n",
+ " #concatenation\n",
+ " hidden = tf.concat([hidden1,hidden2,hidden11,hidden12,hidden21,hidden22],axis=1)\n",
+ "\n",
+ " #full connected layers\n",
+ " hidden = tf.matmul(hidden,fc_layer1_weights) + fc_layer1_biases\n",
+ " hidden = tf.nn.relu(hidden)\n",
+ "\n",
+ " #output layer\n",
+ " output = tf.matmul(hidden,fc_layer2_weights) + fc_layer2_biases\n",
+ " \n",
+ " #return output\n",
+ " return output\n",
+ "\n",
+ "#for single example\n",
+ "single_output = model(single_dataset)\n",
+ "\n",
+ "#for batch data\n",
+ "logits = model(tf_batch_dataset)\n",
+ "\n",
+ "#loss\n",
+ "loss = tf.square(tf.subtract(tf_batch_labels,logits))\n",
+ "loss = tf.reduce_sum(loss,axis=1,keep_dims=True)\n",
+ "loss = tf.reduce_mean(loss)/2.0\n",
+ "\n",
+ "#optimizer\n",
+ "global_step = tf.Variable(0) # count the number of steps taken.\n",
+ "learning_rate = tf.train.exponential_decay(float(start_learning_rate), global_step, 1000, 0.90, staircase=True)\n",
+ "optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss, global_step=global_step)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "#loss\n",
+ "J = []\n",
+ "\n",
+ "#scores\n",
+ "scores = []\n",
+ "\n",
+ "#to store final parameters\n",
+ "final_parameters = {}\n",
+ "\n",
+ "#number of episodes\n",
+ "M = 200001"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Create training dataset and Train Simultaneously\n",
+ "* Current Reward = number of merges + log(new max,2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with tf.Session() as session:\n",
+ " tf.global_variables_initializer().run()\n",
+ " print(\"Initialized\")\n",
+ " \n",
+ " global epsilon\n",
+ " global replay_labels\n",
+ " global replay_memory\n",
+ "\n",
+ " #for episode with max score\n",
+ " maximum = -1\n",
+ " episode = -1\n",
+ " \n",
+ " #total_iters \n",
+ " total_iters = 1\n",
+ " \n",
+ " #number of back props\n",
+ " back=0\n",
+ " \n",
+ " for ep in range(M):\n",
+ " global board\n",
+ " board = new_game(4)\n",
+ " add_two(board)\n",
+ " add_two(board)\n",
+ " \n",
+ " #whether episode finished or not\n",
+ " finish = 'not over'\n",
+ " \n",
+ " #total_score of this episode\n",
+ " total_score = 0\n",
+ " \n",
+ " #iters per episode\n",
+ " local_iters = 1\n",
+ " \n",
+ " while(finish=='not over'):\n",
+ " prev_board = deepcopy(board)\n",
+ " \n",
+ " #get the required move for this state\n",
+ " state = deepcopy(board)\n",
+ " state = change_values(state)\n",
+ " state = np.array(state,dtype = np.float32).reshape(1,4,4,16)\n",
+ " feed_dict = {single_dataset:state}\n",
+ " control_scores = session.run(single_output,feed_dict=feed_dict)\n",
+ " \n",
+ " #find the move with max Q value\n",
+ " control_buttons = np.flip(np.argsort(control_scores),axis=1)\n",
+ " \n",
+ " #copy the Q-values as labels\n",
+ " labels = deepcopy(control_scores[0])\n",
+ " \n",
+ " #generate random number for epsilon greedy approach\n",
+ " num = random.uniform(0,1)\n",
+ " \n",
+ " #store prev max\n",
+ " prev_max = np.max(prev_board)\n",
+ " \n",
+    "        #if num is less than epsilon, generate a random move\n",
+ " if(num10000) or (epsilon>0.1 and total_iters%2500==0)):\n",
+ " epsilon = epsilon/1.005\n",
+ " \n",
+ " \n",
+ " #change the matrix values and store them in memory\n",
+ " prev_state = deepcopy(prev_board)\n",
+ " prev_state = change_values(prev_state)\n",
+ " prev_state = np.array(prev_state,dtype=np.float32).reshape(1,4,4,16)\n",
+ " replay_labels.append(labels)\n",
+ " replay_memory.append(prev_state)\n",
+ " \n",
+ " \n",
+ " #back-propagation\n",
+ " if(len(replay_memory)>=mem_capacity):\n",
+ " back_loss = 0\n",
+ " batch_num = 0\n",
+ " z = list(zip(replay_memory,replay_labels))\n",
+ " np.random.shuffle(z)\n",
+ " np.random.shuffle(z)\n",
+ " replay_memory,replay_labels = zip(*z)\n",
+ " \n",
+ " for i in range(0,len(replay_memory),batch_size):\n",
+ " if(i + batch_size>len(replay_memory)):\n",
+ " break\n",
+ " \n",
+ " batch_data = deepcopy(replay_memory[i:i+batch_size])\n",
+ " batch_labels = deepcopy(replay_labels[i:i+batch_size])\n",
+ " \n",
+ " batch_data = np.array(batch_data,dtype=np.float32).reshape(batch_size,4,4,16)\n",
+ " batch_labels = np.array(batch_labels,dtype=np.float32).reshape(batch_size,output_units)\n",
+ " \n",
+ " feed_dict = {tf_batch_dataset: batch_data, tf_batch_labels: batch_labels}\n",
+ " _,l = session.run([optimizer,loss],feed_dict=feed_dict)\n",
+ " back_loss += l \n",
+ " \n",
+ " print(\"Mini-Batch - {} Back-Prop : {}, Loss : {}\".format(batch_num,back,l))\n",
+ " batch_num +=1\n",
+ " back_loss /= batch_num\n",
+ " J.append(back_loss)\n",
+ " \n",
+ " #store the parameters in a dictionary\n",
+ " final_parameters['conv1_layer1_weights'] = session.run(conv1_layer1_weights)\n",
+ " final_parameters['conv1_layer2_weights'] = session.run(conv1_layer2_weights)\n",
+ " final_parameters['conv2_layer1_weights'] = session.run(conv2_layer1_weights)\n",
+ " final_parameters['conv2_layer2_weights'] = session.run(conv2_layer2_weights)\n",
+ " final_parameters['conv1_layer1_biases'] = session.run(conv1_layer1_biases)\n",
+ " final_parameters['conv1_layer2_biases'] = session.run(conv1_layer2_biases)\n",
+ " final_parameters['conv2_layer1_biases'] = session.run(conv2_layer1_biases)\n",
+ " final_parameters['conv2_layer2_biases'] = session.run(conv2_layer2_biases)\n",
+ " final_parameters['fc_layer1_weights'] = session.run(fc_layer1_weights)\n",
+ " final_parameters['fc_layer2_weights'] = session.run(fc_layer2_weights)\n",
+ " final_parameters['fc_layer1_biases'] = session.run(fc_layer1_biases)\n",
+ " final_parameters['fc_layer2_biases'] = session.run(fc_layer2_biases)\n",
+ " \n",
+ " #number of back-props\n",
+ " back+=1\n",
+ " \n",
+ " #make new memory \n",
+ " replay_memory = list()\n",
+ " replay_labels = list()\n",
+ " \n",
+ " \n",
+ " if(local_iters%400==0):\n",
+ " print(\"Episode : {}, Score : {}, Iters : {}, Finish : {}\".format(ep,total_score,local_iters,finish))\n",
+ " \n",
+ " local_iters += 1\n",
+ " total_iters += 1\n",
+ " \n",
+ " scores.append(total_score)\n",
+ " print(\"Episode {} finished with score {}, result : {} board : {}, epsilon : {}, learning rate : {} \".format(ep,total_score,finish,board,epsilon,session.run(learning_rate)))\n",
+ " print()\n",
+ " \n",
+ " if((ep+1)%1000==0):\n",
+ " print(\"Maximum Score : {} ,Episode : {}\".format(maximum,episode)) \n",
+ " print(\"Loss : {}\".format(J[len(J)-1]))\n",
+ " print()\n",
+ " \n",
+ " if(maximum