diff --git a/Architecture/Architecture.JPG b/Architecture/Architecture.JPG new file mode 100644 index 0000000..9f4864f Binary files /dev/null and b/Architecture/Architecture.JPG differ diff --git a/GamePlay.ipynb b/GamePlay.ipynb new file mode 100644 index 0000000..6fbabfa --- /dev/null +++ b/GamePlay.ipynb @@ -0,0 +1,530 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Play 2048 using Trained Weights" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import Required Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import numpy as np\n", + "import pandas as pd\n", + "from copy import deepcopy\n", + "import random \n", + "import math\n", + "import time\n", + "from tkinter import *" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Restore Architecture" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "#conv layer1 depth\n", + "depth1 = 128\n", + "\n", + "#conv layer2 depth\n", + "depth2 = 128\n", + "\n", + "#input depth\n", + "input_depth = 16\n", + "\n", + "#fully connected hidden layer\n", + "hidden_units = 256\n", + "\n", + "#output layer\n", + "output_units = 4\n", + "\n", + "#shape of weights\n", + "conv1_layer1_shape = [2,1,input_depth,depth1]\n", + "conv1_layer2_shape = [2,1,depth1,depth2]\n", + "conv2_layer1_shape = [1,2,input_depth,depth1]\n", + "conv2_layer2_shape = [1,2,depth1,depth2]\n", + "\n", + "fc_layer1_w_shape = [3*4*depth1*2+ 4*2*depth2*2 + 3*3*depth2*2,hidden_units]\n", + "fc_layer1_b_shape = [hidden_units]\n", + "fc_layer2_w_shape = [hidden_units,output_units]\n", + "fc_layer2_b_shape = [output_units]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load The parameters" + ] + }, + { + "cell_type": "code", +
"execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "parameters = dict()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "path = r'E:\\Study n Work\\Projects\\2048\\Final Weights'\n", + "parameters['conv1_layer1'] = np.array(pd.read_csv(path + r'\\conv1_layer1_weights.csv')['Weight']).reshape(conv1_layer1_shape)\n", + "parameters['conv1_layer2'] = np.array(pd.read_csv(path + r'\\conv1_layer2_weights.csv')['Weight']).reshape(conv1_layer2_shape)\n", + "parameters['conv2_layer1'] = np.array(pd.read_csv(path + r'\\conv2_layer1_weights.csv')['Weight']).reshape(conv2_layer1_shape)\n", + "parameters['conv2_layer2'] = np.array(pd.read_csv(path + r'\\conv2_layer2_weights.csv')['Weight']).reshape(conv2_layer2_shape)\n", + "parameters['fc_layer1_w'] = np.array(pd.read_csv(path + r'\\fc_layer1_weights.csv')['Weight']).reshape(fc_layer1_w_shape)\n", + "parameters['fc_layer1_b'] = np.array(pd.read_csv(path + r'\\fc_layer1_biases.csv')['Weight']).reshape(fc_layer1_b_shape)\n", + "parameters['fc_layer2_w'] = np.array(pd.read_csv(path + r'\\fc_layer2_weights.csv')['Weight']).reshape(fc_layer2_w_shape)\n", + "parameters['fc_layer2_b'] = np.array(pd.read_csv(path + r'\\fc_layer2_biases.csv')['Weight']).reshape(fc_layer2_b_shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Game Logic" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def new_game(n):\n", + " matrix = []\n", + "\n", + " for i in range(n):\n", + " matrix.append([0] * n)\n", + " return matrix\n", + "\n", + "def add_two(mat):\n", + " empty_cells = []\n", + " for i in range(len(mat)):\n", + " for j in range(len(mat)):\n", + " if(mat[i][j]==0):\n", + " empty_cells.append((i,j))\n", + " if(len(empty_cells)==0):\n", + " return mat\n", + " index_pair = 
empty_cells[random.randint(0,len(empty_cells)-1)]\n", + " \n", + " num = np.random.random(1)\n", + " if(num>0.9):\n", + " mat[index_pair[0]][index_pair[1]]=4\n", + " else:\n", + " mat[index_pair[0]][index_pair[1]]=2\n", + " return mat\n", + "\n", + "def game_state(mat):\n", + " for i in range(len(mat)-1): #intentionally reduced to check the row on the right and below\n", + " for j in range(len(mat[0])-1): #more elegant to use exceptions but most likely this will be their solution\n", + " if mat[i][j]==mat[i+1][j] or mat[i][j+1]==mat[i][j]:\n", + " return 'not over'\n", + " for i in range(len(mat)): #check for any zero entries\n", + " for j in range(len(mat[0])):\n", + " if mat[i][j]==0:\n", + " return 'not over'\n", + " for k in range(len(mat)-1): #to check the left/right entries on the last row\n", + " if mat[len(mat)-1][k]==mat[len(mat)-1][k+1]:\n", + " return 'not over'\n", + " for j in range(len(mat)-1): #check up/down entries on last column\n", + " if mat[j][len(mat)-1]==mat[j+1][len(mat)-1]:\n", + " return 'not over'\n", + " return 'lose'\n", + "\n", + "def reverse(mat):\n", + " new=[]\n", + " for i in range(len(mat)):\n", + " new.append([])\n", + " for j in range(len(mat[0])):\n", + " new[i].append(mat[i][len(mat[0])-j-1])\n", + " return new\n", + "\n", + "def transpose(mat):\n", + " new=[]\n", + " for i in range(len(mat[0])):\n", + " new.append([])\n", + " for j in range(len(mat)):\n", + " new[i].append(mat[j][i])\n", + " return new\n", + "\n", + "def cover_up(mat):\n", + " new=[[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0]]\n", + " done=False\n", + " for i in range(4):\n", + " count=0\n", + " for j in range(4):\n", + " if mat[i][j]!=0:\n", + " new[i][count]=mat[i][j]\n", + " if j!=count:\n", + " done=True\n", + " count+=1\n", + " return (new,done)\n", + "\n", + "def merge(mat):\n", + " done=False\n", + " score = 0\n", + " for i in range(4):\n", + " for j in range(3):\n", + " if mat[i][j]==mat[i][j+1] and mat[i][j]!=0:\n", + " mat[i][j]*=2\n", + " score += 
mat[i][j] \n", + " mat[i][j+1]=0\n", + " done=True\n", + " return (mat,done,score)\n", + "\n", + "\n", + "def up(game):\n", + " game=transpose(game)\n", + " game,done=cover_up(game)\n", + " temp=merge(game)\n", + " game=temp[0]\n", + " done=done or temp[1]\n", + " game=cover_up(game)[0]\n", + " game=transpose(game)\n", + " return (game,done,temp[2])\n", + "\n", + "def down(game):\n", + " game=reverse(transpose(game))\n", + " game,done=cover_up(game)\n", + " temp=merge(game)\n", + " game=temp[0]\n", + " done=done or temp[1]\n", + " game=cover_up(game)[0]\n", + " game=transpose(reverse(game))\n", + " return (game,done,temp[2])\n", + "\n", + "def left(game):\n", + " game,done=cover_up(game)\n", + " temp=merge(game)\n", + " game=temp[0]\n", + " done=done or temp[1]\n", + " game=cover_up(game)[0]\n", + " return (game,done,temp[2])\n", + "\n", + "def right(game):\n", + " game=reverse(game)\n", + " game,done=cover_up(game)\n", + " temp=merge(game)\n", + " game=temp[0]\n", + " done=done or temp[1]\n", + " game=cover_up(game)[0]\n", + " game=reverse(game)\n", + " return (game,done,temp[2])\n", + " \n", + "\n", + "def findemptyCell(mat):\n", + " count = 0\n", + " for i in range(len(mat)):\n", + " for j in range(len(mat)):\n", + " if(mat[i][j]==0):\n", + " count+=1\n", + " return count\n", + "\n", + "def change_values(X):\n", + " power_mat = np.zeros(shape=(1,4,4,16),dtype=np.float32)\n", + " for i in range(4):\n", + " for j in range(4):\n", + " if(X[i][j]==0):\n", + " power_mat[0][i][j][0] = 1.0\n", + " else:\n", + " power = int(math.log(X[i][j],2))\n", + " power_mat[0][i][j][power] = 1.0\n", + " return power_mat" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Controls" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "controls = {0:up,1:left,2:right,3:down}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Restore the Graph" + ] +
}, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "learned_graph = tf.Graph()\n", + "\n", + "with learned_graph.as_default():\n", + " \n", + " #input data\n", + " single_dataset = tf.placeholder(tf.float32,shape=(1,4,4,16))\n", + " \n", + " #weights\n", + " \n", + " #conv layer1 weights\n", + " conv1_layer1_weights = tf.constant(parameters['conv1_layer1'],dtype=tf.float32)\n", + " conv1_layer2_weights = tf.constant(parameters['conv1_layer2'],dtype=tf.float32)\n", + " \n", + " #conv layer2 weights\n", + " conv2_layer1_weights = tf.constant(parameters['conv2_layer1'],dtype=tf.float32)\n", + " conv2_layer2_weights = tf.constant(parameters['conv2_layer2'],dtype=tf.float32)\n", + " \n", + " #fully connected parameters\n", + " fc_layer1_weights = tf.constant(parameters['fc_layer1_w'],dtype=tf.float32)\n", + " fc_layer1_biases = tf.constant(parameters['fc_layer1_b'],dtype=tf.float32)\n", + " fc_layer2_weights = tf.constant(parameters['fc_layer2_w'],dtype=tf.float32)\n", + " fc_layer2_biases = tf.constant(parameters['fc_layer2_b'],dtype=tf.float32)\n", + " \n", + " \n", + " #model\n", + " def model(dataset):\n", + " #layer1\n", + " conv1 = tf.nn.conv2d(dataset,conv1_layer1_weights,[1,1,1,1],padding='VALID') \n", + " conv2 = tf.nn.conv2d(dataset,conv2_layer1_weights,[1,1,1,1],padding='VALID') \n", + "\n", + " #layer1 relu activation\n", + " relu1 = tf.nn.relu(conv1)\n", + " relu2 = tf.nn.relu(conv2)\n", + "\n", + " #layer2\n", + " conv11 = tf.nn.conv2d(relu1,conv1_layer2_weights,[1,1,1,1],padding='VALID') \n", + " conv12 = tf.nn.conv2d(relu1,conv2_layer2_weights,[1,1,1,1],padding='VALID') \n", + "\n", + " conv21 = tf.nn.conv2d(relu2,conv1_layer2_weights,[1,1,1,1],padding='VALID') \n", + " conv22 = tf.nn.conv2d(relu2,conv2_layer2_weights,[1,1,1,1],padding='VALID') \n", + "\n", + " #layer2 relu activation\n", + " relu11 = tf.nn.relu(conv11)\n", + " relu12 = tf.nn.relu(conv12)\n", + " relu21 = tf.nn.relu(conv21)\n", + " 
relu22 = tf.nn.relu(conv22)\n", + "\n", + " #get shapes of all activations\n", + " shape1 = relu1.get_shape().as_list()\n", + " shape2 = relu2.get_shape().as_list()\n", + "\n", + " shape11 = relu11.get_shape().as_list()\n", + " shape12 = relu12.get_shape().as_list()\n", + " shape21 = relu21.get_shape().as_list()\n", + " shape22 = relu22.get_shape().as_list()\n", + "\n", + " #expansion\n", + " hidden1 = tf.reshape(relu1,[shape1[0],shape1[1]*shape1[2]*shape1[3]])\n", + " hidden2 = tf.reshape(relu2,[shape2[0],shape2[1]*shape2[2]*shape2[3]])\n", + "\n", + " hidden11 = tf.reshape(relu11,[shape11[0],shape11[1]*shape11[2]*shape11[3]])\n", + " hidden12 = tf.reshape(relu12,[shape12[0],shape12[1]*shape12[2]*shape12[3]])\n", + " hidden21 = tf.reshape(relu21,[shape21[0],shape21[1]*shape21[2]*shape21[3]])\n", + " hidden22 = tf.reshape(relu22,[shape22[0],shape22[1]*shape22[2]*shape22[3]])\n", + "\n", + " #concatenation\n", + " hidden = tf.concat([hidden1,hidden2,hidden11,hidden12,hidden21,hidden22],axis=1)\n", + "\n", + " #full connected layers\n", + " hidden = tf.matmul(hidden,fc_layer1_weights) + fc_layer1_biases\n", + " hidden = tf.nn.relu(hidden)\n", + "\n", + " #output layer\n", + " output = tf.matmul(hidden,fc_layer2_weights) + fc_layer2_biases\n", + "\n", + " #return output\n", + " return output\n", + "\n", + " #for single example\n", + " single_output = model(single_dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Enjoy the Game!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "SIZE = 500\n", + "GRID_LEN = 4\n", + "GRID_PADDING = 10\n", + "\n", + "BACKGROUND_COLOR_GAME = \"#92877d\"\n", + "BACKGROUND_COLOR_CELL_EMPTY = \"#9e948a\"\n", + "BACKGROUND_COLOR_DICT = { 2:\"#eee4da\", 4:\"#ede0c8\", 8:\"#f2b179\", 16:\"#f59563\", \\\n", + " 32:\"#f67c5f\", 64:\"#f65e3b\", 128:\"#edcf72\", 256:\"#edcc61\", \\\n", + " 512:\"#edc850\", 1024:\"#edc53f\", 2048:\"#edc22e\" }\n", + "\n", + "CELL_COLOR_DICT = { 2:\"#776e65\", 4:\"#776e65\", 8:\"#f9f6f2\", 16:\"#f9f6f2\", \\\n", + " 32:\"#f9f6f2\", 64:\"#f9f6f2\", 128:\"#f9f6f2\", 256:\"#f9f6f2\", \\\n", + " 512:\"#f9f6f2\", 1024:\"#f9f6f2\", 2048:\"#f9f6f2\" }\n", + "\n", + "FONT = (\"Verdana\", 40, \"bold\")\n", + "\n", + "learned_sess = tf.Session(graph=learned_graph)\n", + "\n", + "class GameGrid(Frame):\n", + " def __init__(self):\n", + " Frame.__init__(self)\n", + "\n", + " self.grid()\n", + " self.master.title('2048')\n", + "\n", + " self.grid_cells = []\n", + " self.init_grid()\n", + " self.init_matrix()\n", + " self.update_grid_cells()\n", + " \n", + " self.wait_visibility()\n", + " self.after(10,self.make_move)\n", + " \n", + " def init_grid(self):\n", + " background = Frame(self, bg=BACKGROUND_COLOR_GAME, width=SIZE, height=SIZE)\n", + " background.grid()\n", + " for i in range(GRID_LEN):\n", + " grid_row = []\n", + " for j in range(GRID_LEN):\n", + " cell = Frame(background, bg=BACKGROUND_COLOR_CELL_EMPTY, width=SIZE/GRID_LEN, height=SIZE/GRID_LEN)\n", + " cell.grid(row=i, column=j, padx=GRID_PADDING, pady=GRID_PADDING)\n", + " # font = Font(size=FONT_SIZE, family=FONT_FAMILY, weight=FONT_WEIGHT)\n", + " t = Label(master=cell, text=\"\", bg=BACKGROUND_COLOR_CELL_EMPTY, justify=CENTER, font=FONT, width=4, height=2)\n", + " t.grid()\n", + " grid_row.append(t)\n", + "\n", + " self.grid_cells.append(grid_row)\n", + "\n", + " def gen(self):\n", + " return 
randint(0, GRID_LEN - 1)\n", + "\n", + " def init_matrix(self):\n", + " self.matrix = new_game(4)\n", + "\n", + " self.matrix=add_two(self.matrix)\n", + " self.matrix=add_two(self.matrix)\n", + "\n", + " def update_grid_cells(self):\n", + " for i in range(GRID_LEN):\n", + " for j in range(GRID_LEN):\n", + " new_number = self.matrix[i][j]\n", + " if new_number == 0:\n", + " self.grid_cells[i][j].configure(text=\"\", bg=BACKGROUND_COLOR_CELL_EMPTY)\n", + " else:\n", + " self.grid_cells[i][j].configure(text=str(new_number), bg=BACKGROUND_COLOR_DICT[new_number], fg=CELL_COLOR_DICT[new_number])\n", + " self.update_idletasks()\n", + " \n", + " def make_move(self):\n", + " output = learned_sess.run([single_output],feed_dict = {single_dataset:change_values(self.matrix)})\n", + " move = np.argmax(output[0])\n", + " self.matrix,done,tempo = controls[move](self.matrix)\n", + " done=True\n", + " \n", + " #if game_state(self.matrix)=='win':\n", + " # self.grid_cells[1][1].configure(text=\"You\",bg=BACKGROUND_COLOR_CELL_EMPTY)\n", + " # self.grid_cells[1][2].configure(text=\"Win!\",bg=BACKGROUND_COLOR_CELL_EMPTY)\n", + " # done=False\n", + " if game_state(self.matrix)=='lose':\n", + " self.grid_cells[1][1].configure(text=\"You\",bg=BACKGROUND_COLOR_CELL_EMPTY)\n", + " self.grid_cells[1][2].configure(text=\"Lose!\",bg=BACKGROUND_COLOR_CELL_EMPTY)\n", + " done=False\n", + " \n", + " self.matrix = add_two(self.matrix)\n", + " self.update_grid_cells()\n", + " \n", + " \n", + " if(done==True):\n", + " self.after(7,self.make_move)\n", + " else:\n", + " time.sleep(3)\n", + " self.init_matrix()\n", + " self.update_grid_cells()\n", + " self.after(7,self.make_move)\n", + "\n", + " def generate_next(self):\n", + " empty_cells = []\n", + " for i in range(len(mat)):\n", + " for j in range(len(mat)):\n", + " if(mat[i][j]==0):\n", + " empty_cells.append((i,j))\n", + " if(len(empty_cells)==0):\n", + " return 0,false\n", + " index_pair = empty_cells[random.randint(0,len(empty_cells)-1)]\n", + " 
index = index_pair\n", + " self.matrix[index[0]][index[1]] = 2\n", + "\n", + "root = Tk()\n", + "gamegrid = GameGrid()\n", + "root.mainloop()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Loss/Loss.png b/Loss/Loss.png new file mode 100644 index 0000000..8281f73 Binary files /dev/null and b/Loss/Loss.png differ diff --git a/Loss/Loss1.png b/Loss/Loss1.png new file mode 100644 index 0000000..38eda78 Binary files /dev/null and b/Loss/Loss1.png differ diff --git a/Score/Score.png b/Score/Score.png new file mode 100644 index 0000000..f55e9e5 Binary files /dev/null and b/Score/Score.png differ diff --git a/Score/Scores.png b/Score/Scores.png new file mode 100644 index 0000000..fd3c782 Binary files /dev/null and b/Score/Scores.png differ diff --git a/Train-2048.ipynb b/Train-2048.ipynb new file mode 100644 index 0000000..61d614b --- /dev/null +++ b/Train-2048.ipynb @@ -0,0 +1,764 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# 2048 Deep Reinforcement Learning" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import Required Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import numpy as np\n", + "from copy import deepcopy\n", + "import random \n", + "import math\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Game Logic" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + 
"collapsed": true + }, + "outputs": [], + "source": [ + "#initialize a new game\n", + "def new_game(n):\n", + " matrix = np.zeros([n,n])\n", + " return matrix\n", + "\n", + "#add 2 or 4 in the matrix\n", + "def add_two(mat):\n", + " empty_cells = []\n", + " for i in range(len(mat)):\n", + " for j in range(len(mat[0])):\n", + " if(mat[i][j]==0):\n", + " empty_cells.append((i,j))\n", + " if(len(empty_cells)==0):\n", + " return mat\n", + " \n", + " index_pair = empty_cells[random.randint(0,len(empty_cells)-1)]\n", + " \n", + " prob = random.random()\n", + " if(prob>=0.9):\n", + " mat[index_pair[0]][index_pair[1]]=4\n", + " else:\n", + " mat[index_pair[0]][index_pair[1]]=2\n", + " return mat\n", + "\n", + "#to check state of the game\n", + "def game_state(mat):\n", + " #if 2048 in mat:\n", + " # return 'win'\n", + " \n", + " for i in range(len(mat)-1): #intentionally reduced to check the row on the right and below\n", + " for j in range(len(mat[0])-1): #more elegant to use exceptions but most likely this will be their solution\n", + " if mat[i][j]==mat[i+1][j] or mat[i][j+1]==mat[i][j]:\n", + " return 'not over'\n", + " \n", + " for i in range(len(mat)): #check for any zero entries\n", + " for j in range(len(mat[0])):\n", + " if mat[i][j]==0:\n", + " return 'not over'\n", + " \n", + " for k in range(len(mat)-1): #to check the left/right entries on the last row\n", + " if mat[len(mat)-1][k]==mat[len(mat)-1][k+1]:\n", + " return 'not over'\n", + " \n", + " for j in range(len(mat)-1): #check up/down entries on last column\n", + " if mat[j][len(mat)-1]==mat[j+1][len(mat)-1]:\n", + " return 'not over'\n", + " \n", + " return 'lose'\n", + "\n", + "\n", + "def reverse(mat):\n", + " new=[]\n", + " for i in range(len(mat)):\n", + " new.append([])\n", + " for j in range(len(mat[0])):\n", + " new[i].append(mat[i][len(mat[0])-j-1])\n", + " return new\n", + "\n", + "def transpose(mat):\n", + " new=[]\n", + " for i in range(len(mat[0])):\n", + " new.append([])\n", + " for j in 
range(len(mat)):\n", + " new[i].append(mat[j][i])\n", + " \n", + " return np.transpose(mat)\n", + "\n", + "def cover_up(mat):\n", + " new = [[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0]]\n", + " done = False\n", + " for i in range(4):\n", + " count = 0\n", + " for j in range(4):\n", + " if mat[i][j]!=0:\n", + " new[i][count] = mat[i][j]\n", + " if j!=count:\n", + " done=True\n", + " count+=1\n", + " return (new,done)\n", + "\n", + "def merge(mat):\n", + " done=False\n", + " score = 0\n", + " for i in range(4):\n", + " for j in range(3):\n", + " if mat[i][j]==mat[i][j+1] and mat[i][j]!=0:\n", + " mat[i][j]*=2\n", + " score += mat[i][j] \n", + " mat[i][j+1]=0\n", + " done=True\n", + " return (mat,done,score)\n", + "\n", + "#up move\n", + "def up(game):\n", + " game = transpose(game)\n", + " game,done = cover_up(game)\n", + " temp = merge(game)\n", + " game = temp[0]\n", + " done = done or temp[1]\n", + " game = cover_up(game)[0]\n", + " game = transpose(game)\n", + " return (game,done,temp[2])\n", + "\n", + "#down move\n", + "def down(game):\n", + " game=reverse(transpose(game))\n", + " game,done=cover_up(game)\n", + " temp=merge(game)\n", + " game=temp[0]\n", + " done=done or temp[1]\n", + " game=cover_up(game)[0]\n", + " game=transpose(reverse(game))\n", + " return (game,done,temp[2])\n", + "\n", + "#left move\n", + "def left(game):\n", + " game,done=cover_up(game)\n", + " temp=merge(game)\n", + " game=temp[0]\n", + " done=done or temp[1]\n", + " game=cover_up(game)[0]\n", + " return (game,done,temp[2])\n", + "\n", + "#right move\n", + "def right(game):\n", + " game=reverse(game)\n", + " game,done=cover_up(game)\n", + " temp=merge(game)\n", + " game=temp[0]\n", + " done=done or temp[1]\n", + " game=cover_up(game)[0]\n", + " game=reverse(game)\n", + " return (game,done,temp[2])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Controls" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + 
"outputs": [], + "source": [ + "controls = {0:up,1:left,2:right,3:down}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Important Functions\n", + "* Find Empty Cell Function (Used in Reward)\n", + "* Convert Input Values" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#convert the input game matrix into corresponding power of 2 matrix.\n", + "def change_values(X):\n", + " power_mat = np.zeros(shape=(1,4,4,16),dtype=np.float32)\n", + " for i in range(4):\n", + " for j in range(4):\n", + " if(X[i][j]==0):\n", + " power_mat[0][i][j][0] = 1.0\n", + " else:\n", + " power = int(math.log(X[i][j],2))\n", + " power_mat[0][i][j][power] = 1.0\n", + " return power_mat \n", + "\n", + "#find the number of empty cells in the game matrix.\n", + "def findemptyCell(mat):\n", + " count = 0\n", + " for i in range(len(mat)):\n", + " for j in range(len(mat)):\n", + " if(mat[i][j]==0):\n", + " count+=1\n", + " return count" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Hyper Parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#hyper parameters\n", + "start_learning_rate = 0.0005\n", + "\n", + "#gamma for Q-learning\n", + "gamma = 0.9\n", + "\n", + "#epsilon greedy approach\n", + "epsilon = 0.9\n", + "\n", + "#to store states and lables of the game for training\n", + "#states of the game\n", + "replay_memory = list()\n", + "\n", + "#labels of the states\n", + "replay_labels = list()\n", + "\n", + "#capacity of memory\n", + "mem_capacity = 6000" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Network Architecture\n", + "\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#first convolution layer depth\n", + "depth1 = 128\n", 
+ "\n", + "#second convolution layer depth\n", + "depth2 = 128\n", + "\n", + "#batch size for batch gradient descent\n", + "batch_size = 512\n", + "\n", + "#input units\n", + "input_units = 16\n", + "\n", + "#fully connected layer neurons\n", + "hidden_units = 256\n", + "\n", + "#output neurons = number of moves\n", + "output_units = 4" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Let's make the Tensorflow Graph\n", + "* Loss = mean ( square( Q(st,at) - (r + gamma x max(Q(st+1,a))) ) )\n", + "* Activation = RELU\n", + "* Optimizer = RMSProp" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "#input data\n", + "tf_batch_dataset = tf.placeholder(tf.float32,shape=(batch_size,4,4,16))\n", + "tf_batch_labels = tf.placeholder(tf.float32,shape=(batch_size,output_units))\n", + "\n", + "single_dataset = tf.placeholder(tf.float32,shape=(1,4,4,16))\n", + "\n", + "\n", + "#CONV LAYERS\n", + "#conv layer1 weights\n", + "conv1_layer1_weights = tf.Variable(tf.truncated_normal([1,2,input_units,depth1],mean=0,stddev=0.01))\n", + "conv2_layer1_weights = tf.Variable(tf.truncated_normal([2,1,input_units,depth1],mean=0,stddev=0.01))\n", + "\n", + "#conv layer2 weights\n", + "conv1_layer2_weights = tf.Variable(tf.truncated_normal([1,2,depth1,depth2],mean=0,stddev=0.01))\n", + "conv2_layer2_weights = tf.Variable(tf.truncated_normal([2,1,depth1,depth2],mean=0,stddev=0.01))\n", + "\n", + "\n", + "\n", + "#FULLY CONNECTED LAYERS\n", + "expand_size = 2*4*depth2*2 + 3*3*depth2*2 + 4*3*depth1*2\n", + "fc_layer1_weights = tf.Variable(tf.truncated_normal([expand_size,hidden_units],mean=0,stddev=0.01))\n", + "fc_layer1_biases = tf.Variable(tf.truncated_normal([1,hidden_units],mean=0,stddev=0.01))\n", + "fc_layer2_weights = tf.Variable(tf.truncated_normal([hidden_units,output_units],mean=0,stddev=0.01))\n", + "fc_layer2_biases = tf.Variable(tf.truncated_normal([1,output_units],mean=0,stddev=0.01))\n", +
"\n", + "\n", + "#model\n", + "def model(dataset):\n", + " #layer1\n", + " conv1 = tf.nn.conv2d(dataset,conv1_layer1_weights,[1,1,1,1],padding='VALID') \n", + " conv2 = tf.nn.conv2d(dataset,conv2_layer1_weights,[1,1,1,1],padding='VALID') \n", + " \n", + " #layer1 relu activation\n", + " relu1 = tf.nn.relu(conv1)\n", + " relu2 = tf.nn.relu(conv2)\n", + " \n", + " #layer2\n", + " conv11 = tf.nn.conv2d(relu1,conv1_layer2_weights,[1,1,1,1],padding='VALID') \n", + " conv12 = tf.nn.conv2d(relu1,conv2_layer2_weights,[1,1,1,1],padding='VALID') \n", + "\n", + " conv21 = tf.nn.conv2d(relu2,conv1_layer2_weights,[1,1,1,1],padding='VALID') \n", + " conv22 = tf.nn.conv2d(relu2,conv2_layer2_weights,[1,1,1,1],padding='VALID') \n", + "\n", + " #layer2 relu activation\n", + " relu11 = tf.nn.relu(conv11)\n", + " relu12 = tf.nn.relu(conv12)\n", + " relu21 = tf.nn.relu(conv21)\n", + " relu22 = tf.nn.relu(conv22)\n", + " \n", + " #get shapes of all activations\n", + " shape1 = relu1.get_shape().as_list()\n", + " shape2 = relu2.get_shape().as_list()\n", + " \n", + " shape11 = relu11.get_shape().as_list()\n", + " shape12 = relu12.get_shape().as_list()\n", + " shape21 = relu21.get_shape().as_list()\n", + " shape22 = relu22.get_shape().as_list()\n", + "\n", + " #expansion\n", + " hidden1 = tf.reshape(relu1,[shape1[0],shape1[1]*shape1[2]*shape1[3]])\n", + " hidden2 = tf.reshape(relu2,[shape2[0],shape2[1]*shape2[2]*shape2[3]])\n", + " \n", + " hidden11 = tf.reshape(relu11,[shape11[0],shape11[1]*shape11[2]*shape11[3]])\n", + " hidden12 = tf.reshape(relu12,[shape12[0],shape12[1]*shape12[2]*shape12[3]])\n", + " hidden21 = tf.reshape(relu21,[shape21[0],shape21[1]*shape21[2]*shape21[3]])\n", + " hidden22 = tf.reshape(relu22,[shape22[0],shape22[1]*shape22[2]*shape22[3]])\n", + "\n", + " #concatenation\n", + " hidden = tf.concat([hidden1,hidden2,hidden11,hidden12,hidden21,hidden22],axis=1)\n", + "\n", + " #full connected layers\n", + " hidden = tf.matmul(hidden,fc_layer1_weights) + 
fc_layer1_biases\n", + " hidden = tf.nn.relu(hidden)\n", + "\n", + " #output layer\n", + " output = tf.matmul(hidden,fc_layer2_weights) + fc_layer2_biases\n", + " \n", + " #return output\n", + " return output\n", + "\n", + "#for single example\n", + "single_output = model(single_dataset)\n", + "\n", + "#for batch data\n", + "logits = model(tf_batch_dataset)\n", + "\n", + "#loss\n", + "loss = tf.square(tf.subtract(tf_batch_labels,logits))\n", + "loss = tf.reduce_sum(loss,axis=1,keep_dims=True)\n", + "loss = tf.reduce_mean(loss)/2.0\n", + "\n", + "#optimizer\n", + "global_step = tf.Variable(0) # count the number of steps taken.\n", + "learning_rate = tf.train.exponential_decay(float(start_learning_rate), global_step, 1000, 0.90, staircase=True)\n", + "optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss, global_step=global_step)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "#loss\n", + "J = []\n", + "\n", + "#scores\n", + "scores = []\n", + "\n", + "#to store final parameters\n", + "final_parameters = {}\n", + "\n", + "#number of episodes\n", + "M = 200001" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create training dataset and Train Simultaneously\n", + "* Current Reward = number of merges + log(new max,2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with tf.Session() as session:\n", + " tf.global_variables_initializer().run()\n", + " print(\"Initialized\")\n", + " \n", + " global epsilon\n", + " global replay_labels\n", + " global replay_memory\n", + "\n", + " #for episode with max score\n", + " maximum = -1\n", + " episode = -1\n", + " \n", + " #total_iters \n", + " total_iters = 1\n", + " \n", + " #number of back props\n", + " back=0\n", + " \n", + " for ep in range(M):\n", + " global board\n", + " board = new_game(4)\n", + " add_two(board)\n", + " 
add_two(board)\n", + " \n", + " #whether episode finished or not\n", + " finish = 'not over'\n", + " \n", + " #total_score of this episode\n", + " total_score = 0\n", + " \n", + " #iters per episode\n", + " local_iters = 1\n", + " \n", + " while(finish=='not over'):\n", + " prev_board = deepcopy(board)\n", + " \n", + " #get the required move for this state\n", + " state = deepcopy(board)\n", + " state = change_values(state)\n", + " state = np.array(state,dtype = np.float32).reshape(1,4,4,16)\n", + " feed_dict = {single_dataset:state}\n", + " control_scores = session.run(single_output,feed_dict=feed_dict)\n", + " \n", + " #find the move with max Q value\n", + " control_buttons = np.flip(np.argsort(control_scores),axis=1)\n", + " \n", + " #copy the Q-values as labels\n", + " labels = deepcopy(control_scores[0])\n", + " \n", + " #generate random number for epsilon greedy approach\n", + " num = random.uniform(0,1)\n", + " \n", + " #store prev max\n", + " prev_max = np.max(prev_board)\n", + " \n", + " #num is less epsilon generate random move\n", + " if(num10000) or (epsilon>0.1 and total_iters%2500==0)):\n", + " epsilon = epsilon/1.005\n", + " \n", + " \n", + " #change the matrix values and store them in memory\n", + " prev_state = deepcopy(prev_board)\n", + " prev_state = change_values(prev_state)\n", + " prev_state = np.array(prev_state,dtype=np.float32).reshape(1,4,4,16)\n", + " replay_labels.append(labels)\n", + " replay_memory.append(prev_state)\n", + " \n", + " \n", + " #back-propagation\n", + " if(len(replay_memory)>=mem_capacity):\n", + " back_loss = 0\n", + " batch_num = 0\n", + " z = list(zip(replay_memory,replay_labels))\n", + " np.random.shuffle(z)\n", + " np.random.shuffle(z)\n", + " replay_memory,replay_labels = zip(*z)\n", + " \n", + " for i in range(0,len(replay_memory),batch_size):\n", + " if(i + batch_size>len(replay_memory)):\n", + " break\n", + " \n", + " batch_data = deepcopy(replay_memory[i:i+batch_size])\n", + " batch_labels = 
deepcopy(replay_labels[i:i+batch_size])\n", + " \n", + " batch_data = np.array(batch_data,dtype=np.float32).reshape(batch_size,4,4,16)\n", + " batch_labels = np.array(batch_labels,dtype=np.float32).reshape(batch_size,output_units)\n", + " \n", + " feed_dict = {tf_batch_dataset: batch_data, tf_batch_labels: batch_labels}\n", + " _,l = session.run([optimizer,loss],feed_dict=feed_dict)\n", + " back_loss += l \n", + " \n", + " print(\"Mini-Batch - {} Back-Prop : {}, Loss : {}\".format(batch_num,back,l))\n", + " batch_num +=1\n", + " back_loss /= batch_num\n", + " J.append(back_loss)\n", + " \n", + " #store the parameters in a dictionary\n", + " final_parameters['conv1_layer1_weights'] = session.run(conv1_layer1_weights)\n", + " final_parameters['conv1_layer2_weights'] = session.run(conv1_layer2_weights)\n", + " final_parameters['conv2_layer1_weights'] = session.run(conv2_layer1_weights)\n", + " final_parameters['conv2_layer2_weights'] = session.run(conv2_layer2_weights)\n", + " final_parameters['conv1_layer1_biases'] = session.run(conv1_layer1_biases)\n", + " final_parameters['conv1_layer2_biases'] = session.run(conv1_layer2_biases)\n", + " final_parameters['conv2_layer1_biases'] = session.run(conv2_layer1_biases)\n", + " final_parameters['conv2_layer2_biases'] = session.run(conv2_layer2_biases)\n", + " final_parameters['fc_layer1_weights'] = session.run(fc_layer1_weights)\n", + " final_parameters['fc_layer2_weights'] = session.run(fc_layer2_weights)\n", + " final_parameters['fc_layer1_biases'] = session.run(fc_layer1_biases)\n", + " final_parameters['fc_layer2_biases'] = session.run(fc_layer2_biases)\n", + " \n", + " #number of back-props\n", + " back+=1\n", + " \n", + " #make new memory \n", + " replay_memory = list()\n", + " replay_labels = list()\n", + " \n", + " \n", + " if(local_iters%400==0):\n", + " print(\"Episode : {}, Score : {}, Iters : {}, Finish : {}\".format(ep,total_score,local_iters,finish))\n", + " \n", + " local_iters += 1\n", + " total_iters += 
1\n", + " \n", + " scores.append(total_score)\n", + " print(\"Episode {} finished with score {}, result : {} board : {}, epsilon : {}, learning rate : {} \".format(ep,total_score,finish,board,epsilon,session.run(learning_rate)))\n", + " print()\n", + " \n", + " if((ep+1)%1000==0):\n", + " print(\"Maximum Score : {} ,Episode : {}\".format(maximum,episode)) \n", + " print(\"Loss : {}\".format(J[len(J)-1]))\n", + " print()\n", + " \n", + " if(maximum