# Final Project - Reinforcement Learning
## Test Notebook

---
<br>

### Name and ID:
Student 1: Roei Arpaly, XXXXXXXXX
<br>
Student 2: Ron Darmon, XXXXXXXXX
<br><br>
<img src="https://play-lh.googleusercontent.com/e_oKlKPISbgdzut1H9opevS7-LTB8-8lsmpCdMkhlnqFenZhpjxbLmx7l158-xQQCIY">

### https://github.com/mpSchrader/gym-sokoban

# General

## Installs

In [None]:
%%capture
!sudo apt-get update
!sudo apt-get install -y xvfb ffmpeg freeglut3-dev
!pip install 'imageio==2.4.0'
!pip install gym
!pip install pygame
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install pyvirtualdisplay
!pip install piglet
!pip install gym
!apt-get install python-opengl -y
!apt install xvfb -y
!pip install gym_sokoban
!git clone https://github.com/avivg7/sokoban-so.git
!unzip /content/sokoban-so/Compress.zip
!gdown --fuzzy 'https://drive.google.com/file/d/16XrmX0aVkDU_s8HPlECAY2OtieVRbvIB/view?usp=share_link'
!gdown --fuzzy 'https://drive.google.com/file/d/1LbNV57j8qPblXWfvRQ1V5wo29zXNbgcg/view?usp=share_link'
!gdown --fuzzy 'https://drive.google.com/file/d/1Lo-ii9tIgoSTUeoMIvB47GeUuw7B9mMl/view?usp=sharing'
!gdown --fuzzy 'https://drive.google.com/file/d/1MzHrgJqmB4sBNkbuf4CzAXT84OGyX4Gl/view?usp=sharing'

In [None]:
# Fetch the explainer.md that accompanies this notebook from Google Drive.
!gdown --fuzzy 'https://drive.google.com/file/d/1Dr_7WVWNH_3rCQJNvi6qG1mnN6F2Pb2V/view?usp=sharing'  # gdown to the explainer.md file of this notebook

## Imports

In [None]:
import numpy as np
import gym
from gym.utils import seeding
from gym import error, spaces, utils
from gym import logger as gymlogger
gymlogger.set_level(40) # error only
from soko_pap import *
import glob
import io
import base64
import os
import random
import matplotlib.pyplot as plt
import math
import glob
from pyvirtualdisplay import Display
from IPython.display import HTML
from IPython import display as ipythondisplay
import pygame
import pyvirtualdisplay
import imageio
import IPython
import time


# Keras
from keras.models import Sequential
from keras.layers import Flatten, Dense, Conv2D, MaxPooling2D, ReLU, LeakyReLU
from keras.optimizers import Adam

## Utils

In [None]:
def embed_mp4(filename):
    """Return an HTML ``<video>`` element that plays an mp4 file inline.

    The whole file is read and base64-encoded into a ``data:`` URI, so the
    video bytes are embedded directly in the returned markup.

    Args:
        filename: Path of the mp4 file to embed.

    Returns:
        An ``IPython.display.HTML`` object wrapping the ``<video>`` tag.
    """
    # A context manager closes the file handle deterministically instead of
    # leaving it to the garbage collector.
    with open(filename, 'rb') as video_file:
        video = video_file.read()
    b64 = base64.b64encode(video)
    tag = '''
    <video width="640" height="480" controls>
      <source src="data:video/mp4;base64,{0}" type="video/mp4">
    Your browser does not support the video tag.
    </video>'''.format(b64.decode())

    return IPython.display.HTML(tag)
# Start a hidden 1400x900 virtual display when this cell runs (presumably so
# the environment can render frames without a physical screen).
# NOTE(review): the name `display` may shadow IPython's built-in `display`
# helper inside the notebook - confirm nothing relies on that helper.
display = pyvirtualdisplay.Display(visible=0, size=(1400, 900)).start()

In [None]:
def video_func(env, agent, fps=10, time_limit=3):
    """Record one episode of ``agent`` acting in ``env`` and embed it as a video.

    At every step the action with the highest predicted output is chosen,
    translated through ``tiny_action_lookup`` and applied without reward
    shaping. Frames are written to ``imageio.mp4`` in the working directory.

    Args:
        env: Environment exposing ``reset()``, ``render(mode='rgb_array')`` and
            ``step(action, reward_shaping=...)`` (e.g. ``EnvWrapper``).
        agent: Model exposing ``predict(state, verbose=0)``.
        fps: Frame rate of the written video.
        time_limit: Wall-clock budget in whole seconds; recording stops once
            the truncated elapsed time exceeds it. Defaults to the previously
            hard-coded value of 3.

    Returns:
        The ``IPython.display.HTML`` object produced by ``embed_mp4``.
    """
    start_time = time.time()
    video_filename = 'imageio.mp4'
    done = False
    # The model expects a batch dimension in front of the observation.
    state = np.expand_dims(env.reset(), axis=0)
    with imageio.get_writer(video_filename, fps=fps) as video:
        video.append_data(env.render(mode='rgb_array'))
        # Run until the episode ends or the time budget is used up. The old
        # iteration counter never affected the outcome and has been dropped.
        while not done and int(time.time() - start_time) <= time_limit:
            scores = agent.predict(state, verbose=0)[0]
            action = tiny_action_lookup[np.argmax(scores)]

            state, _, done, _ = env.step(action, reward_shaping=False)
            state = np.expand_dims(state, axis=0)
            video.append_data(env.render(mode='rgb_array'))

    return embed_mp4(video_filename)

In [None]:
# Maps the index of the model's 8 outputs to the action id passed to env.step.
# NOTE(review): the meaning of ids 1-4 and 9-12 is defined by the Sokoban
# environment, not here - confirm against its action table.
tiny_action_lookup = {0:1, 1:2, 2:3, 3:4, 4:9, 5:10, 6:11, 7:12}  # use partial moves

class EnvWrapper(gym.Wrapper):
    """Builds a Sokoban environment and wraps it with cropping and reward shaping.

    The wrapper instantiates ``env`` itself, renders observations in the
    requested ``mode``, crops them in ``xs_observation`` and can subtract
    Manhattan-distance penalties from the reward in ``step``.

    Args:
        env: Environment class (or factory) called as
            ``env(dim_room=..., num_boxes=..., max_steps=...)``.
        dim_room: Room dimensions forwarded to ``env``.
        num_boxes: Number of boxes forwarded to ``env``.
        max_steps: Step limit forwarded to ``env``.
        seed: Value passed to ``random.seed`` on every ``reset``; ``None``
            disables seeding.
        mode: Render/observation mode (``'rgb_array'``, ``'tiny_rgb_array'``
            or anything else, which is treated as grayscale).
        crop: Whether ``'rgb_array'`` observations are cropped.
    """

    def __init__(self, env, dim_room, num_boxes, max_steps, seed=2, mode='tiny_rgb_array', crop=True):
        # gym.Wrapper expects an environment instance, so build it first and
        # hand the instance (not the class) to the base constructor.
        wrapped_env = env(dim_room=dim_room, num_boxes=num_boxes, max_steps=max_steps)
        super().__init__(wrapped_env)

        # NOTE(review): this attribute may shadow a `seed()` method inherited
        # from gym in some versions; the name is kept for compatibility.
        self.seed = seed
        self.dim_room = dim_room
        self.num_boxes = num_boxes
        self._max_steps = max_steps
        self._mode = mode
        self._crop = crop

        # The wrapper encapsulates and initializes the gym env
        self.env = wrapped_env
        self.last_state = self.reset()

    def xs_observation(self, state):
        """Crop ``state`` according to the configured mode.

        ``'rgb_array'`` loses 14 pixels per side when cropping is enabled,
        ``'tiny_rgb_array'`` loses 1 cell per side, and any other mode loses
        1 cell per side and keeps only the first channel.
        """
        if self._mode == 'rgb_array' and not self._crop:
            return state
        elif self._mode == 'rgb_array' and self._crop:
            return state[14:-14, 14:-14, :]
        elif self._mode == 'tiny_rgb_array':
            return state[1:-1, 1:-1, :]
        else:  # grayscale
            return state[1:-1, 1:-1, 0:1]

    def step(self, action, reward_shaping=False):
        """Apply ``action`` and return ``(next_state, reward, done, info)``.

        With ``reward_shaping`` enabled, and while a cell with value 4 exists
        in ``room_state``, the reward is reduced by the box-to-target Manhattan
        distance and by the box-to-player Manhattan distance minus one.
        """
        next_state, reward, done, info = self.env.step(action, observation_mode=self._mode)
        next_state = self.xs_observation(next_state)
        self.num_step += 1
        self._box_position = np.hstack(np.where(self.env.room_state == 4))  # Take box position (4)

        # NOTE(review): the arithmetic below assumes a single box (one
        # row/column pair) - confirm before using num_boxes > 1 with shaping.
        if len(self._box_position) > 0 and reward_shaping:
            distance_to_target = np.sum(np.abs(np.array(self.end_location) - np.array(self._box_position)))
            distance_to_box = -1 + np.sum(np.abs(np.array(self._box_position) - np.array(self.env.player_position)))
            reward -= distance_to_target
            reward -= distance_to_box
        return next_state, reward, done, info

    def reset(self):
        """Reseed (if configured), reset the env and return the cropped observation."""
        # Compare against None so that 0 is honoured as a valid seed.
        if self.seed is not None:
            random.seed(self.seed)
        self.num_step = 0
        # The observation comes from render(), so reset()'s return value is unused.
        self.env.reset()
        state = self.env.render(mode=self._mode)

        # First entry of box_mapping: key is used as the target, value as the box start.
        self.end_location = list(self.env.box_mapping.keys())[0]
        self.start_location = list(self.env.box_mapping.values())[0]
        self._box_position = self.start_location
        state = self.xs_observation(state)
        return state

In [None]:
def build_model(env):
    """Create and compile the convolutional network used as the agent.

    The input shape is taken from one ``env.reset()`` observation. Three
    Conv2D(kernel 5) -> MaxPooling2D(2x2) -> ReLU stages with 32, 64 and 128
    filters feed a 512-unit dense layer and a linear 8-unit output layer.

    Args:
        env: Environment whose ``reset()`` returns an array-like observation
            with a ``shape`` attribute.

    Returns:
        The compiled ``Sequential`` model (MSE loss, Adam optimizer).
    """
    model = Sequential()
    observation_shape = env.reset().shape

    # First stage carries the input shape; the later stages infer theirs.
    stack = [Conv2D(32, kernel_size=5, input_shape=observation_shape),
             MaxPooling2D(pool_size=(2, 2)),
             ReLU()]
    for filters in (64, 128):
        stack.extend([Conv2D(filters, kernel_size=5),
                      MaxPooling2D(pool_size=(2, 2)),
                      ReLU()])
    stack.extend([Flatten(),
                  Dense(512, activation='ReLU'),
                  Dense(8, activation='linear')])

    for layer in stack:
        model.add(layer)

    optimizer = Adam(learning_rate=0.005, clipnorm=1)
    model.compile(loss='mse', optimizer=optimizer)
    return model

# EX1 - FIXED SCENARIO - PUSH & PULL - ONE BOX

In [None]:
# This wrapper (default seed=2, default crop=True) is only handed to
# build_model, which reads the shape of one reset observation from it.
sokoban_env = EnvWrapper(env=PushAndPullSokobanEnv, dim_room=(7,7), num_boxes=1, max_steps=20, mode='rgb_array')
agent = build_model(sokoban_env)
# Weights are loaded from an absolute /content path.
# NOTE(review): presumably downloaded by the install cell - confirm.
agent.load_weights('/content/ex1_model_weights')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f86a16df430>

In [None]:
# Record and embed one episode of `agent` on a fresh wrapper (default seed=2, cropped frames).
video_func(EnvWrapper(env=PushAndPullSokobanEnv, dim_room=(7,7), num_boxes=1, max_steps=20, mode='rgb_array'), agent)

  self._proc.stdin.write(im.tostring())


---

# EX2 - PUSH & PULL - ONE BOX

In [None]:
#=============== DO NOT DELETE ===============
# NOTE(review): `sok_2` is not referenced by any other cell visible in this
# notebook; presumably required by the assignment template - confirm.
sok_2 = PushAndPullSokobanEnv(dim_room=(7, 7), num_boxes=1)
# ============================================

In [None]:
# Uncropped rgb_array frames here (crop=False). The wrapper is only handed to
# build_model, which reads the shape of one reset observation from it.
sokoban_env = EnvWrapper(env=PushAndPullSokobanEnv, dim_room=(7,7), num_boxes=1, max_steps=20, mode='rgb_array', crop=False)
agent2 = build_model(sokoban_env)
# Weights are loaded from an absolute /content path.
# NOTE(review): presumably downloaded by the install cell - confirm.
agent2.load_weights('/content/ex2_model_weights')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f8692ff71f0>

In [None]:
# seed=None: EnvWrapper.reset skips random.seed, so the level is not pinned by the wrapper.
video_func(EnvWrapper(env=PushAndPullSokobanEnv, dim_room=(7,7), num_boxes=1, max_steps=20, mode='rgb_array', seed=None, crop=False), agent2)

  self._proc.stdin.write(im.tostring())


In [None]:
# Episode of agent2 with seed=9 (EnvWrapper.reset passes it to random.seed before resetting the env).
video_func(EnvWrapper(env=PushAndPullSokobanEnv, dim_room=(7,7), num_boxes=1, max_steps=20, mode='rgb_array', seed=9, crop=False), agent2)

In [None]:
# Episode of agent2 with seed=46.
video_func(EnvWrapper(env=PushAndPullSokobanEnv, dim_room=(7,7), num_boxes=1, max_steps=20, mode='rgb_array', seed=46, crop=False), agent2)

In [None]:
# Episode of agent2 with seed=99.
video_func(EnvWrapper(env=PushAndPullSokobanEnv, dim_room=(7,7), num_boxes=1, max_steps=20, mode='rgb_array', seed=99, crop=False), agent2)

In [None]:
# Episode of agent2 with seed=100.
video_func(EnvWrapper(env=PushAndPullSokobanEnv, dim_room=(7,7), num_boxes=1, max_steps=20, mode='rgb_array', seed=100, crop=False), agent2)

In [None]:
# Episode of agent2 with seed=101.
video_func(EnvWrapper(env=PushAndPullSokobanEnv, dim_room=(7,7), num_boxes=1, max_steps=20, mode='rgb_array', seed=101, crop=False), agent2)

In [None]:
# Episode of agent2 with seed=102.
video_func(EnvWrapper(env=PushAndPullSokobanEnv, dim_room=(7,7), num_boxes=1, max_steps=20, mode='rgb_array', seed=102, crop=False), agent2)

In [None]:
# Episode of agent2 with seed=103.
video_func(EnvWrapper(env=PushAndPullSokobanEnv, dim_room=(7,7), num_boxes=1, max_steps=20, mode='rgb_array', seed=103, crop=False), agent2)

---