In [1]:
# Toggle for the execution environment: when False, the configuration cell
# changes the current directory and points WORKING_DIRECTORY at a different path.
RUNNING_LOCALLY = True

In [2]:
import os

# PLEASE SET YOUR OWN WORKING_DIRECTORY WHEN RUNNING LOCALLY
# Directory used for the log file and the saved statistics arrays. Paths are
# built by plain string concatenation, so the trailing slash is required.
WORKING_DIRECTORY = "/home/yash/Desktop/Courses/CS2470/Final_Project/working_dir/"

# Non-local runs switch the process working directory and use a different
# WORKING_DIRECTORY.
if not RUNNING_LOCALLY:
    os.chdir("/home/yash/")
    print("Current Directory ->", os.getcwd())

    WORKING_DIRECTORY = "/home/yash/working_dir/"

    # Ensure that you are working in the right environment
    # (IPython shell escape: echoes the CONDA_PREFIX environment variable)
    !echo $CONDA_PREFIX

# File that write_to_log appends to; derived after WORKING_DIRECTORY is final.
LOG_FILE = WORKING_DIRECTORY + "log_file.txt"

def write_to_log(statement, include_blank_line=False):
    """Append ``statement`` to ``LOG_FILE`` on a best-effort basis.

    Args:
        statement: Value to log. It is formatted into the entry, so
            non-string values (numbers, arrays) are written as their
            string form.
        include_blank_line: When True the entry is preceded by two newlines
            (leaving a blank line before it); otherwise by a single newline.

    Returns:
        None. I/O and encoding failures are ignored so that a missing or
        unwritable log location never interrupts the run.
    """
    separator = "\n\n" if include_blank_line else "\n"
    try:
        with open(LOG_FILE, "a") as myfile:
            myfile.write(f"{separator}{statement}")
    except (OSError, UnicodeError):
        # Running this locally may cause errors (e.g. the working directory
        # does not exist), and logging isn't required. Only I/O-related
        # failures are suppressed; KeyboardInterrupt and programming errors
        # are no longer swallowed as they were by the previous bare except.
        pass

In [None]:
import gym
import time
import numpy as np
from collections import deque
from gym import spaces
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import time

# Report how many GPU devices TensorFlow lists as physically available.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

In [4]:
class ConcatObs(gym.Wrapper):
    """Gym wrapper that returns the ``k`` most recent observations as one array.

    The observations are kept in a bounded deque and converted with
    ``np.array``, so the stacked result has a new leading axis of length ``k``.
    """

    def __init__(self, env, k):
        """Wrap ``env`` and advertise an observation space with a leading ``k`` axis.

        The advertised bounds are fixed at 0 and 255; the dtype is taken from
        the wrapped environment's observation space.
        """
        super().__init__(env)
        self.k = k
        # With maxlen=k, appending a new frame automatically evicts the oldest.
        self.frames = deque([], maxlen=k)
        base_space = env.observation_space
        stacked_shape = (k,) + base_space.shape
        self.observation_space = spaces.Box(
            low=0,
            high=255,
            shape=stacked_shape,
            dtype=base_space.dtype,
        )

    def reset(self):
        """Reset the wrapped env and fill the buffer with ``k`` copies of the first observation."""
        first_frame = self.env.reset()
        self.frames.extend(first_frame for _ in range(self.k))
        return self._get_ob()

    def step(self, action):
        """Step the wrapped env, push the new observation, and return the stacked frames."""
        frame, reward, done, info = self.env.step(action)
        self.frames.append(frame)
        stacked = self._get_ob()
        return stacked, reward, done, info

    def _get_ob(self):
        """Return the buffered frames as a single NumPy array (oldest first)."""
        return np.array(self.frames)

In [5]:
# A bunch of wrappers to get us started, please use these
class ObservationWrapper(gym.ObservationWrapper):
    """Observation pre-processing: optional scaling/grayscale, then crop, resize and axis reorder."""

    def __init__(self, env, GRAYSCALE=False, NORMALIZE=False):
        """Store the pre-processing flags, then initialise the base wrapper.

        Args:
            env: Environment to wrap.
            GRAYSCALE: When True, observations go through ``tf.image.rgb_to_grayscale``.
            NORMALIZE: When True, observations are divided by 255.0.
        """
        self.GRAYSCALE = GRAYSCALE
        self.NORMALIZE = NORMALIZE
        super().__init__(env)

    def observation(self, obs):
        """Transform a stacked observation into the processed image tensor.

        Assumes ``obs`` is 4-D (frames, axis-1, axis-2, channels), as produced
        by a frame-stacking wrapper -- TODO confirm against the wrapped env.
        """
        # Scale pixel values by 255 when requested.
        if self.NORMALIZE:
            obs = obs / 255.0

        # Grayscale conversion. NOTE: the original author flagged this path as
        # "not quite working right now", to be revisited later.
        if self.GRAYSCALE:
            obs = tf.image.rgb_to_grayscale(obs)

        # Crop: drop 2 leading and 9 trailing entries on axis 1 and the first
        # 8 entries on axis 2 (presumably removing borders/HUD -- verify).
        cropped = obs[:, 2:-9, 8:, :]
        resized = tf.image.resize(cropped, [84, 84])

        # Drop the last axis via reshape; this only succeeds when that axis
        # has size 1, i.e. on the grayscale path.
        channel_free_shape = resized.shape[:-1]
        squeezed = tf.reshape(resized, channel_free_shape)

        # Move the leading (frame) axis to the end: (k, 84, 84) -> (84, 84, k).
        return tf.transpose(squeezed, perm=[1, 2, 0])

class RewardWrapper(gym.RewardWrapper):
    """Reward wrapper that limits every reward to the range [0, 1]."""

    def __init__(self, env):
        """Wrap ``env`` without any extra configuration."""
        super().__init__(env)

    def reward(self, reward):
        """Return ``reward`` clipped to the interval [0, 1] using ``np.clip``."""
        lower_bound, upper_bound = 0, 1
        return np.clip(reward, lower_bound, upper_bound)
    
class ActionWrapper(gym.ActionWrapper):
    """Action wrapper that currently passes actions through unchanged."""

    def __init__(self, env):
        super().__init__(env)
    
    def action(self, action):
        # Identity mapping: the action is returned exactly as received.
        return action

In [7]:
# Build the environment by applying the wrappers one at a time, innermost first:
# frame stacking (4 frames) -> action -> reward -> observation pre-processing.
env = gym.make("ALE/Riverraid-v5")
env = ConcatObs(env, 4)
env = ActionWrapper(env)
env = RewardWrapper(env)
env = ObservationWrapper(env, GRAYSCALE=True, NORMALIZE=True)

# Initial reset to obtain the first processed observation.
obs = env.reset()

2022-05-05 18:04:39.528828: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


**Random-policy baseline: run `n_games` episodes**

In [8]:
# Name used as the prefix of the saved statistics files.
model_name = "random_model"
# Number of episodes to play in the rollout loop below.
n_games = 1000

In [10]:
# Per-episode statistics for the random-action rollout:
#   rot_list  -> total (wrapped) reward collected in each episode
#   time_list -> duration of each episode in seconds
#   num_steps -> number of environment steps in each episode
rot_list = np.zeros(n_games)
time_list = np.zeros(n_games)
num_steps = np.zeros(n_games)

try:
    for i in range(n_games):
        # Progress message every 50 episodes.
        if (i + 1) % 50 == 0:
            print(f'Running game {i+1}/{n_games}...')

        obs = env.reset()
        rTot = 0
        step_count = 0
        done = False
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()

        while not done:
            # Random policy: sample uniformly from the action space.
            action = env.action_space.sample()
            obs, reward, done, info = env.step(action)
            step_count += 1
            rTot += reward

        end_time = time.perf_counter()
        inter = end_time - start_time
        rot_list[i] = rTot
        time_list[i] = inter
        num_steps[i] = step_count
finally:
    # Release the environment even if the loop is interrupted or fails.
    env.close()

# write_to_log("average_reward: {}".format(sum(rot_list)/n_games), include_blank_line=False)
# write_to_log("average_time: {}".format(sum(time_list)/n_games), include_blank_line=False)
# write_to_log("sum(rot_list[-5:])/5: {}".format(sum(rot_list[-5:])/5), include_blank_line=False)
# write_to_log("sum(time_list[-5:])/5: {}".format(sum(time_list[-5:])/5), include_blank_line=False)

Running game 50/1000...
Running game 100/1000...
Running game 150/1000...
Running game 200/1000...
Running game 250/1000...
Running game 300/1000...
Running game 350/1000...
Running game 400/1000...
Running game 450/1000...
Running game 500/1000...
Running game 550/1000...
Running game 600/1000...
Running game 650/1000...
Running game 700/1000...
Running game 750/1000...
Running game 800/1000...
Running game 850/1000...
Running game 900/1000...
Running game 950/1000...
Running game 1000/1000...


In [13]:
def describe_array(arr):
    """Return summary statistics of ``arr``.

    Args:
        arr: A NumPy array or any array-like (list, tuple, ...) of numbers.
            Array-likes are converted with ``np.asanyarray``, which leaves
            existing arrays (including ndarray subclasses) untouched.

    Returns:
        A 4-tuple ``(min, max, mean, std)`` computed over all elements.

    Raises:
        ValueError: If ``arr`` is empty (min/max of an empty array is undefined).
    """
    values = np.asanyarray(arr)
    return values.min(), values.max(), values.mean(), values.std()

In [14]:
# Print (min, max, mean, std) for steps, rewards and durations, in that order.
for episode_stats in (num_steps, rot_list, time_list):
    print(describe_array(episode_stats))

(338.0, 1199.0, 733.901, 134.17822177611387)
(10.0, 41.0, 22.067, 5.119229531872936)
(1.4359190464019775, 6.545130729675293, 3.2691763134002687, 0.68510435066317)


In [15]:
# Persist each statistics array as <WORKING_DIRECTORY><model_name>_<n_games><suffix>
# (np.save appends the ".npy" extension).
for file_suffix, stats_array in (
    ("_num_steps", num_steps),
    ("_rot_list", rot_list),
    ("_time_list", time_list),
):
    np.save(WORKING_DIRECTORY + model_name + "_" + str(n_games) + file_suffix, stats_array)

In [26]:
# Reload the three statistics arrays saved for this model_name / n_games pair.
num_steps, rot_list, time_list = (
    np.load(WORKING_DIRECTORY + model_name + "_" + str(n_games) + file_suffix)
    for file_suffix in ("_num_steps.npy", "_rot_list.npy", "_time_list.npy")
)

In [27]:
# Print (min, max, mean, std) for the reloaded steps, rewards and durations.
for loaded_stats in (num_steps, rot_list, time_list):
    print(describe_array(loaded_stats))

(503.0, 910.0, 721.8333333333334, 95.1595443919783)
(12.0, 27.0, 21.3, 3.6891733491393435)
(2.417938232421875, 4.532612562179565, 3.4751102924346924, 0.47531624347623735)


In [12]:
# Averages are taken over n_games (not over the array length).
reward_total = rot_list.sum()
time_total = time_list.sum()
print("average_reward: {}".format(reward_total / n_games))
print("average_time: {}".format(time_total / n_games))

average_reward: 0.0
average_time: 3.476018190383911


In [None]:
# Overall averages (divided by n_games) followed by the averages of the last five episodes.
recent_rewards = rot_list[-5:]
recent_times = time_list[-5:]
print("average_reward: {}".format(sum(rot_list) / n_games))
print("average_time: {}".format(sum(time_list) / n_games))
print("sum(rot_list[-5:])/5: {}".format(sum(recent_rewards) / 5))
print("sum(time_list[-5:])/5: {}".format(sum(recent_times) / 5))

1. After 100 games:<br>
   average_reward: 773.95<br>
   average_time: 21.668797335624696<br>

In [173]:
# q_net_copy = keras.models.load_model(model_path)



In [1]:
# q_net_copy.get_weights()