# Prepare & install required libraries

In [1]:
# Fetch the project sources and make the repository root the working
# directory, so that relative paths such as `data/` and `nets/` resolve.
!git clone https://github.com/markub3327/OpenAI-CarRacing-RNN.git
%cd OpenAI-CarRacing-RNN/
# List the repository contents as a quick sanity check.
%ls

Cloning into 'OpenAI-CarRacing-RNN'...
remote: Enumerating objects: 6, done.
remote: Counting objects: 100% (6/6), done.
remote: Compressing objects: 100% (6/6), done.
remote: Total 43 (delta 1), reused 0 (delta 0), pack-reused 37
Unpacking objects: 100% (43/43), done.
/content/OpenAI-CarRacing-RNN
chart_01.png  chart_03.png  LICENSE*  model.png  preview.gif  save/
chart_02.png  data/         main.py   nets/      README.md*   utils/


In [None]:
!pip3 install wandb

# Import the required libraries

In [3]:
import os
import numpy as np
import wandb

from nets.agent import Agent
from wandb.keras import WandbCallback 

# Dataset

In [4]:
class Dataset:
    """Iterable over the dataset files stored in a folder.

    Every iteration step loads one file, stacks its frames into windows of
    ``timesteps`` frames, shuffles the resulting samples and yields the pair
    ``(states, actions)``.
    """

    def __init__(self, path='data/', timesteps=4):
        """Collect the dataset files found directly inside ``path``.

        Args:
            path: Folder that is scanned (non-recursively) for dataset files.
            timesteps: Number of consecutive frames stacked into one sample.
        """
        self.ptr = 0
        self.timesteps = timesteps

        # Keep regular files only, so a stray sub-folder (for example
        # `.ipynb_checkpoints`) is never handed to `np.load`. The paths are
        # sorted because `os.scandir` yields entries in arbitrary order.
        with os.scandir(path) as entries:
            self.dat = sorted(entry.path for entry in entries if entry.is_file())

    def _read_dataset(self, path):
        """Load one file and return its shuffled ``(states, actions)`` arrays.

        The file must be loadable by ``np.load`` and contain the arrays
        ``frames`` and ``actions``. Shapes are printed before and after the
        frames are stacked into windows.
        """
        with np.load(path) as data:
            f = data['frames']
            a = data['actions']
            print(f.shape)
            print(a.shape)

            # make timesteps
            f, a = self._make_timesteps(f, a)
            print(f.shape)
            print(a.shape)

            # shuffle dataset after loading from file
            f, a = self._shuffle_dataset(f, a)

        print(f"Loaded dataset from: {path}")

        return f, a

    def _make_timesteps(self, f_dat, a_dat):
        """Stack consecutive frames into windows of ``self.timesteps`` frames.

        Args:
            f_dat: Frames array; the first axis is the frame index.
            a_dat: Actions array indexed the same way as ``f_dat``.

        Returns:
            ``(states, actions)`` where ``states`` is a ``uint8`` array of
            shape ``(n, timesteps) + f_dat.shape[1:]`` and ``actions`` is
            ``a_dat`` restricted to the sample indexes.
        """
        # Sequential (not random) indexes of the samples. The builtin `int`
        # replaces the `np.int` alias, which was removed in NumPy 1.24.
        sample_idxs = np.arange(self.timesteps + 1, f_dat.shape[0], dtype=int)
        print(sample_idxs)
        print(sample_idxs.shape)

        states = np.zeros(
            (sample_idxs.shape[0], self.timesteps) + f_dat.shape[1:],
            dtype=np.uint8,
        )

        # Sample `idx` holds frames idx-timesteps-1 .. idx-2 and is paired
        # with the action at `idx`.
        # NOTE(review): the window ends two frames before the action index -
        # confirm that this gap is intended.
        for i, idx in enumerate(sample_idxs):
            states[i] = f_dat[idx - self.timesteps - 1:idx - 1]

        return states, a_dat[sample_idxs]

    def _shuffle_dataset(self, f_dat, a_dat):
        """Return both arrays reordered by one shared random permutation."""
        # Builtin `int` instead of the removed `np.int` alias.
        idx = np.arange(0, f_dat.shape[0], dtype=int)
        print(idx)
        np.random.shuffle(idx)
        print(idx)

        # The same permutation is applied to both arrays, which keeps every
        # state paired with its action.
        return f_dat[idx], a_dat[idx]

    def __iter__(self):
        """Restart the iteration from the first dataset file."""
        self.ptr = 0
        return self

    def __next__(self):
        """Load and return the next dataset; stop after the last file."""
        if self.ptr == len(self.dat):
            raise StopIteration
        s, a = self._read_dataset(self.dat[self.ptr])
        self.ptr = self.ptr + 1
        return s, a

# The main training function

In [5]:
def Training(hid=None, num_frames=4, epochs=1000):
    """Train the agent on every dataset in the data folder and save the model.

    Args:
        hid: Hidden-layer sizes forwarded to ``Agent.create``. ``None``
            (the default) selects ``[32, 64]``.
        num_frames: Number of stacked frames per sample. It is used both as
            the first dimension of the network input shape and as the
            ``timesteps`` of the ``Dataset``.
        epochs: Epoch count passed to each of the two ``agent.train`` calls
            made per dataset.
    """
    # Build the default list per call: a list literal in the signature would
    # be a single object shared by every call of this function.
    if hid is None:
        hid = [32, 64]

    wandb.init(project="car_racing")

    # create network
    agent = Agent()
    agent.create((num_frames, 96, 96, 3), hid=hid)

    # save model's plot
    agent.save_plot()

    # load datasets from folder
    dataset = Dataset(timesteps=num_frames)

    # take every dataset from folder
    for s, a in dataset:
        print('+-----------------------------------------------+')

        # First pass with `top_only=True`.
        # NOTE(review): presumably this trains only the layers on top of the
        # pre-trained backbone - confirm against `Agent.train`.
        print('Run training...')
        print('|-----------------------------------------------|')
        agent.train(s, a, epochs=epochs, top_only=True, callbacks=[WandbCallback()])

        # Second pass on the same data with `top_only=False`.
        print('Run fine-tuning...')
        print('|-----------------------------------------------|')
        agent.train(s, a, epochs=epochs, top_only=False, callbacks=[WandbCallback()])

        print('+-----------------------------------------------+')

    # save model
    agent.save()

# Run training

In [6]:
# Start the training run with the default arguments of `Training`.
Training()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_96_no_top.h5
(9319, 96, 96, 3)
(9319, 3)
[   5    6    7 ... 9316 9317 9318]
(9314,)
(9314, 4, 96, 96, 3)
(9314, 3)
[   0    1    2 ... 9311 9312 9313]
[6769 6521 6215 ... 3373 2278 4707]
Loaded dataset from: data/data_part5 (9319).npz
+-----------------------------------------------+
Run training...
|-----------------------------------------------|
Model: "agent"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
image_input (InputLayer)        [(None, 4, 96, 96, 3 0                                            
__________________________________________________________________________________________________
normalization (TimeDistributed) (None, 4, 96, 96, 3) 0           image_input[0][0]       

# Results

![chart_02](chart_02.png)![chart_01](chart_01.png)![chart_03](chart_03.png)