In [1]:
import os

def installWrapper():
    # Install a Drive FUSE wrapper.
    # https://github.com/astrada/google-drive-ocamlfuse
    !apt-get update -qq 2>&1 > /dev/null
    !apt-get install -y -qq software-properties-common python-software-properties module-init-tools
    !add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
    !apt-get update -qq 2>&1 > /dev/null
    !apt-get -y install -qq google-drive-ocamlfuse fuse
    
def auth():
    """Run google.colab's ``authenticate_user()`` for the current session."""
    # Alias the module so it does not share a name with this function.
    from google.colab import auth as colab_auth
    colab_auth.authenticate_user()
    
def authorizeWrapper():
    # Configure google-drive-ocamlfuse credentials from the application-default
    # Google credentials. A headless probe run decides whether credentials are
    # already set up; if not, the tool's URL line is shown and the verification
    # code typed by the user is piped back into the tool.
    # Relies on IPython "!" shell escapes, so it only runs in an IPython kernel.
    # Generate creds for the Drive FUSE library.
    from google.colab import output
    from oauth2client.client import GoogleCredentials
    import time
    creds = GoogleCredentials.get_application_default()
    import getpass
    # Determine if Drive Fuse credential setup is already complete.
    fuse_credentials_configured = False
    # Output produced inside this context is temporary (see the sleep below).
    with output.temporary():
      # NOTE(review): the client secret is passed on the command line.
      !google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1
      # _exit_code is set to the result of the last "!" command.
      fuse_credentials_configured = _exit_code == 0
    # Sleep for a short period to ensure that the previous output has been cleared.
    time.sleep(1)
    if fuse_credentials_configured:
      print('Drive FUSE credentials already configured!')
    else:
      # Work around misordering of STREAM and STDIN in Jupyter.
      # https://github.com/jupyter/notebook/issues/3159
      # Capture the tool's output lines that contain "URL" as a list.
      prompt = !google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
      # NOTE(review): prompt[0] raises IndexError if grep matched no line.
      vcode = getpass.getpass(prompt[0] + '\n\nEnter verification code: ')
      # NOTE(review): vcode is interpolated into the shell command unquoted.
      !echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}
    
def createDriveDir():
    !mkdir -p drive
    !google-drive-ocamlfuse drive
    
# Run the setup steps in order: install the FUSE wrapper, authenticate the
# Colab user, configure the wrapper's credentials, then mount ./drive.
installWrapper()
auth()
authorizeWrapper()
createDriveDir()

E: Package 'python-software-properties' has no installation candidate
Drive FUSE credentials already configured!
fuse: mountpoint is not empty
fuse: if you are sure this is safe, use the 'nonempty' mount option


In [0]:
import pickle


def savePickle(name, toSave):
    """Pickle ``toSave`` into the file at path ``name``.

    The file is opened in binary write mode, so existing content is replaced.
    The context manager closes the handle even if ``pickle.dump`` raises.
    """
    with open(name, 'wb') as file:
        pickle.dump(toSave, file)

def loadPickle(name):
    """Return the object unpickled from the file at path ``name``.

    The context manager closes the handle even if ``pickle.load`` raises.
    """
    # SECURITY: unpickling can execute arbitrary code; only load files that
    # this notebook itself wrote.
    with open(name, 'rb') as file:
        return pickle.load(file)

In [0]:
import random
import gym
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import sgd
from itertools import product as possibleIterations
import matplotlib.pyplot as plt


def get_actions():
    """Enumerate the discrete action set used by the agent.

    Each of the four action components takes one of the values -1.0, 0.0 or
    1.0. Returns an array of shape (81, 4) holding every combination, in the
    order produced by itertools.product (last component varies fastest).
    """
    torque_levels = np.array([-1.0, 0.0, 1.0])
    combinations = possibleIterations(torque_levels, repeat=4)
    return np.array(list(combinations))


EPISODES = 100000
actions_space = get_actions()

def plot(data):
    """Plot (x, y) pairs from ``data`` as a line and save it to the Drive folder.

    ``data`` is an iterable of 2-item pairs. Each call draws on its own figure
    and closes it afterwards; previously every call added another line to the
    same implicit pyplot figure, so saved images accumulated old curves and
    the figure was never released.
    """
    xs = [x for x, _ in data]
    ys = [y for _, y in data]
    fig, ax = plt.subplots()
    ax.plot(xs, ys)
    fig.savefig('drive/AIProject/temp.png')
    # Release the figure so repeated calls do not leak memory.
    plt.close(fig)

class State:
    """A stacked observation: four consecutive states and the three actions between them."""

    def __init__(self, states, actions):
        # Snapshot the inputs. The training loop passes rolling deques that it
        # keeps appending to; storing them by reference made every State built
        # in an episode alias the same, continuously mutating history.
        self.states = list(states)    # 4 states
        self.actions = list(actions)  # 3 actions

    def get_input_layer(self):
        """Flatten the stack into the network input row.

        Layout: state 0, index of action 0, state 1, index of action 1,
        state 2, index of action 2, state 3. Each action is encoded as its row
        index in the module-level ``actions_space``. The result is reshaped to
        (1, 24*4 + 3), which raises ValueError if the flattened length differs.
        """
        # Convert once instead of on every loop iteration.
        action_rows = actions_space.tolist()
        ret = []
        for i in range(3):
            ret.extend(self.states[i][0])
            ret.append(action_rows.index(self.actions[i].tolist()))
        ret.extend(self.states[3][0])
        return np.reshape(np.array(ret), [1, 24*4 + 3])



class DeepQAgent:
    """Epsilon-greedy Q-learning agent over a discrete action table.

    ``action_space`` is an array with one row per action; the network has one
    output per row. Transitions are kept in a bounded replay memory.
    """

    def __init__(self, state_size, action_space):
        """Store hyper-parameters and build the Keras model.

        state_size: length of the network input vector.
        action_space: array whose rows are the selectable actions.
        """
        self.state_size = state_size
        self.action_space = action_space
        # One output per action row (81 for the 3**4 table used in this file);
        # derived from action_space instead of being hard-coded.
        self.action_size = len(action_space)
        self.memory = deque(maxlen=20000)
        self.gamma = 0.95          # discount factor
        self.epsilon = 1.0         # exploration probability
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def get_action_from_prediction(self, predict):
        """Return the action row whose predicted value in ``predict[0]`` is largest."""
        return self.action_space[np.argmax(predict[0])]

    def _build_model(self):
        """Build and compile the network: Dense(100, relu), Dense(80, relu), Dense(action_size, linear)."""
        model = Sequential()
        model.add(Dense(100, input_dim=self.state_size, activation='relu'))  # changed layer count from 24
        model.add(Dense(80, activation='relu'))
        # Linear output layer: one unbounded value per action.
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=sgd(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append a transition to memory, storing the action as its row index in ``action_space``.

        Raises ValueError if ``action`` is not a row of ``action_space``.
        """
        action_index = self.action_space.tolist().index(action.tolist())
        self.memory.append((state, action_index, reward, next_state, done))

    def act(self, state):
        """Choose an action row: uniformly random with probability epsilon, else the model's argmax."""
        if np.random.rand() <= self.epsilon:
            return self.action_space[np.random.randint(len(self.action_space))]
        # predict returns one row holding a value per action.
        act_values = self.model.predict(state.get_input_layer())
        return self.get_action_from_prediction(act_values)

    def replay(self, batch_size, agent2):
        """Fit this agent's model on ``batch_size`` transitions sampled from memory.

        ``agent2.model`` supplies both the bootstrap value for the next state
        and the baseline prediction whose taken-action entry is overwritten
        with the target. Epsilon is decayed once per call while it is above
        ``epsilon_min``. ``random.sample`` raises ValueError when
        ``batch_size`` exceeds the number of stored transitions.
        """
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            state_input = state.get_input_layer()
            target = reward
            if not done:
                next_values = agent2.model.predict(next_state.get_input_layer())[0]
                target = reward + self.gamma * np.amax(next_values)
            target_f = agent2.model.predict(state_input)
            target_f[0][action] = target
            self.model.fit(state_input, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        """Load model weights from ``name``; also sets epsilon to 0.02."""
        self.epsilon = 0.02
        self.model.load_weights(name)

    def save(self, name):
        """Save the model weights to ``name``."""
        self.model.save_weights(name)


if __name__ == "__main__":
    # Training driver: resumes two agents and the episode counter from the
    # Drive folder, trains `agent` on replayed transitions with `agent2` as the
    # periodically synced target, and checkpoints every 20 episodes.
    env = gym.make('BipedalWalker-v2')
    state_size = env.observation_space.shape[0]
    action_size = len(actions_space)
    agent = DeepQAgent(state_size*4 + 3, actions_space)
    agent2 = DeepQAgent(state_size*4 + 3, actions_space)
    agent.load("drive/AIProject/agent1.h5")
    agent2.load("drive/AIProject/agent2.h5")
    done = False
    batch_size = 32
    c = 0  # environment steps since the last target-network sync
    # The pickle holds (last finished episode, history of (episode, average)).
    e, eVSs = loadPickle('drive/AIProject/iter')
    recent_average = deque(maxlen=100)
    # Stop at EPISODES (the total shown in the progress line) instead of
    # looping forever.
    while e < EPISODES:
        e = e + 1
        state = env.reset()
        state = np.reshape(state, [1, state_size])
        total_reward = 0
        prev_state = State([state for i in range(4)], [np.array([-1.0, 0.0, 1.0, 1.0]) for i in range(3)])
        curr_state = State([state for i in range(4)], [np.array([-1.0, 0.0, 1.0, 1.0]) for i in range(3)])
        my_state = deque(maxlen=4)    # rolling window of the last 4 states
        my_actions = deque(maxlen=3)  # rolling window of the last 3 actions
        my_state.append(state)
        flag = True
        for step in range(500):
            c += 1
            # env.render()
            action = agent.act(curr_state)
            my_actions.append(action)

            next_state, reward, done, info = env.step(action)
            total_reward += reward
            next_state = np.reshape(next_state, [1, state_size])
            state = next_state
            my_state.append(state)

            if len(my_state) == 4:
                # Pass copies: the deques keep changing on later steps.
                curr_state = State(list(my_state), list(my_actions))
                if flag:
                    prev_state = curr_state
                    flag = False
                agent.remember(prev_state, action, reward, curr_state, done)
                prev_state = curr_state

            # Leave the episode only after the transition was stored; breaking
            # earlier meant terminal transitions (done=True) never reached the
            # replay memory.
            if done:
                break

            if len(agent.memory) > batch_size:
                agent.replay(batch_size, agent2)
            if c >= 1000:
                # Sync the target network with the trained network.
                c = 0
                agent2.model.set_weights(agent.model.get_weights())
        recent_average.append(total_reward)
        av = sum(recent_average) / len(recent_average)
        print("\r episode: {}/{}, score: {} ,Recent Average: {}".format(e, EPISODES, total_reward, av))
        # NOTE(review): records e+1 although e was already incremented for this
        # episode; left unchanged to stay consistent with the stored history.
        eVSs.append((e+1,av))
        if e % 20 == 0:
            agent.save("drive/AIProject/agent1.h5")
            agent2.save("drive/AIProject/agent2.h5")
            savePickle('drive/AIProject/iter',(e, eVSs))
            plot(eVSs)



  result = entry_point.load(False)


[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
 episode: 1821/100000, score: -102.21639395528659 ,Recent Average: -102.21639395528659
 episode: 1822/100000, score: -102.48194042141053 ,Recent Average: -102.34916718834856
 episode: 1823/100000, score: -45.30093380783368 ,Recent Average: -83.3330893948436
 episode: 1824/100000, score: -42.509872119922164 ,Recent Average: -73.12728507611324
 episode: 1825/100000, score: -38.842111115428274 ,Recent Average: -66.27025028397625
 episode: 1826/100000, score: -51.51141206002981 ,Recent Average: -63.8104439133185
 episode: 1827/100000, score: -43.534671450397624 ,Recent Average: -60.913904990044095
 episode: 1828/100000, score: -28.789919199492047 ,Recent Average: -56.89840676622509
 episode: 1829/100000, score: -38.28370847881517 ,Recent Average: -54.83010695651288
 episode: 18

In [9]:
!ls

adc.json  drive  gym  sample_data


In [0]:
# Enter the gym checkout; this changes the kernel's working directory for all
# later cells until it is changed back.
os.chdir('gym')

In [11]:
!ls

bin		     LICENSE.md		   scripts		  tox.ini
CODE_OF_CONDUCT.rst  Makefile		   setup.py		  unittest.cfg
docs		     README.rst		   test.dockerfile.14.04  vendor
examples	     requirements_dev.txt  test.dockerfile.16.04
gym		     requirements.txt	   test.dockerfile.18.04


In [12]:
# Editable (setup.py develop) install of the package in the current directory.
!pip install -e .

Obtaining file:///content/gym
Installing collected packages: gym
  Found existing installation: gym 0.10.9
    Uninstalling gym-0.10.9:
      Successfully uninstalled gym-0.10.9
  Running setup.py develop for gym
Successfully installed gym


In [13]:
!pip install -e .[box2D]

Obtaining file:///content/gym
Collecting box2d-py>=2.3.5 (from gym==0.10.9)
[?25l  Downloading https://files.pythonhosted.org/packages/06/bd/6cdc3fd994b0649dcf5d9bad85bd9e26172308bbe9a421bfc6fdbf5081a6/box2d_py-2.3.8-cp36-cp36m-manylinux1_x86_64.whl (448kB)
[K    100% |████████████████████████████████| 450kB 7.5MB/s 
Installing collected packages: box2d-py, gym
  Found existing installation: gym 0.10.9
    Can't uninstall 'gym'. No files were found to uninstall.
  Running setup.py develop for gym
Successfully installed box2d-py-2.3.8 gym


In [0]:
# Return to the parent directory of the gym checkout.
os.chdir('..')

In [15]:
!ls

adc.json  drive  gym  sample_data


In [0]:
# Delete the local gym checkout.
# NOTE(review): an editable install made from this directory presumably stops
# working once the directory is gone; confirm gym is reinstalled afterwards.
!rm -rf gym

In [0]:
!ls

adc.json  drive  sample_data


In [0]:
!pip uninstall gym

[33mSkipping gym as it is not installed.[0m


In [7]:
!pip install gym

Collecting gym
[?25l  Downloading https://files.pythonhosted.org/packages/d4/22/4ff09745ade385ffe707fb5f053548f0f6a6e7d5e98a2b9d6c07f5b931a7/gym-0.10.9.tar.gz (1.5MB)
[K    100% |████████████████████████████████| 1.5MB 7.6MB/s 
Collecting pyglet>=1.2.0 (from gym)
[?25l  Downloading https://files.pythonhosted.org/packages/1c/fc/dad5eaaab68f0c21e2f906a94ddb98175662cc5a654eee404d59554ce0fa/pyglet-1.3.2-py2.py3-none-any.whl (1.0MB)
[K    100% |████████████████████████████████| 1.0MB 11.5MB/s 
Building wheels for collected packages: gym
  Running setup.py bdist_wheel for gym ... [?25l- \ | / done
[?25h  Stored in directory: /root/.cache/pip/wheels/6c/3a/0e/b86dee98876bb56cdb482cc1f72201035e46d1baf69d10d028
Successfully built gym
Installing collected packages: pyglet, gym
Successfully installed gym-0.10.9 pyglet-1.3.2


In [8]:
# Clone the gym repository into ./gym (default branch, no pinned revision).
!git clone https://github.com/openai/gym

Cloning into 'gym'...
remote: Enumerating objects: 1, done.[K
remote: Counting objects: 100% (1/1), done.[K
remote: Total 8373 (delta 0), reused 0 (delta 0), pack-reused 8372[K
Receiving objects: 100% (8373/8373), 3.49 MiB | 8.28 MiB/s, done.
Resolving deltas: 100% (5672/5672), done.


In [0]:
import gym