In [1]:
import numpy as np
import matplotlib.pyplot as plt
import gym
import torch
import torch.nn.functional as F
from jupyterplot import ProgressPlot
import torch.nn as nn
import copy

In [2]:
class action_embedder(nn.Module):
    def __init__(self, n_actions, embedding_size):
        super(action_embedder,self).__init__()
        self.embedding_layer = nn.Linear(n_actions,embedding_size)
    
    def forward(self, x):
        return self.embedding_layer(x)


class obs_convolver(nn.Module):
    def __init__(self):
        super(obs_convolver, self).__init__()
        self.conv1 = nn.Conv2d(in_channels = 1, out_channels = 32, kernel_size = 8, stride = 4)
        self.conv2 = nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 4, stride = 2)
        self.conv3 = nn.Conv2d(in_channels = 64, out_channels = 64, kernel_size = 2, stride = 1)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        return F.relu(self.conv3(x)).flatten()

class ADRQN(nn.Module):
    def __init__(self, n_actions, embedding_size):
        super(ADRQN, self).__init__()
        self.embedder = action_embedder(n_actions, embedding_size)
        self.convolver = obs_convolver()
        self.lstm = nn.LSTM(input_size = 4096+embedding_size, hidden_size = embedding_size)
        self.out_layer = nn.Linear(embedding_size, n_actions)
        
    def forward(self, observation, action, hidden_state = None, cell_state = None):
        action_embedded = self.embedder(action)
        observation_conved = self.convolver(observation.unsqueeze(0))
        lstm_input = torch.cat([observation_conved, action_embedded]).view(1,1,-1)
        if hidden_state is not None:
            lstm_out, (hn, cn) = self.lstm(lstm_input, (hidden_state, cell_state))
        else:
            lstm_out, (hn, cn) = self.lstm(lstm_input)
        
        q_values = self.out_layer(lstm_out)
        return q_values, (hn, cn)    

def onehot_action(n_actions, action_idx):
    return torch.tensor([int(i == action_idx) for i in range(n_actions)], dtype = torch.float32).cuda()

def preprocess_obs(obs):
    #obs = torch.tensor(obs, dtype = torch.float32).permute(2,0,1)
    #obs = F.interpolate(obs.unsqueeze(0), size = (84,84))
    #return obs[:,1,:,:].cuda()
    obs = torch.tensor(obs, dtype = torch.float32).cuda()
    return obs

def repackage_hidden(h):
    """Wraps hidden states in new Variables, to detach them from their history."""
    return tuple(v.data for v in h)

  
class ADRQN0(nn.Module):
    def __init__(self, n_actions, embedding_size):
        super(ADRQN0, self).__init__()
        self.embedder = action_embedder(n_actions, embedding_size)
        self.obs_layer = nn.Linear(4, 16)
        self.obs_layer2 = nn.Linear(16,32)
        self.lstm = nn.LSTM(input_size = 32+embedding_size, hidden_size = embedding_size)
        self.out_layer = nn.Linear(embedding_size, n_actions)
    
    def forward(self, observation, action, hidden_state = None, cell_state = None):
        action_embedded = self.embedder(action)
        observation = F.relu(self.obs_layer(observation))
        observation = F.relu(self.obs_layer2(observation))
        lstm_input = torch.cat([observation, action_embedded]).view(1,1,-1)
        if hidden_state is not None:
            lstm_out, (hn, cn) = self.lstm(lstm_input, (hidden_state, cell_state))
        else:
            lstm_out, (hn, cn) = self.lstm(lstm_input)
    
        q_values = self.out_layer(lstm_out)
        return q_values, (hn, cn)  
    
class ExpBuffer():
    def __init__(self, max_episodes):
        self.max_episodes = max_episodes
        self.current_episode = -1
        self.filled_episodes = 0
        self.storage = [[] for i in range(max_episodes)]
    
    def init_episode(self):
        self.current_episode += 1
        if self.filled_episodes < self.max_episodes:
            self.filled_episodes += 1
        if self.current_episode == self.max_episodes:
            self.current_episode = 0
            self.storage[self.current_episode] = []
        else:
            self.storage[self.current_episode] = []

    
    def write_tuple(self, aoaro):
        self.storage[self.current_episode].append(aoaro)
    
    def sample(self, n_tuples, max_tries = 1000):
        for i in range(max_tries):
            episode_idx = np.random.randint(self.filled_episodes)
            if len(self.storage[episode_idx]) >= n_tuples:
                break
            if i == max_tries-1:
                raise Exception('Could not find long enough episode. Try reducing n_tuples.')
        start_idx = np.random.randint(len(self.storage[episode_idx]) - n_tuples + 1)
        return self.storage[episode_idx][start_idx:start_idx+n_tuples]

In [19]:
#Yuge Loop
import time

env = gym.make('CartPole-v1')
n_actions = env.action_space.n
embedding_size = 8
M_episodes = 2000
replay_buffer_size = 100
replay_buffer = ExpBuffer(replay_buffer_size)
sample_length = 2
batch_size = 5
eps = 0.1
gamma = 0.99

pp = ProgressPlot()

#Init Networks
adrqn = ADRQN0(n_actions, embedding_size).cuda()
adrqn_target = ADRQN0(n_actions, embedding_size).cuda()

for param in adrqn_target.parameters():
    param.requires_grad = False

optimizer = torch.optim.Adam(adrqn.parameters(), lr = 0.01)

for i_episode in range(M_episodes):
    #if i_episode > replay_buffer_size:
    #    eps = 0.25
    #if i_episode > 5*replay_buffer_size:
    #    eps = 0.01
    now = time.time()
    #print(f'CURRENT EPISODE: {i_episode}')
    done = False
    last_action = 0
    current_return = 0
    eptime = 0 
    last_observation = preprocess_obs(env.reset()) 
    replay_buffer.init_episode()
    while done == False:
        env.render()
        if eptime == 0:
            qvals, hidden_states = adrqn(last_observation, onehot_action(n_actions,last_action))
        else:
            qvals, hidden_states = adrqn(last_observation, onehot_action(n_actions,last_action), hidden_states[0], hidden_states[1])
        
        greedy_prob = np.random.uniform()
        if greedy_prob < eps:
            action = np.random.randint(n_actions)
        else:
            action = int(torch.argmax(qvals))
        
        #print(action)

        observation, reward, done, info = env.step(action)
        eptime += 1
        current_return += reward
        reward = np.sign(reward)
        observation = preprocess_obs(observation)
        #print(reward)
        replay_buffer.write_tuple((last_action, last_observation, action, reward, observation, done))
        
        last_action = action
        last_observation = observation

    
        #Updating Networks
        if i_episode > 1:
            #Give the agent some time to explore
            for i_batch in range(batch_size):
                transitions = replay_buffer.sample(sample_length)
                #print(transitions)
                for (j, transition) in enumerate(transitions):
                    #...do something
                    if  transition[-1] == True:
                        target_value = transition[3]
                    elif j == 0:
                        target_inference, target_hidden = adrqn_target(transition[4], onehot_action(n_actions, transition[2]))
                        target_value = transition[3] + gamma*torch.max(target_inference)
                    else:
                        target_inference, target_hidden = adrqn_target(transition[4], onehot_action(n_actions, transition[2]),
                                                                      target_hidden[0], target_hidden[1])
                        target_value = transition[3] + gamma*torch.max(target_inference)

                    if j == 0:
                        predicted_value, predicted_hidden = adrqn(transition[1], onehot_action(n_actions, transition[0]))
                    else:
                        predicted_value, predicted_hidden = adrqn(transition[1], onehot_action(n_actions, transition[0]),
                                                                predicted_hidden[0], predicted_hidden[1])

                    optimizer.zero_grad()
                    loss = (target_value - predicted_value[0,0,transition[2]])**2
                    if j != 0:
                        print(f'BEFORE: {loss.item()}')
                    loss.backward(retain_graph = True)
                    if j != 0:
                        predicted_value, predicted_hidden = adrqn(transition[1], onehot_action(n_actions, transition[0]),
                                                                predicted_hidden[0], predicted_hidden[1])
                        print(f'AFTER: {(target_value - predicted_value[0,0,transition[2]])**2}')
                    optimizer.step()
                    #predicted_hidden = repackage_hidden(predicted_hidden)
                    
                    #Maybe the hidden states shouldnt be kept separately from the nets???
                    
                    #WHY NEED TO REPACKAGE TARGET???????    
                target_hidden = repackage_hidden(target_hidden)
            
            #print(i_batch)
            #print(loss.item())

    #print(f"Return: {current_return}")
    pp.update(current_return)
    
    if (i_episode % 1 == 0):
        #print("UPDATED")
        adrqn_target.load_state_dict(adrqn.state_dict())
        for param in adrqn_target.parameters():
            param.requires_grad = False 
    
    print(f'Finished Episode {i_episode}')
    print(f'Took {time.time()-now}')

env.close()

#TODO: UPDATE TARGET NETWORK OTHERWISE WE DO NOT LEARN ANYTHING

<IPython.core.display.Javascript object>

Finished Episode 0
Took 0.21926593780517578
Finished Episode 1
Took 0.15010833740234375
BEFORE: 0.8723238706588745
AFTER: 0.857043981552124
BEFORE: 0.583187460899353
AFTER: 0.5417116284370422
BEFORE: 0.3061524033546448
AFTER: 0.248643159866333
BEFORE: 0.16509611904621124
AFTER: 0.11682689934968948
BEFORE: 0.040840908885002136
AFTER: 0.018266895785927773
BEFORE: 0.023360872641205788
AFTER: 0.005727988202124834
BEFORE: 0.00017250828386750072
AFTER: 0.00981020275503397
BEFORE: 0.026128970086574554
AFTER: 0.0631052553653717
BEFORE: 0.10771694779396057
AFTER: 0.15686947107315063
BEFORE: 0.1209973618388176
AFTER: 0.16414791345596313
BEFORE: 0.09607025235891342
AFTER: 0.128611758351326
BEFORE: 0.04569971561431885
AFTER: 0.07166658341884613
BEFORE: 0.024402068927884102
AFTER: 0.041753366589546204
BEFORE: 0.01126669067889452
AFTER: 0.021815557032823563
BEFORE: 0.0047568888403475285
AFTER: 0.011545965448021889
BEFORE: 0.0008866798016242683
AFTER: 0.004015075508505106
BEFORE: 4.384963176562451e-0

BEFORE: 0.029347997158765793
AFTER: 0.02469852939248085
BEFORE: 0.033217158168554306
AFTER: 0.028353920206427574
BEFORE: 0.030588528141379356
AFTER: 0.025858856737613678
BEFORE: 0.02769830822944641
AFTER: 0.023260077461600304
BEFORE: 0.020763283595442772
AFTER: 0.016885217279195786
BEFORE: 0.01535485778003931
AFTER: 0.012053967453539371
BEFORE: 0.009689857251942158
AFTER: 0.007086934521794319
BEFORE: 0.008724851533770561
AFTER: 0.004783631302416325
BEFORE: 0.0024885167367756367
AFTER: 0.0012514495756477118
BEFORE: 0.0008143895538523793
AFTER: 0.00019296599202789366
BEFORE: 0.003273635171353817
AFTER: 0.0010725554311648011
BEFORE: 3.102151822531596e-05
AFTER: 0.0004212845233269036
BEFORE: 0.00016468892863485962
AFTER: 0.0007659932016395032
BEFORE: 0.00041448077536188066
AFTER: 0.0012431808281689882
BEFORE: 0.0008125400054268539
AFTER: 0.0019057518802583218
BEFORE: 0.0007653466891497374
AFTER: 0.0018268185667693615
BEFORE: 0.0006041647866368294
AFTER: 0.0015659809578210115
BEFORE: 0.0003

BEFORE: 10.779319763183594
AFTER: 11.105123519897461
BEFORE: 0.012254096567630768
AFTER: 0.022707931697368622
BEFORE: 0.027922075241804123
AFTER: 0.015184218063950539
BEFORE: 0.12720389664173126
AFTER: 0.09560967981815338
BEFORE: 0.08686953783035278
AFTER: 0.06339558959007263
BEFORE: 0.08544346690177917
AFTER: 0.06193788722157478
BEFORE: 0.18801677227020264
AFTER: 0.1483180820941925
BEFORE: 0.1180940791964531
AFTER: 0.08682934939861298
BEFORE: 8.868950843811035
AFTER: 9.156026840209961
BEFORE: 0.0019993395544588566
AFTER: 8.114665433822665e-06
BEFORE: 0.015240560285747051
AFTER: 0.006746191065758467
BEFORE: 0.015752455219626427
AFTER: 0.007182430475950241
BEFORE: 0.14312131702899933
AFTER: 0.10922355949878693
BEFORE: 0.09276678413152695
AFTER: 0.06499150395393372
BEFORE: 0.053360313177108765
AFTER: 0.08035235106945038
BEFORE: 0.00371441338211298
AFTER: 0.0004057609476149082
BEFORE: 0.00032964220736175776
AFTER: 0.0035423757508397102
BEFORE: 0.01439722627401352
AFTER: 0.0293390937149524

BEFORE: 0.008051862008869648
AFTER: 0.016214342787861824
BEFORE: 0.015334301628172398
AFTER: 0.005671917926520109
BEFORE: 0.007333139888942242
AFTER: 0.001187726273201406
BEFORE: 0.008442781865596771
AFTER: 0.02201472595334053
BEFORE: 0.009587474167346954
AFTER: 0.02353549376130104
BEFORE: 0.005804188549518585
AFTER: 0.0010974816977977753
BEFORE: 0.0023190255742520094
AFTER: 0.00011748360702767968
BEFORE: 0.002950854366645217
AFTER: 1.2834145309170708e-05
BEFORE: 0.0014028832083567977
AFTER: 0.00017928105080500245
BEFORE: 16.96245574951172
AFTER: 17.384153366088867
BEFORE: 0.006404642481356859
AFTER: 0.0007199106039479375
BEFORE: 0.014097621664404869
AFTER: 0.022900190204381943
BEFORE: 0.021948695182800293
AFTER: 0.009820548817515373
BEFORE: 0.0002771338913589716
AFTER: 0.004720060620456934
BEFORE: 0.14140069484710693
AFTER: 0.10688356310129166
BEFORE: 0.020447945222258568
AFTER: 0.009137561544775963
BEFORE: 0.001104443334043026
AFTER: 1.1584926141949836e-05
BEFORE: 0.14129455387592316

BEFORE: 0.004749590530991554
AFTER: 0.02086668275296688
BEFORE: 0.005013155750930309
AFTER: 0.02141173556447029
BEFORE: 29.9090576171875
AFTER: 30.642168045043945
BEFORE: 0.015756165608763695
AFTER: 0.04036429151892662
BEFORE: 0.005853481125086546
AFTER: 0.02304951287806034
BEFORE: 0.0010052613215520978
AFTER: 0.01143764890730381
BEFORE: 0.12258072197437286
AFTER: 0.18089276552200317
BEFORE: 22.513765335083008
AFTER: 23.063243865966797
BEFORE: 0.808241605758667
AFTER: 0.7123414278030396
BEFORE: 29.410097122192383
AFTER: 30.227611541748047
BEFORE: 27.85914421081543
AFTER: 28.635719299316406
BEFORE: 0.0008936437661759555
AFTER: 0.001626195851713419
BEFORE: 0.044717397540807724
AFTER: 0.020551582798361778
BEFORE: 0.4438900053501129
AFTER: 0.3596177399158478
BEFORE: 0.45597487688064575
AFTER: 0.3715517818927765
BEFORE: 0.5533294677734375
AFTER: 0.4605216681957245
BEFORE: 1.4055055379867554
AFTER: 1.2841686010360718
BEFORE: 0.3520856499671936
AFTER: 0.2791248857975006
BEFORE: 1.247455954551

BEFORE: 0.00031698759994469583
AFTER: 0.009933430701494217
BEFORE: 0.05814683437347412
AFTER: 0.02463928982615471
BEFORE: 46.91511535644531
AFTER: 48.082061767578125
BEFORE: 44.588966369628906
AFTER: 45.80659484863281
BEFORE: 0.6305195093154907
AFTER: 0.5393122434616089
BEFORE: 0.8459018468856812
AFTER: 0.7302155494689941
BEFORE: 0.9740136861801147
AFTER: 0.8324620127677917
BEFORE: 1.2425801753997803
AFTER: 1.1300910711288452
BEFORE: 1.2599279880523682
AFTER: 1.1464682817459106
BEFORE: 1.2227243185043335
AFTER: 1.1117994785308838
BEFORE: 0.8502702713012695
AFTER: 0.7059047222137451
BEFORE: 0.020857591181993484
AFTER: 0.007473372388631105
BEFORE: 0.6103008985519409
AFTER: 0.4954501688480377
BEFORE: 0.5162808299064636
AFTER: 0.41108861565589905
BEFORE: 0.4323655366897583
AFTER: 0.33581772446632385
BEFORE: 0.3514685332775116
AFTER: 0.26415181159973145
BEFORE: 0.27282392978668213
AFTER: 0.1959735006093979
BEFORE: 0.3387303948402405
AFTER: 0.27239173650741577
BEFORE: 0.0010639057727530599
A

AFTER: 0.2962490916252136
BEFORE: 0.00014836039918009192
AFTER: 0.015992391854524612
BEFORE: 0.3431204855442047
AFTER: 0.22998134791851044
BEFORE: 0.004336130805313587
AFTER: 0.03267017751932144
BEFORE: 0.26363056898117065
AFTER: 0.1651824414730072
BEFORE: 0.2249988466501236
AFTER: 0.13459284603595734
BEFORE: 0.18681700527668
AFTER: 0.10514933615922928
BEFORE: 0.14857125282287598
AFTER: 0.07670440524816513
BEFORE: 0.11300013959407806
AFTER: 0.05157284811139107
BEFORE: 0.017560718581080437
AFTER: 0.061590664088726044
BEFORE: 0.01749405451118946
AFTER: 0.06146292760968208
BEFORE: 0.045293185859918594
AFTER: 0.010475180111825466
BEFORE: 0.01488789264112711
AFTER: 0.05642310157418251
BEFORE: 0.024215092882514
AFTER: 0.001978584099560976
BEFORE: 0.017566027119755745
AFTER: 0.000446903781266883
BEFORE: 0.012513574212789536
AFTER: 5.065521690994501e-08
BEFORE: 0.008514437824487686
AFTER: 0.0003838576376438141
BEFORE: 0.005317860282957554
AFTER: 0.0015340108657255769
BEFORE: 0.0067902845330536

BEFORE: 0.0036052020732313395
AFTER: 0.035520538687705994
BEFORE: 0.0033927790354937315
AFTER: 0.03483832627534866
BEFORE: 0.2225618064403534
AFTER: 0.12313193827867508
BEFORE: 131.36572265625
AFTER: 134.3242645263672
BEFORE: 0.15186157822608948
AFTER: 0.07176963984966278
BEFORE: 0.12469160556793213
AFTER: 0.05331648513674736
BEFORE: 0.09940626472234726
AFTER: 0.03711139038205147
BEFORE: 0.31028422713279724
AFTER: 0.1902722269296646
BEFORE: 0.39385026693344116
AFTER: 0.25768566131591797
BEFORE: 0.44584086537361145
AFTER: 0.3006106913089752
BEFORE: 0.03935632482171059
AFTER: 0.005536913406103849
BEFORE: 0.031023560091853142
AFTER: 0.0026944917626678944
BEFORE: 0.4748893976211548
AFTER: 0.3249551057815552
BEFORE: 0.018649274483323097
AFTER: 0.0001412685087416321
BEFORE: 0.01419653370976448
AFTER: 3.2774558349046856e-05
BEFORE: 0.39380598068237305
AFTER: 0.2578948438167572
BEFORE: 0.34519949555397034
AFTER: 0.21845614910125732
BEFORE: 0.006074641831219196
AFTER: 0.0022470762487500906
BEFO

BEFORE: 0.5018802285194397
AFTER: 0.33703964948654175
BEFORE: 0.9600813388824463
AFTER: 0.7358609437942505
BEFORE: 0.5556331872940063
AFTER: 0.38195115327835083
BEFORE: 0.8125883340835571
AFTER: 0.6061729788780212
BEFORE: 0.7454192042350769
AFTER: 0.547725260257721
BEFORE: 0.6778548955917358
AFTER: 0.48939642310142517
BEFORE: 0.605208158493042
AFTER: 0.42729735374450684
BEFORE: 0.5260432362556458
AFTER: 0.36049845814704895
BEFORE: 134.87005615234375
AFTER: 137.8014678955078
BEFORE: 0.3268093764781952
AFTER: 0.19582998752593994
BEFORE: 0.28435033559799194
AFTER: 0.16295818984508514
BEFORE: 0.23794756829738617
AFTER: 0.12797069549560547
BEFORE: 0.19544191658496857
AFTER: 0.0970325917005539
BEFORE: 0.15442024171352386
AFTER: 0.0685456320643425
BEFORE: 0.11475148797035217
AFTER: 0.04284180328249931
BEFORE: 0.07964914292097092
AFTER: 0.02244298718869686
BEFORE: 0.8486301302909851
AFTER: 0.638454020023346
BEFORE: 0.030696872621774673
AFTER: 0.0017286750953644514
BEFORE: 0.016170168295502663


BEFORE: 0.6236374378204346
AFTER: 0.40161001682281494
BEFORE: 0.4935294985771179
AFTER: 0.2976241409778595
BEFORE: 0.37276655435562134
AFTER: 0.20479021966457367
BEFORE: 0.15761908888816833
AFTER: 0.05718864127993584
BEFORE: 0.13977085053920746
AFTER: 0.04653807356953621
BEFORE: 0.1532905399799347
AFTER: 0.05337199196219444
BEFORE: 0.1035621389746666
AFTER: 0.026600444689393044
BEFORE: 0.08767896145582199
AFTER: 0.018796993419528008
BEFORE: 218.59030151367188
AFTER: 223.41033935546875
BEFORE: 0.06302657723426819
AFTER: 0.008378935046494007
BEFORE: 0.3155035376548767
AFTER: 0.16276729106903076
BEFORE: 0.044705700129270554
AFTER: 0.0026495333295315504
BEFORE: 0.5460087060928345
AFTER: 0.3397669196128845
BEFORE: 0.03167635202407837
AFTER: 0.0003110053949058056
BEFORE: 0.6481084227561951
AFTER: 0.42240405082702637
BEFORE: 202.54238891601562
AFTER: 206.98094177246094
BEFORE: 0.02051495760679245
AFTER: 0.0003066479985136539
BEFORE: 1.2112243175506592
AFTER: 0.9010198712348938
BEFORE: 1.38444

AFTER: 1.1800349950790405
BEFORE: 1.625196099281311
AFTER: 1.2414387464523315
BEFORE: 0.00021018495317548513
AFTER: 0.02655908651649952
BEFORE: 7.862836355343461e-05
AFTER: 0.02844785898923874
BEFORE: 1.6886940002441406
AFTER: 1.2979161739349365
BEFORE: 2.1836967789568007e-07
AFTER: 0.03172049671411514
BEFORE: 1.6301358938217163
AFTER: 1.2461568117141724
BEFORE: 1.5708653926849365
AFTER: 1.1938494443893433
BEFORE: 1.4766424894332886
AFTER: 1.1110022068023682
BEFORE: 1.3586629629135132
AFTER: 1.007873773574829
BEFORE: 0.00023974980285856873
AFTER: 0.03735687583684921
BEFORE: 1.1324514150619507
AFTER: 0.8123974800109863
BEFORE: 0.00035641423892229795
AFTER: 0.03869505599141121
BEFORE: 242.90740966796875
AFTER: 248.0439910888672
BEFORE: 0.00045961351133883
AFTER: 0.039718516170978546
BEFORE: 0.0005014138878323138
AFTER: 0.04010336846113205
BEFORE: 0.0005336053436622024
AFTER: 0.04038958624005318
BEFORE: 235.591064453125
AFTER: 240.55638122558594
BEFORE: 0.0006077448488213122
AFTER: 0.0410

BEFORE: 3.546196937561035
AFTER: 2.956285238265991
BEFORE: 0.03716542199254036
AFTER: 3.267635838710703e-05
BEFORE: 0.03164105862379074
AFTER: 8.752621943131089e-05
BEFORE: 0.026247302070260048
AFTER: 0.0006453620153479278
BEFORE: 0.021208617836236954
AFTER: 0.001760941813699901
BEFORE: 0.016675390303134918
AFTER: 0.00343948882073164
BEFORE: 2.796543836593628
AFTER: 2.268566131591797
BEFORE: 0.00976684782654047
AFTER: 0.00797315500676632
BEFORE: 2.5787487030029297
AFTER: 2.0706801414489746
BEFORE: 2.4276986122131348
AFTER: 1.9340766668319702
BEFORE: 0.004687618464231491
AFTER: 0.014398828148841858
BEFORE: 2.0887904167175293
AFTER: 1.6297121047973633
BEFORE: 1.9241899251937866
AFTER: 1.4831161499023438
BEFORE: 1.7608369588851929
AFTER: 1.338564157485962
BEFORE: 317.93414306640625
AFTER: 324.7004089355469
BEFORE: 0.04014386609196663
AFTER: 0.0001830806868383661
BEFORE: 0.10452497005462646
AFTER: 0.019037818536162376
BEFORE: 304.767578125
AFTER: 311.2319641113281
BEFORE: 0.402818441390991

BEFORE: 0.638620138168335
AFTER: 0.3778189718723297
BEFORE: 0.3979041278362274
AFTER: 0.19824886322021484
BEFORE: 0.3717494606971741
AFTER: 0.17971274256706238
BEFORE: 0.3437967598438263
AFTER: 0.1602286547422409
BEFORE: 0.3172672986984253
AFTER: 0.14208360016345978
BEFORE: 0.29222962260246277
AFTER: 0.1253093183040619
BEFORE: 0.26672545075416565
AFTER: 0.10862317681312561
BEFORE: 0.30111393332481384
AFTER: 0.13068348169326782
BEFORE: 0.22044239938259125
AFTER: 0.07957648485898972
BEFORE: 0.19806887209415436
AFTER: 0.06622753292322159
BEFORE: 0.2288312017917633
AFTER: 0.08430420607328415
BEFORE: 0.15927724540233612
AFTER: 0.04446405544877052
BEFORE: 0.14249081909656525
AFTER: 0.03572068735957146
BEFORE: 0.17351363599300385
AFTER: 0.05191647261381149
BEFORE: 0.15503396093845367
AFTER: 0.041941333562135696
BEFORE: 0.10645902901887894
AFTER: 0.018827339634299278
BEFORE: 0.09667524695396423
AFTER: 0.01480631809681654
BEFORE: 0.08704530447721481
AFTER: 0.011152643710374832
BEFORE: 0.0996415

AFTER: 321.1982421875
BEFORE: 5.600001335144043
AFTER: 5.025574684143066
BEFORE: 2.0662198066711426
AFTER: 1.7343618869781494
BEFORE: 4.941506385803223
AFTER: 4.188323020935059
BEFORE: 4.4674248695373535
AFTER: 3.7448134422302246
BEFORE: 4.37107515335083
AFTER: 3.6609926223754883
BEFORE: 1.8579585552215576
AFTER: 1.543039321899414
BEFORE: 1.2918606996536255
AFTER: 1.0310486555099487
BEFORE: 1.699493169784546
AFTER: 1.3975234031677246
BEFORE: 3.925278902053833
AFTER: 3.2528295516967773
BEFORE: 0.565757691860199
AFTER: 0.31015464663505554
BEFORE: 0.5281369686126709
AFTER: 0.2773677110671997
BEFORE: 1.3447678089141846
AFTER: 1.0754534006118774
BEFORE: 1.07319974899292
AFTER: 0.8335796594619751
BEFORE: 1.1511693000793457
AFTER: 0.9018166661262512
BEFORE: 4.495341777801514
AFTER: 3.975790500640869
BEFORE: 2.9841935634613037
AFTER: 2.3935954570770264
BEFORE: 324.51739501953125
AFTER: 331.0697326660156
BEFORE: 0.42180055379867554
AFTER: 0.275638610124588
BEFORE: 0.8009848594665527
AFTER: 0.59

BEFORE: 328.89007568359375
AFTER: 333.55419921875
BEFORE: 5.883063793182373
AFTER: 5.281761169433594
BEFORE: 0.31492525339126587
AFTER: 0.184339702129364
BEFORE: 6.394728183746338
AFTER: 5.771640300750732
BEFORE: 6.444136142730713
AFTER: 5.8192009925842285
BEFORE: 6.357662200927734
AFTER: 5.736602783203125
BEFORE: 6.16414213180542
AFTER: 5.551576137542725
BEFORE: 5.890199661254883
AFTER: 5.289899826049805
BEFORE: 0.2293935865163803
AFTER: 0.12005724012851715
BEFORE: 0.21581557393074036
AFTER: 0.11021561920642853
BEFORE: 0.19908711314201355
AFTER: 0.09826875478029251
BEFORE: 4.981093406677246
AFTER: 4.424208164215088
BEFORE: 0.16724856197834015
AFTER: 0.07615653425455093
BEFORE: 403.02655029296875
AFTER: 408.39056396484375
BEFORE: 4.439088821411133
AFTER: 3.910524845123291
BEFORE: 0.3683498799800873
AFTER: 0.22622552514076233
BEFORE: 0.4613478481769562
AFTER: 0.3009192645549774
BEFORE: 3.8987207412719727
AFTER: 3.4007012844085693
BEFORE: 0.5994513034820557
AFTER: 0.4152558147907257
BEFO

AFTER: 365.2808532714844
BEFORE: 358.2783203125
AFTER: 363.3155822753906
BEFORE: 1.2326264381408691
AFTER: 0.9821361899375916
BEFORE: 350.564697265625
AFTER: 356.1572570800781
BEFORE: 1.3315647840499878
AFTER: 1.0717222690582275
BEFORE: 2.220383405685425
AFTER: 1.8534011840820312
BEFORE: 1.3370375633239746
AFTER: 1.0703799724578857
BEFORE: 1.615557074546814
AFTER: 1.317099928855896
BEFORE: 1.6944741010665894
AFTER: 1.437242865562439
BEFORE: 3.10575008392334
AFTER: 2.6767258644104004
BEFORE: 2.111982583999634
AFTER: 1.950434923171997
BEFORE: 3.285902976989746
AFTER: 2.845911979675293
BEFORE: 3.280919313430786
AFTER: 2.841254711151123
BEFORE: 1.8584057092666626
AFTER: 1.706208348274231
BEFORE: 348.0284729003906
AFTER: 350.1639099121094
BEFORE: 1.2731764316558838
AFTER: 1.0093179941177368
BEFORE: 3.08079195022583
AFTER: 2.6370246410369873
BEFORE: 1.303162693977356
AFTER: 1.009697437286377
BEFORE: 2.859610080718994
AFTER: 2.4475433826446533
BEFORE: 1.2428970336914062
AFTER: 0.9924421310424

AFTER: 384.4420166015625
BEFORE: 0.19250598549842834
AFTER: 0.09035924077033997
BEFORE: 0.8127963542938232
AFTER: 0.6105631589889526
BEFORE: 0.8754692077636719
AFTER: 0.6655416488647461
BEFORE: 0.16600129008293152
AFTER: 0.0723789855837822
BEFORE: 0.15708795189857483
AFTER: 0.06649187207221985
Finished Episode 32
Took 0.6271820068359375
BEFORE: 0.6143157482147217
AFTER: 0.41627904772758484
BEFORE: 1.9331218004226685
AFTER: 1.6155328750610352
BEFORE: 0.5552849173545837
AFTER: 0.3675885498523712
BEFORE: 0.5233739614486694
AFTER: 0.3415168821811676
BEFORE: 0.48988091945648193
AFTER: 0.3143603503704071
BEFORE: 0.4533916711807251
AFTER: 0.28505051136016846
BEFORE: 0.4154672622680664
AFTER: 0.2549239993095398
BEFORE: 1.8697962760925293
AFTER: 1.5573877096176147
BEFORE: 1.8308945894241333
AFTER: 1.5216481685638428
BEFORE: 0.3243541121482849
AFTER: 0.1842709183692932
BEFORE: 1.7134760618209839
AFTER: 1.413973093032837
BEFORE: 0.28159260749816895
AFTER: 0.1521754115819931
BEFORE: 0.261655062437

BEFORE: 0.3660590648651123
AFTER: 0.22855573892593384
BEFORE: 0.4700582027435303
AFTER: 0.29557156562805176
BEFORE: 0.44146043062210083
AFTER: 0.27282196283340454
BEFORE: 0.30202099680900574
AFTER: 0.17823609709739685
BEFORE: 0.28274965286254883
AFTER: 0.16339118778705597
BEFORE: 0.36714231967926025
AFTER: 0.21471647918224335
BEFORE: 0.24770882725715637
AFTER: 0.1368357092142105
BEFORE: 0.3243345618247986
AFTER: 0.18205386400222778
BEFORE: 0.2180720865726471
AFTER: 0.11488914489746094
BEFORE: 0.2033977508544922
AFTER: 0.1042267307639122
BEFORE: 0.1891150325536728
AFTER: 0.09400156885385513
BEFORE: 0.2559323012828827
AFTER: 0.13146793842315674
BEFORE: 0.16595621407032013
AFTER: 0.07777465134859085
BEFORE: 0.1563510000705719
AFTER: 0.07119237631559372
BEFORE: 0.20456930994987488
AFTER: 0.09522068500518799
BEFORE: 0.1394658237695694
AFTER: 0.059869434684515
BEFORE: 0.13073725998401642
AFTER: 0.054156553000211716
BEFORE: 0.16466115415096283
AFTER: 0.0684967041015625
BEFORE: 0.1519478112459

BEFORE: 1.605159044265747
AFTER: 1.318207859992981
BEFORE: 0.8147991895675659
AFTER: 0.5841273069381714
BEFORE: 1.8366906642913818
AFTER: 1.5306774377822876
BEFORE: 0.7368347644805908
AFTER: 0.5179486870765686
BEFORE: 1.9595609903335571
AFTER: 1.6440316438674927
BEFORE: 1.9697093963623047
AFTER: 1.6534459590911865
BEFORE: 0.6307671666145325
AFTER: 0.4290608763694763
BEFORE: 0.5934647917747498
AFTER: 0.3981664776802063
BEFORE: 0.5544665455818176
AFTER: 0.3660936951637268
BEFORE: 1.8939992189407349
AFTER: 1.5837000608444214
BEFORE: 1.8402440547943115
AFTER: 1.534261703491211
BEFORE: 1.767748236656189
AFTER: 1.467617154121399
BEFORE: 1.6885651350021362
AFTER: 1.3950352668762207
BEFORE: 1.6047289371490479
AFTER: 1.3183523416519165
BEFORE: 0.41096511483192444
AFTER: 0.250444620847702
BEFORE: 0.3903103470802307
AFTER: 0.23451752960681915
BEFORE: 0.3678291141986847
AFTER: 0.21703477203845978
BEFORE: 1.3261514902114868
AFTER: 1.065282940864563
BEFORE: 1.260677456855774
AFTER: 1.006239056587219

AFTER: 1.9888311624526978
BEFORE: 441.0483093261719
AFTER: 447.0306396484375
BEFORE: 2.5148468017578125
AFTER: 2.0866518020629883
BEFORE: 2.6520557403564453
AFTER: 2.2129716873168945
BEFORE: 0.48433250188827515
AFTER: 0.3181101381778717
BEFORE: 2.766552686691284
AFTER: 2.318770170211792
BEFORE: 0.45482608675956726
AFTER: 0.2940616011619568
BEFORE: 0.435018390417099
AFTER: 0.2780451774597168
BEFORE: 2.7298617362976074
AFTER: 2.2851898670196533
BEFORE: 478.9706726074219
AFTER: 484.7885437011719
BEFORE: 0.5112797617912292
AFTER: 0.34028297662734985
BEFORE: 0.6071668863296509
AFTER: 0.4199766218662262
BEFORE: 2.537261486053467
AFTER: 2.10810923576355
BEFORE: 0.7429576516151428
AFTER: 0.5351210832595825
BEFORE: 2.3855578899383545
AFTER: 1.9690295457839966
BEFORE: 466.95501708984375
AFTER: 472.5926818847656
BEFORE: 2.1631696224212646
AFTER: 1.7660293579101562
BEFORE: 1.2275537252426147
AFTER: 0.9596478343009949
BEFORE: 1.9430314302444458
AFTER: 1.5660592317581177
BEFORE: 1.8358583450317383
A

KeyboardInterrupt: 

In [4]:
from jupyterplot import ProgressPlot
import numpy as np

vals = []
eps = []
import time

pp = ProgressPlot()

for i in range(10):
    time.sleep(0.5)
    pp.update(np.sin(i))


<IPython.core.display.Javascript object>