In [28]:
import time
from collections import deque

import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.distributions as dist

import gym

from utils import ReplayBuffer,clip

In [29]:
device = 'cuda'

In [30]:
class Actor(nn.Module):
    def __init__(self, N_s, N_a, max_a):
        super().__init__()
        self.layer_1 = nn.Linear(N_s,200)
        self.layer_2 = nn.Linear(200,200)
        self.layer_3 = nn.Linear(200,N_a)
        self.N_s = N_s
        self.N_a = N_a
        self.max_a = torch.tensor(max_a, dtype = torch.float32).to(device)
    
    def forward(self, s):
        assert type(s) == torch.Tensor
        
        h = F.leaky_relu(self.layer_1(s))
        h = F.leaky_relu(self.layer_2(h))
        h = torch.tanh(self.layer_3(h)) * self. max_a
        return h

In [31]:
class Critic(nn.Module):
    def __init__(self,N_s,N_a):
        super().__init__()
        self.N_s = N_s
        self.N_a = N_a
        
        self.layer1 = nn.Linear(N_s+N_a,200)
        self.layer2 = nn.Linear(200,200)
        self.layer3 = nn.Linear(200,1)
    
    def forward(self,s,a):
        x = torch.cat((s,a),dim=-1)
        
        Q = F.leaky_relu(self.layer1(x))
        Q = F.leaky_relu(self.layer2(Q))
        Q = self.layer3(Q)
        
        return Q

In [32]:
class DDPG():
    def __init__(self, N_s, N_a, max_a):
        self.N_s = N_s
        self.N_a = N_a
        self.max_a = max_a
        
        self.actor = Actor(N_s, N_a, max_a)
        self.critic = Critic(N_s, N_a)
        self.target_critic = Critic(N_s, N_a)
        self.target_critic.load_state_dict(self.critic.state_dict())
        
        self.actor_optim = torch.optim.Adam(self.actor.parameters(), lr=1e-3)
        self.critic_optim = torch.optim.Adam(self.critic.parameters(), lr=1e-3)
        
    def to(self, device):
        self.actor.to(device)
        self.critic.to(device)
        self.target_critic.to(device)
        
    def polyak(self, tau):
        for param, target_param in zip(self.critic.parameters(),self.target_critic.parameters()):
            target_param.data.copy_(tau*param.data + (1-tau)*target_param.data)
    
    def target_Q(self, batch, gamma):
        NS = torch.tensor(batch['next_state'], dtype=torch.float32).to(device)
        R = torch.tensor(batch['reward'], dtype=torch.float32).to(device)
        D = torch.tensor(batch['done'],dtype=torch.int8).to(device)
        R = R.view(*R.shape,1)
        D = D.view(*D.shape,1)
        
        
        with torch.no_grad():
            A = self.actor(NS)
            NQ = self.target_critic(NS,A)
            
            return R + ( gamma * (~D) * NQ )
    
    def critic_loss(self, batch, gamma):
        S = torch.tensor(batch['state'], dtype=torch.float32).to(device)
        A = torch.tensor(batch['action'],dtype=torch.float32).to(device)
        
        Q = self.critic(S,A)
        target_Q = self.target_Q(batch, gamma)
        
        return F.mse_loss(Q,target_Q)
    
    def actor_loss(self, batch):
        S = torch.tensor(batch['state'], dtype=torch.float32).to(device)
        A = self.actor(S)
        #loss = -self.critic(S,A).mean()
        loss = -self.target_critic(S,A).mean()
        
        return loss
    
    def train_critic_once(self, batch, gamma=0.99, tau=0.005):
        self.critic_optim.zero_grad()
        L = self.critic_loss(batch, gamma)
        L.backward()
        self.critic_optim.step()
        
        self.polyak(tau)
        
        return L.item()
    
    def train_actor_once(self, batch):
        self.actor_optim.zero_grad()
        L = self.actor_loss(batch)
        L.backward()
        self.actor_optim.step()
        
        return L.item()
    
    def explore(self,s,sigma = 0.3):
        s = torch.tensor(s,dtype=torch.float32).to(device)
        with torch.no_grad():
            a = self.actor(s)
        a = a.detach().cpu().numpy()
        eps = np.random.normal(size = self.N_a, scale = sigma)
        
        return clip(a + eps,-self.max_a,self.max_a)

In [33]:
def run_episode(env, buffer, model, render = False, sigma = 0.3):
    s = env.reset()
    done = 0
    if render:
        env.render()
    while(not done):
        a = model.explore(s,sigma)
        ns, r, done, info = env.step(a)
        T = [s,ns,a,r,done]
        buffer.append(T)
        s = ns.copy()
        if render:
            env.render()

In [34]:
buffer = ReplayBuffer(max_len = 1e5)
env = gym.make('HalfCheetah-v2')

model = DDPG(17,6, np.array([1., 1., 1., 1., 1., 1., ]))
model.to(device)

In [35]:
# Add initial episodes to buffer w/o training
for episode in tqdm(range(3)):
    run_episode(env, buffer, model, render = False, sigma = 0.5)

100%|██████████| 3/3 [00:01<00:00,  2.84it/s]


In [36]:
# train
for episode in range(1000):
    render = (episode%30 == 0)
    run_episode(env, buffer, model, render = render, sigma = 0.3)
    
    for n in range(20):
        batch = buffer.sample(64)
        critic_L = model.train_critic_once(batch)
        if n%2 == 0:
            actor_L = model.train_actor_once(batch)
            print(f"critic loss: {critic_L}")
            print(f"actor loss: {actor_L}")
            

critic loss: 0.28720712661743164
actor loss: 0.2568731904029846
critic loss: 0.26839718222618103
actor loss: 0.19921834766864777
critic loss: 0.2989252209663391
actor loss: 0.19670307636260986
critic loss: 0.27499255537986755
actor loss: 0.17042189836502075
critic loss: 0.21138548851013184
actor loss: 0.1932041049003601
critic loss: 0.20215356349945068
actor loss: 0.18561962246894836
critic loss: 0.336820513010025
actor loss: 0.16610094904899597
critic loss: 0.21517662703990936
actor loss: 0.1770942658185959
critic loss: 0.32737797498703003
actor loss: 0.21814242005348206
critic loss: 0.20041225850582123
actor loss: 0.1885819137096405
critic loss: 0.21261729300022125
actor loss: 0.20145143568515778
critic loss: 0.1706506609916687
actor loss: 0.16598635911941528
critic loss: 0.1704578697681427
actor loss: 0.15043050050735474
critic loss: 0.22408708930015564
actor loss: 0.19503505527973175
critic loss: 0.17356698215007782
actor loss: 0.1740933358669281
critic loss: 0.17597603797912598
ac

critic loss: 0.028407983481884003
actor loss: 0.04146113246679306
critic loss: 0.020687909796833992
actor loss: -0.004398996010422707
critic loss: 0.031417056918144226
actor loss: 0.0443970188498497
critic loss: 0.03532952070236206
actor loss: 0.053513363003730774
critic loss: 0.022562311962246895
actor loss: -0.020372986793518066
critic loss: 0.02402665466070175
actor loss: 0.026809953153133392
critic loss: 0.02963927388191223
actor loss: -0.0027219653129577637
critic loss: 0.022590097039937973
actor loss: 0.03872513025999069
critic loss: 0.0349181666970253
actor loss: -0.05897269397974014
critic loss: 0.024339187890291214
actor loss: 0.04072897881269455
critic loss: 0.020313020795583725
actor loss: 0.012588007375597954
critic loss: 0.033292196691036224
actor loss: 0.013273593969643116
critic loss: 0.021288912743330002
actor loss: -0.002220064401626587
critic loss: 0.03317347913980484
actor loss: 0.10444781929254532
critic loss: 0.02054767683148384
actor loss: 0.03233528137207031
crit

critic loss: 0.029246743768453598
actor loss: 0.16684159636497498
critic loss: 0.0176417026668787
actor loss: 0.07240474224090576
critic loss: 0.021881571039557457
actor loss: 0.1131674274802208
critic loss: 0.016253773123025894
actor loss: 0.1259060651063919
critic loss: 0.013262457214295864
actor loss: 0.10920711606740952
critic loss: 0.021411443129181862
actor loss: 0.12322106957435608
critic loss: 0.013937722891569138
actor loss: 0.14031465351581573
critic loss: 0.02184877172112465
actor loss: 0.1080276221036911
critic loss: 0.010415395721793175
actor loss: 0.09547178447246552
critic loss: 0.01813937909901142
actor loss: 0.1004423052072525
critic loss: 0.025791611522436142
actor loss: 0.08517482876777649
critic loss: 0.017422717064619064
actor loss: 0.08598370850086212
critic loss: 0.013575146906077862
actor loss: 0.11506890505552292
critic loss: 0.011939771473407745
actor loss: 0.023108989000320435
critic loss: 0.009552063420414925
actor loss: 0.12183147668838501
critic loss: 0.01

critic loss: 0.007033245638012886
actor loss: 0.10860027372837067
critic loss: 0.008977201767265797
actor loss: 0.14456051588058472
critic loss: 0.014796123839914799
actor loss: 0.1418054699897766
critic loss: 0.011126548983156681
actor loss: 0.16028240323066711
critic loss: 0.01469759363681078
actor loss: 0.1119702160358429
critic loss: 0.012456128373742104
actor loss: 0.12102331221103668
critic loss: 0.008625539019703865
actor loss: 0.12669062614440918
critic loss: 0.008168144151568413
actor loss: 0.1438223123550415
critic loss: 0.022276621311903
actor loss: 0.12006644159555435
critic loss: 0.010580537840723991
actor loss: 0.16244181990623474
critic loss: 0.012792350724339485
actor loss: 0.13760191202163696
critic loss: 0.01366143673658371
actor loss: 0.08656105399131775
critic loss: 0.008379578590393066
actor loss: 0.1727626919746399
critic loss: 0.013542959466576576
actor loss: 0.10596280544996262
critic loss: 0.0088586937636137
actor loss: 0.14352299273014069
critic loss: 0.009254

critic loss: 0.0066070775501430035
actor loss: 0.19009089469909668
critic loss: 0.009568296372890472
actor loss: 0.10508624464273453
critic loss: 0.007826586253941059
actor loss: 0.14511562883853912
critic loss: 0.007031538523733616
actor loss: 0.1655738651752472
critic loss: 0.008990254253149033
actor loss: 0.12950152158737183
critic loss: 0.008290794678032398
actor loss: 0.140803724527359
critic loss: 0.011501245200634003
actor loss: 0.1331871747970581
critic loss: 0.014136831276118755
actor loss: 0.12039359658956528
critic loss: 0.005554533563554287
actor loss: 0.09821756184101105
critic loss: 0.013529608026146889
actor loss: 0.11717015504837036
critic loss: 0.010218803770840168
actor loss: 0.1768786460161209
critic loss: 0.007675092201679945
actor loss: 0.1418658345937729
critic loss: 0.008921021595597267
actor loss: 0.12058951705694199
critic loss: 0.010786904022097588
actor loss: 0.1440374255180359
critic loss: 0.006807185709476471
actor loss: 0.1490648239850998
critic loss: 0.00

critic loss: 0.00369798531755805
actor loss: 0.17819367349147797
critic loss: 0.00983349233865738
actor loss: 0.14977234601974487
critic loss: 0.004489137791097164
actor loss: 0.1855953484773636
critic loss: 0.005241838283836842
actor loss: 0.15238435566425323
critic loss: 0.007245819550007582
actor loss: 0.17917388677597046
critic loss: 0.004702295642346144
actor loss: 0.20077043771743774
critic loss: 0.007344273384660482
actor loss: 0.1769893765449524
critic loss: 0.007265985943377018
actor loss: 0.18280288577079773
critic loss: 0.007354591973125935
actor loss: 0.16621357202529907
critic loss: 0.007336330600082874
actor loss: 0.18192756175994873
critic loss: 0.015140678733587265
actor loss: 0.1812841147184372
critic loss: 0.006700647063553333
actor loss: 0.14555113017559052
critic loss: 0.004001189488917589
actor loss: 0.17999452352523804
critic loss: 0.00562346912920475
actor loss: 0.14036953449249268
critic loss: 0.009145285934209824
actor loss: 0.15037955343723297
critic loss: 0.0

critic loss: 0.011637305840849876
actor loss: 0.05431698262691498
critic loss: 0.009703917428851128
actor loss: 0.12277393043041229
critic loss: 0.017713958397507668
actor loss: 0.10190171003341675
critic loss: 0.007136807311326265
actor loss: 0.1218252032995224
critic loss: 0.016291936859488487
actor loss: 0.10855525732040405
critic loss: 0.011520215310156345
actor loss: 0.11175908893346786
critic loss: 0.00818345881998539
actor loss: 0.10757531225681305
critic loss: 0.008168607018887997
actor loss: 0.10146571695804596
critic loss: 0.01168688666075468
actor loss: 0.07290561497211456
critic loss: 0.014919500797986984
actor loss: 0.08035753667354584
critic loss: 0.01053745299577713
actor loss: 0.09330623596906662
critic loss: 0.00896614883095026
actor loss: 0.09031057357788086
critic loss: 0.007792516611516476
actor loss: 0.09491939842700958
critic loss: 0.008625632151961327
actor loss: 0.08505268394947052
critic loss: 0.014378520660102367
actor loss: 0.0866013839840889
critic loss: 0.0

critic loss: 0.00551675446331501
actor loss: 0.11807648092508316
critic loss: 0.009523440152406693
actor loss: 0.09286083281040192
critic loss: 0.007865800522267818
actor loss: 0.0894862487912178
critic loss: 0.007915589958429337
actor loss: 0.09840135276317596
critic loss: 0.00893780030310154
actor loss: 0.09442123770713806
critic loss: 0.008887132629752159
actor loss: 0.09929254651069641
critic loss: 0.006606290116906166
actor loss: 0.13070449233055115
critic loss: 0.007539806887507439
actor loss: 0.09571732580661774
critic loss: 0.007230532355606556
actor loss: 0.11052742600440979
critic loss: 0.009341191500425339
actor loss: 0.10458841919898987
critic loss: 0.011509222909808159
actor loss: 0.11424063891172409
critic loss: 0.008100558072328568
actor loss: 0.11295178532600403
critic loss: 0.009442314505577087
actor loss: 0.10063871741294861
critic loss: 0.008876040577888489
actor loss: 0.11341891437768936
critic loss: 0.00985827948898077
actor loss: 0.1034231185913086
critic loss: 0.

critic loss: 0.007835578173398972
actor loss: 0.11246553063392639
critic loss: 0.012447647750377655
actor loss: 0.10080879926681519
critic loss: 0.0056560831144452095
actor loss: 0.09896992146968842
critic loss: 0.011848087422549725
actor loss: 0.07966010272502899
critic loss: 0.00695302989333868
actor loss: 0.09804954379796982
critic loss: 0.007315142545849085
actor loss: 0.1268729865550995
critic loss: 0.008138855919241905
actor loss: 0.1284448504447937
critic loss: 0.011801213026046753
actor loss: 0.07665273547172546
critic loss: 0.007144182920455933
actor loss: 0.08040010184049606
critic loss: 0.004501953721046448
actor loss: 0.08670368045568466
critic loss: 0.008256426081061363
actor loss: 0.08425794541835785
critic loss: 0.008417363278567791
actor loss: 0.11254838854074478
critic loss: 0.006469326093792915
actor loss: 0.10981787741184235
critic loss: 0.006576739251613617
actor loss: 0.03554645925760269
critic loss: 0.007107955869287252
actor loss: 0.06933476030826569
critic loss:

critic loss: 0.010473827831447124
actor loss: 0.04495278745889664
critic loss: 0.008704448118805885
actor loss: 0.1266489177942276
critic loss: 0.008237414062023163
actor loss: 0.09481926262378693
critic loss: 0.0062605831772089005
actor loss: 0.07864917814731598
critic loss: 0.007734829094260931
actor loss: 0.03462177515029907
critic loss: 0.008605164475739002
actor loss: 0.04836317151784897
critic loss: 0.016909176483750343
actor loss: 0.027638768777251244
critic loss: 0.005442463327199221
actor loss: 0.09137986600399017
critic loss: 0.006449522450566292
actor loss: 0.06364338845014572
critic loss: 0.006946945562958717
actor loss: 0.09342247992753983
critic loss: 0.00829883199185133
actor loss: 0.08940647542476654
critic loss: 0.007897189818322659
actor loss: 0.07161596417427063
critic loss: 0.007593528367578983
actor loss: 0.11279471963644028
critic loss: 0.007038634270429611
actor loss: 0.09746687114238739
critic loss: 0.014309873804450035
actor loss: 0.0668230801820755
critic loss

critic loss: 0.005972776561975479
actor loss: 0.07695917785167694
critic loss: 0.005333415698260069
actor loss: 0.09721043705940247
critic loss: 0.0076173944398760796
actor loss: 0.09073488414287567
critic loss: 0.006575596518814564
actor loss: 0.09293244779109955
critic loss: 0.005560053512454033
actor loss: 0.05994652584195137
critic loss: 0.006928044371306896
actor loss: 0.04905565083026886
critic loss: 0.0055997176095843315
actor loss: 0.08618703484535217
critic loss: 0.004364347085356712
actor loss: 0.04886210337281227
critic loss: 0.006518937647342682
actor loss: 0.03837747871875763
critic loss: 0.007516958750784397
actor loss: 0.08770839124917984
critic loss: 0.004277093335986137
actor loss: 0.04434797540307045
critic loss: 0.009130316786468029
actor loss: 0.05860845372080803
critic loss: 0.008360747247934341
actor loss: 0.06528370082378387
critic loss: 0.004282449837774038
actor loss: 0.052020926028490067
critic loss: 0.01248930674046278
actor loss: 0.04671254754066467
critic l

critic loss: 0.004532185848802328
actor loss: 0.0566156730055809
critic loss: 0.00716851931065321
actor loss: 0.09321299195289612
critic loss: 0.004488037899136543
actor loss: 0.05282590538263321
critic loss: 0.0031697924714535475
actor loss: 0.07846598327159882
critic loss: 0.006930646486580372
actor loss: 0.13564682006835938
critic loss: 0.0072930059395730495
actor loss: 0.09740522503852844
critic loss: 0.00450711278244853
actor loss: 0.08959231525659561
critic loss: 0.0034752213396131992
actor loss: 0.05954226106405258
critic loss: 0.004952262621372938
actor loss: 0.07260812819004059
critic loss: 0.008227386511862278
actor loss: 0.04639003798365593
critic loss: 0.005123679060488939
actor loss: 0.05169583857059479
critic loss: 0.005828628782182932
actor loss: 0.08438298106193542
critic loss: 0.004704377613961697
actor loss: 0.04643510282039642
critic loss: 0.004406188149005175
actor loss: 0.07903169095516205
critic loss: 0.004337668884545565
actor loss: 0.0927576869726181
critic loss

critic loss: 0.0042141033336520195
actor loss: 0.07841460406780243
critic loss: 0.0033339231740683317
actor loss: 0.06558842957019806
critic loss: 0.0027562202885746956
actor loss: 0.05562789738178253
critic loss: 0.00410202331840992
actor loss: 0.07349400222301483
critic loss: 0.004247969016432762
actor loss: 0.06186157092452049
critic loss: 0.00875675305724144
actor loss: 0.03269375115633011
critic loss: 0.0047324057668447495
actor loss: 0.07789798080921173
critic loss: 0.005982749164104462
actor loss: 0.06327249854803085
critic loss: 0.0030539934523403645
actor loss: 0.05326368287205696
critic loss: 0.006364782340824604
actor loss: 0.049499474465847015
critic loss: 0.004441097378730774
actor loss: 0.07473121583461761
critic loss: 0.004568992182612419
actor loss: 0.0649687796831131
critic loss: 0.004203038290143013
actor loss: 0.09618920832872391
critic loss: 0.002304208930581808
actor loss: 0.07696752995252609
critic loss: 0.005470622330904007
actor loss: 0.0584312304854393
critic l

critic loss: 0.0027417512610554695
actor loss: 0.09847535192966461
critic loss: 0.00934785045683384
actor loss: 0.03886900097131729
critic loss: 0.003973673097789288
actor loss: 0.04293616861104965
critic loss: 0.005131673067808151
actor loss: 0.0825653001666069
critic loss: 0.0035687237977981567
actor loss: 0.09783423691987991
critic loss: 0.0032287847716361284
actor loss: 0.06533818691968918
critic loss: 0.007836866192519665
actor loss: 0.02503310516476631
critic loss: 0.004456540569663048
actor loss: 0.09011770784854889
critic loss: 0.0045705013908445835
actor loss: 0.03683091700077057
critic loss: 0.003561480902135372
actor loss: 0.057587962597608566
critic loss: 0.004932148847728968
actor loss: 0.07803662866353989
critic loss: 0.003823001403361559
actor loss: 0.03191349282860756
critic loss: 0.003730509663000703
actor loss: 0.05713341385126114
critic loss: 0.004569650162011385
actor loss: 0.028077494353055954
critic loss: 0.004226690158247948
actor loss: 0.07569672167301178
critic

critic loss: 0.0030937367118895054
actor loss: 0.10802431404590607
critic loss: 0.005638044327497482
actor loss: 0.055069442838430405
critic loss: 0.0024424572475254536
actor loss: 0.029719127342104912
critic loss: 0.004430143628269434
actor loss: 0.08289894461631775
critic loss: 0.007197052240371704
actor loss: 0.01051434874534607
critic loss: 0.00512531865388155
actor loss: 0.0753602683544159
critic loss: 0.004769057966768742
actor loss: 0.04774948209524155
critic loss: 0.003552983980625868
actor loss: 0.09084871411323547
critic loss: 0.005044244229793549
actor loss: 0.08659423142671585
critic loss: 0.0034636612981557846
actor loss: 0.03727339580655098
critic loss: 0.0038890065625309944
actor loss: 0.056018803268671036
critic loss: 0.0030065872706472874
actor loss: 0.05919831991195679
critic loss: 0.004069135989993811
actor loss: 0.07771286368370056
critic loss: 0.0017992407083511353
actor loss: 0.07104083895683289
critic loss: 0.005780487786978483
actor loss: 0.053490158170461655
cr

critic loss: 0.0029085588175803423
actor loss: 0.030789891257882118
critic loss: 0.0026313122361898422
actor loss: 0.07615281641483307
critic loss: 0.0030784239061176777
actor loss: 0.08805514127016068
critic loss: 0.0022362140007317066
actor loss: 0.07541333138942719
critic loss: 0.004339421633630991
actor loss: 0.08351016789674759
critic loss: 0.0025902283377945423
actor loss: 0.047935135662555695
critic loss: 0.0031346988398581743
actor loss: 0.05983994901180267
critic loss: 0.0022676209919154644
actor loss: 0.097023606300354
critic loss: 0.002869406482204795
actor loss: 0.0738963931798935
critic loss: 0.003914461936801672
actor loss: 0.06764326989650726
critic loss: 0.0026636733673512936
actor loss: 0.06494742631912231
critic loss: 0.0038190740160644054
actor loss: 0.06781882792711258
critic loss: 0.004509373567998409
actor loss: 0.0339130274951458
critic loss: 0.003109485376626253
actor loss: 0.06589058041572571
critic loss: 0.004366632085293531
actor loss: 0.049436137080192566
cr

critic loss: 0.00293735065497458
actor loss: 0.02745375595986843
critic loss: 0.007179217413067818
actor loss: 0.025149298831820488
critic loss: 0.00253572897054255
actor loss: 0.06859784573316574
critic loss: 0.0040937261655926704
actor loss: 0.022455431520938873
critic loss: 0.0022805379703640938
actor loss: 0.05915067344903946
critic loss: 0.0036926837638020515
actor loss: 0.042614344507455826
critic loss: 0.004724380560219288
actor loss: 0.09103284776210785
critic loss: 0.004117825999855995
actor loss: 0.08056715130805969
critic loss: 0.005119232460856438
actor loss: 0.0855269655585289
critic loss: 0.004893672652542591
actor loss: 0.0659625381231308
critic loss: 0.0043396782130002975
actor loss: 0.0726083517074585
critic loss: 0.005622006021440029
actor loss: 0.048856720328330994
critic loss: 0.00368174584582448
actor loss: 0.06731437891721725
critic loss: 0.005932524334639311
actor loss: 0.05918658524751663
critic loss: 0.0025910139083862305
actor loss: 0.07007317245006561
critic 

critic loss: 0.002088779117912054
actor loss: 0.0349789559841156
critic loss: 0.0037843119353055954
actor loss: 0.06813688576221466
critic loss: 0.004012902267277241
actor loss: 0.04030769690871239
critic loss: 0.008472633548080921
actor loss: 0.053272541612386703
critic loss: 0.0022634672932326794
actor loss: 0.0760546624660492
critic loss: 0.0017587526235729456
actor loss: 0.07128049433231354
critic loss: 0.011286034248769283
actor loss: 0.044403836131095886
critic loss: 0.002068743109703064
actor loss: 0.0929659754037857
critic loss: 0.002468460239470005
actor loss: 0.03253176808357239
critic loss: 0.007378438487648964
actor loss: 0.09990110248327255
critic loss: 0.002940214704722166
actor loss: 0.007546044886112213
critic loss: 0.003469334216788411
actor loss: 0.08572603762149811
critic loss: 0.002305313479155302
actor loss: 0.03313572332262993
critic loss: 0.002583881374448538
actor loss: 0.022430365905165672
critic loss: 0.0027565709315240383
actor loss: 0.06116406247019768
criti

critic loss: 0.002577677834779024
actor loss: 0.03293124958872795
critic loss: 0.002782115712761879
actor loss: 0.03488429635763168
critic loss: 0.00203518895432353
actor loss: 0.07881829887628555
critic loss: 0.0024111068341881037
actor loss: 0.04509808123111725
critic loss: 0.0017947428859770298
actor loss: 0.07078081369400024
critic loss: 0.0028332513757050037
actor loss: 0.07118869572877884
critic loss: 0.0070659201592206955
actor loss: 0.048710018396377563
critic loss: 0.004675906617194414
actor loss: 0.030637629330158234
critic loss: 0.005882450845092535
actor loss: 0.08780726790428162
critic loss: 0.004206591285765171
actor loss: 0.05883186310529709
critic loss: 0.004104470368474722
actor loss: 0.047117140144109726
critic loss: 0.0025498720351606607
actor loss: 0.054591432213783264
critic loss: 0.004710364621132612
actor loss: 0.05968644469976425
critic loss: 0.0034562854561954737
actor loss: 0.07586532831192017
critic loss: 0.001995696220546961
actor loss: 0.05314921587705612
c

critic loss: 0.0036424538120627403
actor loss: 0.049075961112976074
critic loss: 0.003116367384791374
actor loss: 0.02917894907295704
critic loss: 0.0028926553204655647
actor loss: 0.03545858711004257
critic loss: 0.002008522395044565
actor loss: 0.04201880842447281
critic loss: 0.002603986067697406
actor loss: 0.062039852142333984
critic loss: 0.0029337434098124504
actor loss: 0.06017854064702988
critic loss: 0.003943608142435551
actor loss: 0.0610806979238987
critic loss: 0.004779780749231577
actor loss: 0.04946622624993324
critic loss: 0.004977050237357616
actor loss: 0.012634369544684887
critic loss: 0.0030361711978912354
actor loss: 0.05057366192340851
critic loss: 0.0013799946755170822
actor loss: 0.061990708112716675
critic loss: 0.0023852670565247536
actor loss: 0.056437574326992035
critic loss: 0.0016394926933571696
actor loss: 0.024399152025580406
critic loss: 0.010352729819715023
actor loss: 0.030020393431186676
critic loss: 0.0029951208271086216
actor loss: 0.04273590818047

critic loss: 0.002087373286485672
actor loss: 0.04370518773794174
critic loss: 0.001677626627497375
actor loss: 0.055804625153541565
critic loss: 0.0025613391771912575
actor loss: 0.05944613367319107
critic loss: 0.0022755106911063194
actor loss: 0.06824040412902832
critic loss: 0.002395796589553356
actor loss: 0.03379688039422035
critic loss: 0.002165812999010086
actor loss: 0.03927032649517059
critic loss: 0.002200415125116706
actor loss: 0.039059095084667206
critic loss: 0.0020895672496408224
actor loss: 0.05875735729932785
critic loss: 0.0037126149982213974
actor loss: 0.0022576856426894665
critic loss: 0.0021302313543856144
actor loss: 0.016113564372062683
critic loss: 0.002832971978932619
actor loss: 0.04917730391025543
critic loss: 0.0031463794875890017
actor loss: -0.008456842042505741
critic loss: 0.00213650893419981
actor loss: 0.026273265480995178
critic loss: 0.0027545089833438396
actor loss: 0.06944163888692856
critic loss: 0.002852770034223795
actor loss: 0.07082249969244

critic loss: 0.002756197704002261
actor loss: 0.05371524393558502
critic loss: 0.0018153174314647913
actor loss: 0.038191091269254684
critic loss: 0.0022280141711235046
actor loss: 0.037336766719818115
critic loss: 0.00179151666816324
actor loss: 0.02466331236064434
critic loss: 0.002914468292146921
actor loss: 0.002530410885810852
critic loss: 0.004344550892710686
actor loss: 0.027339041233062744
critic loss: 0.0017850205767899752
actor loss: 0.04341551288962364
critic loss: 0.0014709359966218472
actor loss: 0.020939700305461884
critic loss: 0.0015321655664592981
actor loss: 0.06711021065711975
critic loss: 0.001124924048781395
actor loss: 0.03664296865463257
critic loss: 0.0027667831163853407
actor loss: 0.06878864765167236
critic loss: 0.001047689700499177
actor loss: 0.020827308297157288
critic loss: 0.0021176915615797043
actor loss: 0.04689328745007515
critic loss: 0.0015067416243255138
actor loss: 0.030695846304297447
critic loss: 0.002248885575681925
actor loss: 0.03290997445583

critic loss: 0.002227595541626215
actor loss: 0.0492938756942749
critic loss: 0.002138375071808696
actor loss: 0.03421665355563164
critic loss: 0.0010064338566735387
actor loss: 0.026358239352703094
critic loss: 0.0014808285050094128
actor loss: 0.0347372442483902
critic loss: 0.0008986681350506842
actor loss: 0.023160647600889206
critic loss: 0.002188796875998378
actor loss: 0.026551969349384308
critic loss: 0.0019497672328725457
actor loss: 0.047853149473667145
critic loss: 0.002241178648546338
actor loss: 0.0588562972843647
critic loss: 0.0013412911212071776
actor loss: 0.05380285903811455
critic loss: 0.0013829706003889441
actor loss: 0.04062170907855034
critic loss: 0.0014302220661193132
actor loss: 0.055730290710926056
critic loss: 0.0019262038404121995
actor loss: 0.06673392653465271
critic loss: 0.0011029655579477549
actor loss: 0.024471722543239594
critic loss: 0.0014649702934548259
actor loss: 0.060505274683237076
critic loss: 0.0017520638648420572
actor loss: 0.0930819958448

critic loss: 0.0014407418202608824
actor loss: 0.0858207419514656
critic loss: 0.004025627393275499
actor loss: 0.0758594498038292
critic loss: 0.001154465600848198
actor loss: 0.049881916493177414
critic loss: 0.0014238639269024134
actor loss: 0.040051285177469254
critic loss: 0.0011833118041977286
actor loss: 0.04421966150403023
critic loss: 0.0016879491740837693
actor loss: 0.017297513782978058
critic loss: 0.0027227462269365788
actor loss: 0.025934353470802307
critic loss: 0.0016464984510093927
actor loss: 0.048851434141397476
critic loss: 0.0034252998884767294
actor loss: 0.05333276838064194
critic loss: 0.0014315354637801647
actor loss: 0.019793877378106117
critic loss: 0.002291057724505663
actor loss: 0.037441421300172806
critic loss: 0.0022543491795659065
actor loss: 0.017204441130161285
critic loss: 0.0009831112110987306
actor loss: 0.05233947932720184
critic loss: 0.002206838456913829
actor loss: 0.040792353451251984
critic loss: 0.0028842580504715443
actor loss: 0.0880144312

critic loss: 0.0021661734208464622
actor loss: 0.00952913984656334
critic loss: 0.0023062133695930243
actor loss: 0.037424203008413315
critic loss: 0.001831808127462864
actor loss: 0.03914565593004227
critic loss: 0.0013403230113908648
actor loss: 0.05587712302803993
critic loss: 0.0020304671488702297
actor loss: 0.06319663673639297
critic loss: 0.0026490138843655586
actor loss: 0.05751295015215874
critic loss: 0.003508195746690035
actor loss: 0.05832591652870178
critic loss: 0.00419358815997839
actor loss: 0.013925295323133469
critic loss: 0.003505467437207699
actor loss: 0.0569029375910759
critic loss: 0.0018440623534843326
actor loss: 0.03264567628502846
critic loss: 0.0019927560351788998
actor loss: 0.04596436396241188
critic loss: 0.004725786857306957
actor loss: 0.05556364357471466
critic loss: 0.001968652242794633
actor loss: 0.03291207551956177
critic loss: 0.0011295061558485031
actor loss: 0.019556140527129173
critic loss: 0.0036203935742378235
actor loss: 0.036626480519771576

critic loss: 0.001323248608969152
actor loss: 0.04839190095663071
critic loss: 0.0013546607224270701
actor loss: 0.06388392299413681
critic loss: 0.007098298519849777
actor loss: 0.021482761949300766
critic loss: 0.0017386978724971414
actor loss: 0.07658659666776657
critic loss: 0.001548872678540647
actor loss: 0.018344953656196594
critic loss: 0.0016552339075133204
actor loss: 0.04250454157590866
critic loss: 0.0015373517526313663
actor loss: 0.05983217433094978
critic loss: 0.00240794918499887
actor loss: 0.04533363878726959
critic loss: 0.0030188916716724634
actor loss: 0.02638309821486473
critic loss: 0.0018151840195059776
actor loss: 0.067493736743927
critic loss: 0.0017514037899672985
actor loss: 0.06261409819126129
critic loss: 0.001492764800786972
actor loss: 0.0014599170535802841
critic loss: 0.0033652835991233587
actor loss: 0.048794109374284744
critic loss: 0.0017462637042626739
actor loss: 0.006266491487622261
critic loss: 0.0039059254340827465
actor loss: 0.037698607891798

critic loss: 0.0007167735602706671
actor loss: 0.053130198270082474
critic loss: 0.0013800474116578698
actor loss: 0.05399215221405029
critic loss: 0.0010993906762450933
actor loss: 0.052943211048841476
critic loss: 0.00196982198394835
actor loss: 0.043151162564754486
critic loss: 0.0017708573723211884
actor loss: 0.016798391938209534
critic loss: 0.0019062119536101818
actor loss: 0.015641972422599792
critic loss: 0.0041059283539652824
actor loss: 0.06665826588869095
critic loss: 0.0022079837508499622
actor loss: 0.015608478337526321
critic loss: 0.0009306041756644845
actor loss: 0.0336422435939312
critic loss: 0.0028575907927006483
actor loss: 0.06652075052261353
critic loss: 0.0024483678862452507
actor loss: 0.04878229647874832
critic loss: 0.001716867322102189
actor loss: 0.040578443557024
critic loss: 0.0008801919175311923
actor loss: 0.05032876878976822
critic loss: 0.001802725251764059
actor loss: 0.010432329028844833
critic loss: 0.0046005528420209885
actor loss: 0.0390796624124

critic loss: 0.0014020660892128944
actor loss: 0.02240743674337864
critic loss: 0.003615662455558777
actor loss: 0.04247066378593445
critic loss: 0.002387422136962414
actor loss: 0.019498970359563828
critic loss: 0.003378999186679721
actor loss: 0.0350981131196022
critic loss: 0.0021662013605237007
actor loss: 0.08374135196208954
critic loss: 0.0017100096447393298
actor loss: 0.02937278151512146
critic loss: 0.0017348809633404016
actor loss: 0.014804506674408913
critic loss: 0.0028260324615985155
actor loss: 0.04250843822956085
critic loss: 0.0009876999538391829
actor loss: 0.023657193407416344
critic loss: 0.0033904826268553734
actor loss: 0.06990070641040802
critic loss: 0.0026560286059975624
actor loss: 0.00653956551104784
critic loss: 0.001895594410598278
actor loss: 0.030556660145521164
critic loss: 0.0016983821988105774
actor loss: 0.045865606516599655
critic loss: 0.002080119214951992
actor loss: 0.056029271334409714
critic loss: 0.0023458097130060196
actor loss: 0.0557857118546

critic loss: 0.0025954199954867363
actor loss: 0.027884740382432938
critic loss: 0.0016060511115938425
actor loss: 0.08063818514347076
critic loss: 0.001302383840084076
actor loss: 0.021304292604327202
critic loss: 0.001110154902562499
actor loss: 0.04410393536090851
critic loss: 0.0010017543099820614
actor loss: 0.05390593409538269
critic loss: 0.0015615695156157017
actor loss: 0.019552841782569885
critic loss: 0.0015099422307685018
actor loss: 0.011397145688533783
critic loss: 0.0012909003999084234
actor loss: 0.053730595856904984
critic loss: 0.0019312005024403334
actor loss: 0.033569321036338806
critic loss: 0.0020462991669774055
actor loss: 0.06424570083618164
critic loss: 0.0016944266390055418
actor loss: -0.009361238218843937
critic loss: 0.0011471663601696491
actor loss: 0.027983885258436203
critic loss: 0.0014273509150370955
actor loss: 0.02468978427350521
critic loss: 0.0014926518779248
actor loss: 0.017840679734945297
critic loss: 0.0013227545423433185
actor loss: -0.0064033

critic loss: 0.0013233036734163761
actor loss: 0.033020488917827606
critic loss: 0.0018605688819661736
actor loss: 0.011114915832877159
critic loss: 0.0021993685513734818
actor loss: 0.032717157155275345
critic loss: 0.002130009699612856
actor loss: 0.032070692628622055
critic loss: 0.0013540508225560188
actor loss: 0.04706459492444992
critic loss: 0.0016734977252781391
actor loss: 0.03364741802215576
critic loss: 0.0009892552625387907
actor loss: 0.03210081160068512
critic loss: 0.0014565784949809313
actor loss: 0.05325763672590256
critic loss: 0.0012325900606811047
actor loss: 0.011560105718672276
critic loss: 0.0021403636783361435
actor loss: 0.027943946421146393
critic loss: 0.001841419143602252
actor loss: 0.05068635940551758
critic loss: 0.0015712877502664924
actor loss: 0.06214957684278488
critic loss: 0.0018947395728901029
actor loss: 0.013013318181037903
critic loss: 0.0014487139414995909
actor loss: 0.004219990223646164
critic loss: 0.0015464742900803685
actor loss: 0.0293595

critic loss: 0.004613465629518032
actor loss: 0.0521063432097435
critic loss: 0.0012041927548125386
actor loss: 0.014337429776787758
critic loss: 0.0017721730982884765
actor loss: 0.03873052820563316
critic loss: 0.001728116418235004
actor loss: 0.023343689739704132
critic loss: 0.0019050072878599167
actor loss: 0.044844016432762146
critic loss: 0.0020224524196237326
actor loss: 0.03385491669178009
critic loss: 0.0010773742105811834
actor loss: 0.06715698540210724
critic loss: 0.0019565033726394176
actor loss: 0.01465081237256527
critic loss: 0.0017979743424803019
actor loss: 0.037290092557668686
critic loss: 0.0017277693841606379
actor loss: 0.03316522762179375
critic loss: 0.004560819827020168
actor loss: 0.033974818885326385
critic loss: 0.0028587724082171917
actor loss: 0.018005285412073135
critic loss: 0.0020009425934404135
actor loss: 0.04481629282236099
critic loss: 0.002082378137856722
actor loss: 0.0470510758459568
critic loss: 0.002885655499994755
actor loss: 0.03778134286403

critic loss: 0.0013802892062813044
actor loss: 0.03342090919613838
critic loss: 0.00231002876535058
actor loss: 0.008374050259590149
critic loss: 0.0013440812472254038
actor loss: 0.01786264032125473
critic loss: 0.002053267089650035
actor loss: 0.023603200912475586
critic loss: 0.002229405101388693
actor loss: 0.04032908380031586
critic loss: 0.0020557718817144632
actor loss: 0.03268009051680565
critic loss: 0.0010387127986177802
actor loss: 0.039885882288217545
critic loss: 0.001298920949921012
actor loss: 0.028825074434280396
critic loss: 0.002046536188572645
actor loss: 0.04224330931901932
critic loss: 0.002314805518835783
actor loss: 0.01589697226881981
critic loss: 0.0030271275900304317
actor loss: 0.03255860507488251
critic loss: 0.010283850133419037
actor loss: 0.04559115692973137
critic loss: 0.00111858150921762
actor loss: 0.02787245810031891
critic loss: 0.0027067253831773996
actor loss: 0.023421313613653183
critic loss: 0.0017518564127385616
actor loss: 0.028572505339980125

critic loss: 0.00158104975707829
actor loss: 0.030615348368883133
critic loss: 0.0008401155937463045
actor loss: 0.05529485270380974
critic loss: 0.0014725344954058528
actor loss: 0.012264616787433624
critic loss: 0.0009855382377281785
actor loss: 0.034996919333934784
critic loss: 0.0009410722414031625
actor loss: 0.04432173818349838
critic loss: 0.0011027881409972906
actor loss: 0.041676927357912064
critic loss: 0.0010065920650959015
actor loss: 0.026863861829042435
critic loss: 0.0012008489575237036
actor loss: 0.0527256615459919
critic loss: 0.0008540095877833664
actor loss: 0.04355441406369209
critic loss: 0.001177932252176106
actor loss: 0.026852384209632874
critic loss: 0.0014367492403835058
actor loss: 0.03740464150905609
critic loss: 0.0021903342567384243
actor loss: 0.029459210112690926
critic loss: 0.0013247665483504534
actor loss: 0.06665913760662079
critic loss: 0.0010045445524156094
actor loss: 0.044086653739213943
critic loss: 0.0014365512179210782
actor loss: 0.024907357

critic loss: 0.0016027460806071758
actor loss: 0.01587097719311714
critic loss: 0.0014896356733515859
actor loss: 0.023242615163326263
critic loss: 0.0016933598089963198
actor loss: 0.05999797582626343
critic loss: 0.001095148385502398
actor loss: 0.0011507619637995958
critic loss: 0.0013318900018930435
actor loss: 0.014252869412302971
critic loss: 0.0015191016718745232
actor loss: 0.036615196615457535
critic loss: 0.0016451060073450208
actor loss: 0.06297078728675842
critic loss: 0.0009272241732105613
actor loss: 0.016817711293697357
critic loss: 0.003429603995755315
actor loss: 0.011444199830293655
critic loss: 0.0026247058995068073
actor loss: 0.052728813141584396
critic loss: 0.0010092580923810601
actor loss: 0.03598751872777939
critic loss: 0.0009336352231912315
actor loss: 0.05310230702161789
critic loss: 0.0018370841862633824
actor loss: 0.038152504712343216
critic loss: 0.0013316484401002526
actor loss: 0.04654508829116821
critic loss: 0.0008229591185227036
actor loss: 0.068326

critic loss: 0.0010658812243491411
actor loss: 0.03302031755447388
critic loss: 0.001357667031697929
actor loss: 0.05205029249191284
critic loss: 0.001351450104266405
actor loss: 0.02735329046845436
critic loss: 0.0027561006136238575
actor loss: 0.026802167296409607
critic loss: 0.0011845040135085583
actor loss: 0.03853747248649597
critic loss: 0.0016014730790629983
actor loss: 0.037080273032188416
critic loss: 0.0008604407194070518
actor loss: 0.016189901158213615
critic loss: 0.0010022444184869528
actor loss: 0.007375895977020264
critic loss: 0.0017650985391810536
actor loss: 0.007752908393740654
critic loss: 0.0033054419327527285
actor loss: 0.05025920271873474
critic loss: 0.0006803154828958213
actor loss: 0.024492288008332253
critic loss: 0.0017209681682288647
actor loss: 0.05536907911300659
critic loss: 0.0018590637482702732
actor loss: 0.02551894262433052
critic loss: 0.0015010673087090254
actor loss: 0.020259149372577667
critic loss: 0.0015924056060612202
actor loss: 0.04159089

critic loss: 0.0007696880493313074
actor loss: 0.05850258097052574
critic loss: 0.001324932323768735
actor loss: 0.019870225340127945
critic loss: 0.000881838146597147
actor loss: 0.04274757578969002
critic loss: 0.0009271432645618916
actor loss: 0.017182383686304092
critic loss: 0.0017813399899750948
actor loss: 0.056999579071998596
critic loss: 0.0032510999590158463
actor loss: 0.015402719378471375
critic loss: 0.0017617462435737252
actor loss: 0.03164895996451378
critic loss: 0.0012555561261251569
actor loss: 0.033324696123600006
critic loss: 0.0010445776861160994
actor loss: 0.018081970512866974
critic loss: 0.0012238784693181515
actor loss: 0.06526722013950348
critic loss: 0.002781185321509838
actor loss: 0.02576509676873684
critic loss: 0.0011047571897506714
actor loss: 0.029713023453950882
critic loss: 0.0014714316930621862
actor loss: 0.04388292878866196
critic loss: 0.0011277372250333428
actor loss: 0.026464803144335747
critic loss: 0.0006132813869044185
actor loss: 0.03293567

critic loss: 0.001037816284224391
actor loss: 0.060546744614839554
critic loss: 0.0006634494056925178
actor loss: 0.06016205623745918
critic loss: 0.001317034475505352
actor loss: 0.06450314819812775
critic loss: 0.0006484162877313793
actor loss: 0.023328309878706932
critic loss: 0.0010889476398006082
actor loss: 0.03682761266827583
critic loss: 0.0007102980744093657
actor loss: 0.0026864917017519474
critic loss: 0.0006506828940473497
actor loss: 0.04834861680865288
critic loss: 0.0013968376442790031
actor loss: 0.03912097215652466
critic loss: 0.001533306553028524
actor loss: 0.04514668509364128
critic loss: 0.0014196322299540043
actor loss: 0.040781695395708084
critic loss: 0.002102019963786006
actor loss: 0.03208116814494133
critic loss: 0.0019355501281097531
actor loss: 0.0432591587305069
critic loss: 0.001040909206494689
actor loss: 0.04234180599451065
critic loss: 0.0012353905476629734
actor loss: 0.04796969145536423
critic loss: 0.001125076785683632
actor loss: 0.022907543927431

critic loss: 0.001913480693474412
actor loss: 0.03541085869073868
critic loss: 0.000806159689091146
actor loss: 0.03855253383517265
critic loss: 0.000795486499555409
actor loss: 0.05390197038650513
critic loss: 0.002109936438500881
actor loss: 0.028903670608997345
critic loss: 0.0011911909095942974
actor loss: 0.0430615171790123
critic loss: 0.0006132454727776349
actor loss: 0.03439038246870041
critic loss: 0.0018650745041668415
actor loss: 0.025230592116713524
critic loss: 0.00225058407522738
actor loss: -0.0035259942524135113
critic loss: 0.0009297827491536736
actor loss: 0.00120201101526618
critic loss: 0.0018480723956599832
actor loss: 0.031728535890579224
critic loss: 0.0015585075598210096
actor loss: 0.04011433571577072
critic loss: 0.0012257024645805359
actor loss: 0.019312061369419098
critic loss: 0.001342578325420618
actor loss: 0.02532717026770115
critic loss: 0.0011486441362649202
actor loss: 0.018536966294050217
critic loss: 0.0013759098947048187
actor loss: 0.0089950207620

critic loss: 0.0009478236897848547
actor loss: 0.035024795681238174
critic loss: 0.0011369121493771672
actor loss: 0.02620880864560604
critic loss: 0.0011489035096019506
actor loss: 0.01449344027787447
critic loss: 0.0021097357384860516
actor loss: 0.06277960538864136
critic loss: 0.0024781685788184404
actor loss: 0.035431042313575745
critic loss: 0.0014074270147830248
actor loss: 0.026280637830495834
critic loss: 0.0011725295335054398
actor loss: 0.07681559026241302
critic loss: 0.0007859246688894928
actor loss: 0.05550947040319443
critic loss: 0.0013052973663434386
actor loss: 0.028790229931473732
critic loss: 0.0009686483535915613
actor loss: 0.05557391047477722
critic loss: 0.0014807209372520447
actor loss: 0.03681274503469467
critic loss: 0.001187239307910204
actor loss: 0.023373590782284737
critic loss: 0.0009297198848798871
actor loss: -0.00010272162035107613
critic loss: 0.002170630730688572
actor loss: 0.03072241321206093
critic loss: 0.0007857781602069736
actor loss: 0.026774

critic loss: 0.0017603723099455237
actor loss: 0.03241291642189026
critic loss: 0.0008563298615626991
actor loss: 0.0368707999587059
critic loss: 0.0010750177316367626
actor loss: 0.04126998782157898
critic loss: 0.0016763624735176563
actor loss: -8.088536560535431e-05
critic loss: 0.0012259450741112232
actor loss: 0.009534778073430061
critic loss: 0.0007233470678329468
actor loss: 0.020847100764513016
critic loss: 0.001811434281989932
actor loss: 0.058820370584726334
critic loss: 0.0015727418940514326
actor loss: 0.04537547379732132
critic loss: 0.0013084500096738338
actor loss: -0.0044138869270682335
critic loss: 0.0031190377194434404
actor loss: 0.04807916283607483
critic loss: 0.0012879545101895928
actor loss: 0.01465585920959711
critic loss: 0.0017724093049764633
actor loss: 0.02107151597738266
critic loss: 0.0012135684955865145
actor loss: 0.010513312183320522
critic loss: 0.0021363329142332077
actor loss: 0.05056088790297508
critic loss: 0.0009322999394498765
actor loss: 0.01802

critic loss: 0.0008023724076338112
actor loss: -0.009865609928965569
critic loss: 0.0017511664191260934
actor loss: 0.02571619302034378
critic loss: 0.0019467870006337762
actor loss: 0.04012727737426758
critic loss: 0.0017600285355001688
actor loss: 0.02973662130534649
critic loss: 0.0013156819622963667
actor loss: 0.01632695645093918
critic loss: 0.0012960444437339902
actor loss: 0.029424887150526047
critic loss: 0.0016691312193870544
actor loss: 0.016827300190925598
critic loss: 0.0022213850170373917
actor loss: 0.0395907461643219
critic loss: 0.0017202876042574644
actor loss: 0.02337217889726162
critic loss: 0.0010219996329396963
actor loss: 0.06008544936776161
critic loss: 0.0030633858405053616
actor loss: 0.052283741533756256
critic loss: 0.0012385539012029767
actor loss: 0.04912451654672623
critic loss: 0.0017425600672140718
actor loss: 0.04390531778335571
critic loss: 0.0008908075978979468
actor loss: 0.013842294923961163
critic loss: 0.0006506455247290432
actor loss: 0.03389129

critic loss: 0.0026304733473807573
actor loss: 0.0420512892305851
critic loss: 0.001031267805956304
actor loss: 0.025037501007318497
critic loss: 0.0008771342691034079
actor loss: 0.031082458794116974
critic loss: 0.0011077759554609656
actor loss: 0.019372910261154175
critic loss: 0.0011274988064542413
actor loss: 0.05446082353591919
critic loss: 0.0008994214003905654
actor loss: 0.022934891283512115
critic loss: 0.002041386906057596
actor loss: 0.017446210607886314
critic loss: 0.0008859958034008741
actor loss: 0.043414629995822906
critic loss: 0.0017053241608664393
actor loss: 0.05663149803876877
critic loss: 0.0008060433901846409
actor loss: 0.011107763275504112
critic loss: 0.0011043563717976213
actor loss: 0.02283809706568718
critic loss: 0.0011704303324222565
actor loss: 0.03501398488879204
critic loss: 0.000798612367361784
actor loss: 0.05409456044435501
critic loss: 0.0007682492723688483
actor loss: -0.03393600136041641
critic loss: 0.001130098826251924
actor loss: 0.0520461685

critic loss: 0.0009651332511566579
actor loss: 0.030417006462812424
critic loss: 0.0006297009531408548
actor loss: 0.026692617684602737
critic loss: 0.0028876035939902067
actor loss: 0.03699186444282532
critic loss: 0.0014417202910408378
actor loss: 0.028442397713661194
critic loss: 0.0013746963813900948
actor loss: 0.04067349061369896
critic loss: 0.0009863963350653648
actor loss: 0.009629308246076107
critic loss: 0.0012073030229657888
actor loss: 0.020702654495835304
critic loss: 0.0016485557425767183
actor loss: 0.01801491156220436
critic loss: 0.0021600937470793724
actor loss: 0.04142105579376221
critic loss: 0.0032832869328558445
actor loss: 0.038443855941295624
critic loss: 0.0015681206714361906
actor loss: 0.039796192198991776
critic loss: 0.002155449939891696
actor loss: 0.00785259623080492
critic loss: 0.002111429814249277
actor loss: 0.024101320654153824
critic loss: 0.002206489909440279
actor loss: 0.0364113375544548
critic loss: 0.0008388500427827239
actor loss: 0.029888924

critic loss: 0.0013463246868923306
actor loss: 0.017767127603292465
critic loss: 0.0010717817349359393
actor loss: 0.020210882648825645
critic loss: 0.0011217918945476413
actor loss: 0.05351611599326134
critic loss: 0.0008947094320319593
actor loss: 0.04410883039236069
critic loss: 0.0009690013830550015
actor loss: 0.043772779405117035
critic loss: 0.0011301935883238912
actor loss: 0.03189975395798683
critic loss: 0.0009281175443902612
actor loss: 0.04589231312274933
critic loss: 0.0011132098734378815
actor loss: 0.05781179666519165
critic loss: 0.0009219027706421912
actor loss: 0.0551350973546505
critic loss: 0.0021671417634934187
actor loss: 0.03083129972219467
critic loss: 0.0010821912437677383
actor loss: 0.03281641751527786
critic loss: 0.0006451895460486412
actor loss: 0.042987897992134094
critic loss: 0.0010644926223903894
actor loss: -0.007636315189301968
critic loss: 0.0009877793490886688
actor loss: 0.030053092166781425
critic loss: 0.0012331008911132812
actor loss: 0.0155004

critic loss: 0.0009211478754878044
actor loss: 0.002439219504594803
critic loss: 0.0008426408749073744
actor loss: 0.06127217411994934
critic loss: 0.0010960373328998685
actor loss: 0.023235835134983063
critic loss: 0.0013289154740050435
actor loss: 0.03380817174911499
critic loss: 0.0009353150962851942
actor loss: 0.04912436380982399
critic loss: 0.0028502587229013443
actor loss: 0.04170779883861542
critic loss: 0.0005456042708829045
actor loss: 0.021721774712204933
critic loss: 0.0007625119760632515
actor loss: 0.023517660796642303
critic loss: 0.0033861962147057056
actor loss: -0.0015236004255712032
critic loss: 0.0011879383819177747
actor loss: 0.051427192986011505
critic loss: 0.0008766174432821572
actor loss: 0.0459122471511364
critic loss: 0.0014001884264871478
actor loss: 0.0075784591026604176
critic loss: 0.001964302733540535
actor loss: 0.03316056728363037
critic loss: 0.0012145090149715543
actor loss: 0.019467774778604507
critic loss: 0.0009022449376061559
actor loss: 0.0411

critic loss: 0.0008589084609411657
actor loss: 0.027462724596261978
critic loss: 0.0010185299906879663
actor loss: 0.02419295534491539
critic loss: 0.000605257460847497
actor loss: 0.023309169337153435
critic loss: 0.0012123850174248219
actor loss: 0.03956657648086548
critic loss: 0.0013031612616032362
actor loss: 0.08747251331806183
critic loss: 0.0005111100617796183
actor loss: 0.04242360591888428
critic loss: 0.0015568070812150836
actor loss: 0.009058915078639984
critic loss: 0.00047181526315398514
actor loss: 0.021466413512825966
critic loss: 0.0009551978437229991
actor loss: 0.03632376343011856
critic loss: 0.0007677820976823568
actor loss: 0.0248737595975399
critic loss: 0.0017718104645609856
actor loss: 0.04097253829240799
critic loss: 0.0007530414732173085
actor loss: 0.02291206829249859
critic loss: 0.000985648948699236
actor loss: 0.029367586597800255
critic loss: 0.0024223856162279844
actor loss: 0.014212870970368385
critic loss: 0.0013036004966124892
actor loss: 0.055616769

critic loss: 0.0010515502654016018
actor loss: 0.05709128826856613
critic loss: 0.0010283287847414613
actor loss: -0.004248984158039093
critic loss: 0.0008414876065216959
actor loss: 0.005140147637575865
critic loss: 0.0007891534478403628
actor loss: 0.018323928117752075
critic loss: 0.0017915552016347647
actor loss: 0.018231164664030075
critic loss: 0.001175829442217946
actor loss: 0.010157544165849686
critic loss: 0.0006906447233632207
actor loss: 0.029837673529982567
critic loss: 0.0010201261611655354
actor loss: 0.025136899203062057
critic loss: 0.0023438262287527323
actor loss: 0.03674176335334778
critic loss: 0.0012562170159071684
actor loss: 0.045204732567071915
critic loss: 0.0011070914333686233
actor loss: 0.03189147263765335
critic loss: 0.0011839589569717646
actor loss: -0.013605920597910881
critic loss: 0.000956755829975009
actor loss: 0.032208673655986786
critic loss: 0.0013058217009529471
actor loss: 0.035804878920316696
critic loss: 0.0010528572602197528
actor loss: 0.00

critic loss: 0.000759307702537626
actor loss: 0.041566476225852966
critic loss: 0.0007478485349565744
actor loss: -0.009672917425632477
critic loss: 0.0012061065062880516
actor loss: 0.029030336067080498
critic loss: 0.0008226973004639149
actor loss: 0.022030489519238472
critic loss: 0.0008127827313728631
actor loss: 0.01414468977600336
critic loss: 0.0008080624975264072
actor loss: 0.023056011646986008
critic loss: 0.002412655856460333
actor loss: 0.02996174618601799
critic loss: 0.0011587031185626984
actor loss: -0.006615823600441217
critic loss: 0.0016697435639798641
actor loss: 0.06224943324923515
critic loss: 0.0005528940819203854
actor loss: 0.028468845412135124
critic loss: 0.00103879498783499
actor loss: 0.05397193878889084
critic loss: 0.001377468346618116
actor loss: -0.008575305342674255
critic loss: 0.0019916812889277935
actor loss: 0.017266348004341125
critic loss: 0.0010376758873462677
actor loss: -0.003494780510663986
critic loss: 0.0009899127762764692
actor loss: 0.0342

critic loss: 0.0007250275230035186
actor loss: 0.00503425020724535
critic loss: 0.0008566842298023403
actor loss: 0.024658704176545143
critic loss: 0.0007705491734668612
actor loss: 0.05163358524441719
critic loss: 0.0008271709666587412
actor loss: 0.047804608941078186
critic loss: 0.0006222905358299613
actor loss: 0.03470940142869949
critic loss: 0.0016727200709283352
actor loss: 0.02075931802392006
critic loss: 0.0005064161960035563
actor loss: 0.03445439785718918
critic loss: 0.0016659555258229375
actor loss: 0.02790658175945282
critic loss: 0.001556427450850606
actor loss: 0.021625295281410217
critic loss: 0.0010375528363510966
actor loss: 0.041358016431331635
critic loss: 0.0013514995807781816
actor loss: 0.04465104639530182
critic loss: 0.0006589122349396348
actor loss: 0.04492902755737305
critic loss: 0.001148616778664291
actor loss: -0.008076788857579231
critic loss: 0.001178407110273838
actor loss: 0.029246948659420013
critic loss: 0.0009175364393740892
actor loss: 0.025584263

critic loss: 0.0018105729250237346
actor loss: 0.04704870656132698
critic loss: 0.0015758480876684189
actor loss: 0.04601559787988663
critic loss: 0.0009560525650158525
actor loss: 0.04636554419994354
critic loss: 0.0008439177181571722
actor loss: 0.025556448847055435
critic loss: 0.0008242063340730965
actor loss: 0.02570100873708725
critic loss: 0.0009145267540588975
actor loss: 0.01986003667116165
critic loss: 0.0012298182118684053
actor loss: 0.03651459887623787
critic loss: 0.0011596207041293383
actor loss: 0.028419334441423416
critic loss: 0.001321617979556322
actor loss: 0.04557289928197861
critic loss: 0.0006157195894047618
actor loss: 0.060220956802368164
critic loss: 0.0009336080984212458
actor loss: 0.01753927767276764
critic loss: 0.0010764736216515303
actor loss: 0.027641091495752335
critic loss: 0.0006026634946465492
actor loss: 0.041678283363580704
critic loss: 0.0007837368175387383
actor loss: 0.025699613615870476
critic loss: 0.0013884298969060183
actor loss: 0.03592152

critic loss: 0.0011251810938119888
actor loss: 0.004404893144965172
critic loss: 0.0006251709419302642
actor loss: 0.0334378182888031
critic loss: 0.0009615820017643273
actor loss: 0.025985047221183777
critic loss: 0.0009598476462997496
actor loss: -0.01695834845304489
critic loss: 0.0006299156812019646
actor loss: 0.016026251018047333
critic loss: 0.0006826758617535233
actor loss: 0.02416698820888996
critic loss: 0.0010905505623668432
actor loss: 0.06692565232515335
critic loss: 0.0006412565126083791
actor loss: 0.03684645891189575
critic loss: 0.0006965824286453426
actor loss: 0.015515802428126335
critic loss: 0.0005683954223059118
actor loss: 0.014846006408333778
critic loss: 0.0010798941366374493
actor loss: 0.005875829607248306
critic loss: 0.0013637072406709194
actor loss: 0.028634926304221153
critic loss: 0.0006966177606955171
actor loss: 0.030185425654053688
critic loss: 0.0008154761744663119
actor loss: 0.028885100036859512
critic loss: 0.0010508319828659296
actor loss: 0.0240

critic loss: 0.0009187740506604314
actor loss: 0.028501812368631363
critic loss: 0.001529031665995717
actor loss: 0.01319785788655281
critic loss: 0.0010397606529295444
actor loss: 0.03683508560061455
critic loss: 0.0014062905684113503
actor loss: 0.022538861259818077
critic loss: 0.0010257777757942677
actor loss: 0.01276416890323162
critic loss: 0.0013497322797775269
actor loss: 0.02802376076579094
critic loss: 0.0014510320033878088
actor loss: 0.04295630007982254
critic loss: 0.0008899050299078226
actor loss: 0.025626400485634804
critic loss: 0.0010966326808556914
actor loss: 0.009288708679378033
critic loss: 0.0011774147860705853
actor loss: 0.024684946984052658
critic loss: 0.0006160574266687036
actor loss: 0.05380493775010109
critic loss: 0.0006293612532317638
actor loss: 0.03598978742957115
critic loss: 0.0012997284065932035
actor loss: 0.06127359718084335
critic loss: 0.0019575338810682297
actor loss: 0.025206241756677628
critic loss: 0.002390762558206916
actor loss: 0.010142356

critic loss: 0.0006391828064806759
actor loss: 0.029713429510593414
critic loss: 0.0011009706649929285
actor loss: 0.012880503199994564
critic loss: 0.0011369336862117052
actor loss: 0.05230075865983963
critic loss: 0.0009343283600173891
actor loss: 0.005178304389119148
critic loss: 0.00139702414162457
actor loss: 0.023953769356012344
critic loss: 0.0008033572812564671
actor loss: 0.03316560387611389
critic loss: 0.0018627285026013851
actor loss: 0.02041238360106945
critic loss: 0.001311361906118691
actor loss: 0.04938773810863495
critic loss: 0.0010995250195264816
actor loss: 0.014677274972200394
critic loss: 0.0013282719301059842
actor loss: 0.019443539902567863
critic loss: 0.0015406907768920064
actor loss: 0.018061522394418716
critic loss: 0.0018643031362444162
actor loss: 0.01691552624106407
critic loss: 0.0007219763938337564
actor loss: 0.03891895338892937
critic loss: 0.0008868658333085477
actor loss: 0.03242529183626175
critic loss: 0.0019535142928361893
actor loss: 0.013624319

critic loss: 0.0007216352387331426
actor loss: 0.0028900224715471268
critic loss: 0.0010087655391544104
actor loss: 0.051651954650878906
critic loss: 0.0008946210145950317
actor loss: 0.03224752098321915
critic loss: 0.0006682726670987904
actor loss: 0.030696280300617218
critic loss: 0.0006728298612870276
actor loss: 0.044209931045770645
critic loss: 0.0005978059489279985
actor loss: 0.059452082961797714
critic loss: 0.0031408369541168213
actor loss: 0.060754869133234024
critic loss: 0.0011925608851015568
actor loss: 0.053872644901275635
critic loss: 0.0010904581286013126
actor loss: 0.05844167247414589
critic loss: 0.001126450370065868
actor loss: 0.02583840861916542
critic loss: 0.0014185294276103377
actor loss: 0.05080068111419678
critic loss: 0.0011683175107464194
actor loss: 0.044764067977666855
critic loss: 0.0019879916217178106
actor loss: -0.005808435846120119
critic loss: 0.0009095792192965746
actor loss: 0.03928453475236893
critic loss: 0.002921741222962737
actor loss: 0.0316

critic loss: 0.0007860088953748345
actor loss: 0.013250933960080147
critic loss: 0.0010797090362757444
actor loss: 0.03758146986365318
critic loss: 0.0018280274234712124
actor loss: 0.02163475565612316
critic loss: 0.0008463246049359441
actor loss: 0.00581442192196846
critic loss: 0.00057740363990888
actor loss: 0.05716317519545555
critic loss: 0.001070875208824873
actor loss: 0.02216930128633976
critic loss: 0.0018058812711387873
actor loss: 0.045303020626306534
critic loss: 0.001994082238525152
actor loss: 0.037644512951374054
critic loss: 0.0015101649332791567
actor loss: 0.01866624876856804
critic loss: 0.0023773740977048874
actor loss: 0.0072177983820438385
critic loss: 0.0011188746429979801
actor loss: 0.01684724912047386
critic loss: 0.0012947125360369682
actor loss: 0.025393065065145493
critic loss: 0.0014876513741910458
actor loss: 0.013758426532149315
critic loss: 0.0007435071747750044
actor loss: 0.01691344752907753
critic loss: 0.0010667808819562197
actor loss: 0.0209408178

critic loss: 0.0007168382289819419
actor loss: 0.041271649301052094
critic loss: 0.001260601682588458
actor loss: 0.008361236192286015
critic loss: 0.0014695720747113228
actor loss: 0.011796998791396618
critic loss: 0.0004543007235042751
actor loss: 0.013052936643362045
critic loss: 0.0006420614663511515
actor loss: 0.0509156696498394
critic loss: 0.001475612516514957
actor loss: 0.061503805220127106
critic loss: 0.0004492084844969213
actor loss: 0.016691653057932854
critic loss: 0.001645024516619742
actor loss: 0.03759429603815079
critic loss: 0.0008705356158316135
actor loss: 0.04903509467840195
critic loss: 0.0009142259368672967
actor loss: 0.021063629537820816
critic loss: 0.0009682509116828442
actor loss: 0.03869227319955826
critic loss: 0.0009970106184482574
actor loss: 0.021556565538048744
critic loss: 0.0018315776251256466
actor loss: 0.028573032468557358
critic loss: 0.0010732585797086358
actor loss: -0.007431595586240292
critic loss: 0.0007793242111802101
actor loss: 0.025456

critic loss: 0.0007585902931168675
actor loss: 0.02205520123243332
critic loss: 0.0009848133195191622
actor loss: 0.04248266667127609
critic loss: 0.0007270954665727913
actor loss: 0.021120626479387283
critic loss: 0.0009307478321716189
actor loss: 0.017295347526669502
critic loss: 0.001063919859007001
actor loss: 0.004386156797409058
critic loss: 0.000976602896116674
actor loss: 0.019692741334438324
critic loss: 0.0019515963504090905
actor loss: 0.013134712353348732
critic loss: 0.0013767643831670284
actor loss: 0.020294994115829468
critic loss: 0.0005189638468436897
actor loss: 0.031808190047740936
critic loss: 0.0006769206956960261
actor loss: 0.033525191247463226
critic loss: 0.0005430696764960885
actor loss: -0.0010327277705073357
critic loss: 0.0007727626361884177
actor loss: 0.018182503059506416
critic loss: 0.001566196558997035
actor loss: 0.03783566877245903
critic loss: 0.0006717525538988411
actor loss: 0.028025634586811066
critic loss: 0.0013368655927479267
actor loss: 0.043

critic loss: 0.0008226766949519515
actor loss: 0.006379527971148491
critic loss: 0.0027907704934477806
actor loss: 0.023427940905094147
critic loss: 0.0008869399898685515
actor loss: 0.0014300988987088203
critic loss: 0.0008815741166472435
actor loss: 0.032687876373529434
critic loss: 0.0009816514793783426
actor loss: 0.014918841421604156
critic loss: 0.000552868063095957
actor loss: 0.033639997243881226
critic loss: 0.0009653759188950062
actor loss: 0.022571953013539314
critic loss: 0.0028036078438162804
actor loss: 0.0298675075173378
critic loss: 0.0011775156017392874
actor loss: 0.03388093039393425
critic loss: 0.0020743112545460463
actor loss: 0.023544929921627045
critic loss: 0.002013296354562044
actor loss: 0.015565712004899979
critic loss: 0.0007596750510856509
actor loss: 0.04406401515007019
critic loss: 0.000404384481953457
actor loss: 0.04244251176714897
critic loss: 0.001274383394047618
actor loss: 0.03860444948077202
critic loss: 0.0008963321452029049
actor loss: 0.05643892

critic loss: 0.0007001933408901095
actor loss: 0.04076516628265381
critic loss: 0.0006421749712899327
actor loss: 0.014329694211483002
critic loss: 0.0013792063109576702
actor loss: 0.034334614872932434
critic loss: 0.000864973699208349
actor loss: 0.0338885635137558
critic loss: 0.0008334724698215723
actor loss: 0.042805325239896774
critic loss: 0.001710417214781046
actor loss: 0.06133183836936951
critic loss: 0.001892788684926927
actor loss: 0.017559939995408058
critic loss: 0.0005871729226782918
actor loss: 0.028337914496660233
critic loss: 0.0020738267339766026
actor loss: 0.05997089296579361
critic loss: 0.0007977087516337633
actor loss: -0.0002681654877960682
critic loss: 0.0007975534535944462
actor loss: 0.0011611804366111755
critic loss: 0.0009541580220684409
actor loss: 0.02362965978682041
critic loss: 0.0012902561575174332
actor loss: 0.006682157516479492
critic loss: 0.0011988438200205564
actor loss: 0.02497471310198307
critic loss: 0.0006368879112415016
actor loss: 0.024561

critic loss: 0.0016931724967435002
actor loss: 0.005052490625530481
critic loss: 0.0007624754798598588
actor loss: 0.056788370013237
critic loss: 0.0015582696069031954
actor loss: -0.020169835537672043
critic loss: 0.004996647592633963
actor loss: 0.03563770651817322
critic loss: 0.0012209592387080193
actor loss: 0.027214590460062027
critic loss: 0.001476595178246498
actor loss: 0.014983540400862694
critic loss: 0.0004768341314047575
actor loss: 0.0288039930164814
critic loss: 0.002214818261563778
actor loss: 0.05273689329624176
critic loss: 0.0018613990396261215
actor loss: 0.014139295555651188
critic loss: 0.0016377537976950407
actor loss: 0.03128223866224289
critic loss: 0.0008247297955676913
actor loss: 0.03806287422776222
critic loss: 0.0008099978440441191
actor loss: 0.004129855427891016
critic loss: 0.0008538584224879742
actor loss: 0.04463345557451248
critic loss: 0.000533048645593226
actor loss: 0.02141110599040985
critic loss: 0.0006349611794576049
actor loss: 0.0101169832050

critic loss: 0.0013866230146959424
actor loss: 0.03496526926755905
critic loss: 0.001243865117430687
actor loss: 0.014540904201567173
critic loss: 0.0011985498713329434
actor loss: 0.009310849942266941
critic loss: 0.0012664541136473417
actor loss: -0.03596881777048111
critic loss: 0.0007324476027861238
actor loss: 0.0387440025806427
critic loss: 0.0008853453327901661
actor loss: 0.03566437587141991
critic loss: 0.0006341126281768084
actor loss: 0.023620333522558212
critic loss: 0.005256642121821642
actor loss: 0.021873150020837784
critic loss: 0.001451988355256617
actor loss: 0.00859140045940876
critic loss: 0.003445525886490941
actor loss: 0.0437026172876358
critic loss: 0.0005440431414172053
actor loss: 0.0599709153175354
critic loss: 0.0008280721958726645
actor loss: 0.01435195654630661
critic loss: 0.0008383899694308639
actor loss: 0.04822560027241707
critic loss: 0.0008171203080564737
actor loss: 0.03021542727947235
critic loss: 0.0013134604087099433
actor loss: 0.049717076122760

critic loss: 0.000566044298466295
actor loss: 0.01241016574203968
critic loss: 0.000784345786087215
actor loss: 0.026505332440137863
critic loss: 0.0013107480481266975
actor loss: 0.033005304634571075
critic loss: 0.00044715794501826167
actor loss: 0.025766445323824883
critic loss: 0.002222720067948103
actor loss: 0.01947150006890297
critic loss: 0.0004760227748192847
actor loss: 0.030728384852409363
critic loss: 0.0011382948141545057
actor loss: 0.02258121222257614
critic loss: 0.0005369663122110069
actor loss: 0.022990448400378227
critic loss: 0.0009679626673460007
actor loss: 0.010668943636119366
critic loss: 0.0008139164419844747
actor loss: 0.013646782375872135
critic loss: 0.0009059164440259337
actor loss: -0.008718571625649929
critic loss: 0.0005152877420186996
actor loss: 0.023934409022331238
critic loss: 0.00076388317393139
actor loss: 0.0227514635771513
critic loss: 0.0008301993366330862
actor loss: 0.004887787159532309
critic loss: 0.0007025593658909202
actor loss: 0.0328658

critic loss: 0.0004980377852916718
actor loss: 0.03990904241800308
critic loss: 0.0017736812587827444
actor loss: 0.029216118156909943
critic loss: 0.0009721355745568871
actor loss: 0.016865836456418037
critic loss: 0.0010149844456464052
actor loss: 0.04337482899427414
critic loss: 0.0005761997308582067
actor loss: 0.016895625740289688
critic loss: 0.0015813075006008148
actor loss: 0.02627773955464363
critic loss: 0.0005723438807763159
actor loss: 0.02987997606396675
critic loss: 0.0005136001855134964
actor loss: 0.0357927605509758
critic loss: 0.000990992528386414
actor loss: 0.02998458594083786
critic loss: 0.00256813270971179
actor loss: 0.060272522270679474
critic loss: 0.0010372118558734655
actor loss: 0.011179509572684765
critic loss: 0.0004069179412908852
actor loss: 0.02272777631878853
critic loss: 0.0016100782668218017
actor loss: 0.016726193949580193
critic loss: 0.0009065953781828284
actor loss: 0.053568389266729355
critic loss: 0.001436016522347927
actor loss: 0.01590939238

critic loss: 0.0005846966523677111
actor loss: 0.02046741172671318
critic loss: 0.0008663017069920897
actor loss: -0.0003066752105951309
critic loss: 0.0014219821896404028
actor loss: 0.03744480386376381
critic loss: 0.0008308608666993678
actor loss: 0.03319042548537254
critic loss: 0.0009738631779327989
actor loss: 0.01965365745127201
critic loss: 0.0007150843739509583
actor loss: 0.03866860643029213
critic loss: 0.0016175844939425588
actor loss: 0.01721060834825039
critic loss: 0.0014241148019209504
actor loss: 0.017272107303142548
critic loss: 0.00047304731560871005
actor loss: -0.004312373697757721
critic loss: 0.0017406884580850601
actor loss: 0.04117643088102341
critic loss: 0.0005334343877620995
actor loss: 0.025283459573984146
critic loss: 0.0005095422966405749
actor loss: 0.021809915080666542
critic loss: 0.0011640535667538643
actor loss: 0.014052076265215874
critic loss: 0.0011462671682238579
actor loss: -5.2141956984996796e-05
critic loss: 0.00043878081487491727
actor loss: 

critic loss: 0.000782480463385582
actor loss: 0.0637453943490982
critic loss: 0.000992286717519164
actor loss: 0.0191941000521183
critic loss: 0.0012168345274403691
actor loss: 0.039240747690200806
critic loss: 0.00473350565880537
actor loss: 0.003008604049682617
critic loss: 0.0006923776818439364
actor loss: 0.02505318447947502
critic loss: 0.0005193126271478832
actor loss: 0.03515925258398056
critic loss: 0.0007369575323536992
actor loss: 0.030221175402402878
critic loss: 0.0016452292911708355
actor loss: 0.03694162890315056
critic loss: 0.0006617375765927136
actor loss: 0.043326620012521744
critic loss: 0.0009409936028532684
actor loss: 0.023932209238409996
critic loss: 0.0021236015018075705
actor loss: 0.0495360791683197
critic loss: 0.0005354434251785278
actor loss: 0.03492830693721771
critic loss: 0.0009860116988420486
actor loss: 0.048060379922389984
critic loss: 0.0006362509448081255
actor loss: 0.04950588941574097
critic loss: 0.0010618153028190136
actor loss: 0.01188987866044

critic loss: 0.0006734563503414392
actor loss: 0.04099457710981369
critic loss: 0.0006056412821635604
actor loss: 0.06043021380901337
critic loss: 0.0007213522330857813
actor loss: -0.011308321729302406
critic loss: 0.0007229229668155313
actor loss: 0.03301997110247612
critic loss: 0.0007919074269011617
actor loss: -0.001962129957973957
critic loss: 0.0005273694987408817
actor loss: 0.01578379236161709
critic loss: 0.0004211676714476198
actor loss: 0.045435257256031036
critic loss: 0.0007456368766725063
actor loss: 0.04602862894535065
critic loss: 0.0003865742473863065
actor loss: 0.04446220025420189
critic loss: 0.0010190116008743644
actor loss: 0.024130402132868767
critic loss: 0.0014668359654024243
actor loss: 0.020256802439689636
critic loss: 0.0011123507283627987
actor loss: 0.03152722865343094
critic loss: 0.0007141482783481479
actor loss: 0.027261313050985336
critic loss: 0.0007630245527252555
actor loss: 0.01504913903772831
critic loss: 0.000699105323292315
actor loss: 0.015451

critic loss: 0.0004578254884108901
actor loss: 0.036539241671562195
critic loss: 0.0006196731701493263
actor loss: 0.037400584667921066
critic loss: 0.0007505345856770873
actor loss: 0.02288450300693512
critic loss: 0.0005986523465253413
actor loss: 0.035827986896038055
critic loss: 0.0006679410580545664
actor loss: 0.06607520580291748
critic loss: 0.00033551326487213373
actor loss: 0.035446736961603165
critic loss: 0.0006249442230910063
actor loss: 0.0416344478726387
critic loss: 0.001573952496983111
actor loss: 0.01476235594600439
critic loss: 0.0020114658400416374
actor loss: 0.02374139428138733
critic loss: 0.0008047714363783598
actor loss: 0.031147442758083344
critic loss: 0.0009218967752531171
actor loss: 0.07428765296936035
critic loss: 0.0013754221145063639
actor loss: 0.01578332856297493
critic loss: 0.0012020028661936522
actor loss: 0.0237799771130085
critic loss: 0.0010029722470790148
actor loss: 0.01890713721513748
critic loss: 0.00044033833546563983
actor loss: 0.042812280

critic loss: 0.0016615407075732946
actor loss: 0.04333636909723282
critic loss: 0.0026788816321641207
actor loss: 0.00903063453733921
critic loss: 0.0012457414995878935
actor loss: 0.05374832823872566
critic loss: 0.001648235833272338
actor loss: 0.010760581120848656
critic loss: 0.0007529311114922166
actor loss: 0.00990265142172575
critic loss: 0.0004150060412939638
actor loss: 0.0310337133705616
critic loss: 0.0008475734503008425
actor loss: 0.012720227241516113
critic loss: 0.001863091136328876
actor loss: 0.0341176837682724
critic loss: 0.0017895168857648969
actor loss: 0.004196492955088615
critic loss: 0.0010840985924005508
actor loss: 0.002735513262450695
critic loss: 0.0008796687470749021
actor loss: 0.01659853756427765
critic loss: 0.004055290017277002
actor loss: 0.01427827961742878
critic loss: 0.001247210195288062
actor loss: 0.005205025430768728
critic loss: 0.0008674435084685683
actor loss: 0.06955959647893906
critic loss: 0.0010519607458263636
actor loss: 0.01700385473668

critic loss: 0.0007149368757382035
actor loss: 0.032967209815979004
critic loss: 0.0010654303478077054
actor loss: -0.002694277558475733
critic loss: 0.0005735372542403638
actor loss: 0.026183104142546654
critic loss: 0.0006774042267352343
actor loss: 0.04474334046244621
critic loss: 0.0012887476477771997
actor loss: 0.023482851684093475
critic loss: 0.0012275660410523415
actor loss: 0.019031796604394913
critic loss: 0.001943429233506322
actor loss: 0.02667607180774212
critic loss: 0.0004969965666532516
actor loss: 0.01269177533686161
critic loss: 0.00042322278022766113
actor loss: 0.007919756695628166
critic loss: 0.0016530583379790187
actor loss: 0.021684670820832253
critic loss: 0.00334257073700428
actor loss: 0.003123704344034195
critic loss: 0.0010017129825428128
actor loss: 0.06333862990140915
critic loss: 0.0006287300493568182
actor loss: 0.036724548786878586
critic loss: 0.003061088500544429
actor loss: 0.0377969890832901
critic loss: 0.0009269553702324629
actor loss: 0.0292841

critic loss: 0.0010778411524370313
actor loss: 0.0005187885835766792
critic loss: 0.0004814644926227629
actor loss: 0.04692891985177994
critic loss: 0.0005953568033874035
actor loss: 0.03103938326239586
critic loss: 0.0007406634977087379
actor loss: 0.05317986011505127
critic loss: 0.0013349761720746756
actor loss: 0.03207607567310333
critic loss: 0.0013015980366617441
actor loss: 0.025141652673482895
critic loss: 0.0003235248732380569
actor loss: -0.002455048728734255
critic loss: 0.0010465094819664955
actor loss: 0.02018425427377224
critic loss: 0.0005755306337960064
actor loss: 0.02698192372918129
critic loss: 0.0006387740140780807
actor loss: 0.005971631035208702
critic loss: 0.0008576228865422308
actor loss: 0.02324545755982399
critic loss: 0.00078210374340415
actor loss: 0.005839996039867401
critic loss: 0.001034302986226976
actor loss: 0.038502857089042664
critic loss: 0.00044413970317691565
actor loss: 0.05453167110681534
critic loss: 0.0011834022589027882
actor loss: 0.0281737

critic loss: 0.0010483551304787397
actor loss: 0.041629236191511154
critic loss: 0.0012720203958451748
actor loss: 0.02735288254916668
critic loss: 0.0007018417818471789
actor loss: 0.035921189934015274
critic loss: 0.0017405180260539055
actor loss: 0.023448601365089417
critic loss: 0.0004750154330395162
actor loss: 0.034547194838523865
critic loss: 0.0008335302118211985
actor loss: 0.04155643284320831
critic loss: 0.0008860233938321471
actor loss: 0.02650958113372326
critic loss: 0.000645355845335871
actor loss: 0.03686361387372017
critic loss: 0.00138291553594172
actor loss: 0.02343832515180111
critic loss: 0.0006331088952720165
actor loss: 0.024586284533143044
critic loss: 0.0011122804135084152
actor loss: 0.036891110241413116
critic loss: 0.00040129650733433664
actor loss: 0.02082883194088936
critic loss: 0.0009942470351234078
actor loss: 0.04715126007795334
critic loss: 0.0009928152430802584
actor loss: 0.025733619928359985
critic loss: 0.00046327183372341096
actor loss: 0.0416115

critic loss: 0.0007913036388345063
actor loss: 0.027702633291482925
critic loss: 0.0007264387095347047
actor loss: 0.0476338267326355
critic loss: 0.0004759248695336282
actor loss: 0.04395423084497452
critic loss: 0.001324944430962205
actor loss: 0.03666059672832489
critic loss: 0.0008150672656483948
actor loss: 0.019299998879432678
critic loss: 0.00045240845065563917
actor loss: 0.04490446299314499
critic loss: 0.0008813602617010474
actor loss: 0.04122113808989525
critic loss: 0.0023300517350435257
actor loss: 0.0433398075401783
critic loss: 0.0016476876335218549
actor loss: 0.030206646770238876
critic loss: 0.0006476218113675714
actor loss: 0.020447174087166786
critic loss: 0.0006358944810926914
actor loss: 0.03503022342920303
critic loss: 0.0003953185514546931
actor loss: 0.04430953785777092
critic loss: 0.0005893787601962686
actor loss: 0.03220819681882858
critic loss: 0.0004446161037776619
actor loss: 0.02346063032746315
critic loss: 0.0018007089383900166
actor loss: 0.03971287608

critic loss: 0.0003467951901257038
actor loss: 0.03702273964881897
critic loss: 0.0008695544674992561
actor loss: 0.02104467898607254
critic loss: 0.00038157671224325895
actor loss: 0.015630364418029785
critic loss: 0.0010294793173670769
actor loss: 0.003679037094116211
critic loss: 0.0008767108665779233
actor loss: 0.04788627102971077
critic loss: 0.0011079148389399052
actor loss: 0.02715122327208519
critic loss: 0.00046304328134283423
actor loss: 0.04215317592024803
critic loss: 0.0009824662702158093
actor loss: -0.012899147346615791
critic loss: 0.00040125002851709723
actor loss: 0.02759706601500511
critic loss: 0.0011781314387917519
actor loss: 0.008910879492759705
critic loss: 0.0005724497023038566
actor loss: 0.010227958671748638
critic loss: 0.0006966948858462274
actor loss: 0.010785164311528206
critic loss: 0.0006848305929452181
actor loss: 0.03347925469279289
critic loss: 0.0005694980500265956
actor loss: 0.03261735290288925
critic loss: 0.0006591555429622531
actor loss: 0.016

critic loss: 0.0007745916955173016
actor loss: 0.017673708498477936
critic loss: 0.0012455856194719672
actor loss: 0.012487015686929226
critic loss: 0.0012661346700042486
actor loss: 0.027482975274324417
critic loss: 0.001514981733635068
actor loss: 0.013254516758024693
critic loss: 0.0005979728884994984
actor loss: -0.01176882442086935
critic loss: 0.001416411017999053
actor loss: 0.04991694167256355
critic loss: 0.000663873041048646
actor loss: 0.04550299048423767
critic loss: 0.0009871958754956722
actor loss: 0.02164667658507824
critic loss: 0.0011012605391442776
actor loss: 0.03073696792125702
critic loss: 0.0012775727082043886
actor loss: 0.04179328680038452
critic loss: 0.0006660586223006248
actor loss: 0.04935098811984062
critic loss: 0.0006842605071142316
actor loss: -0.0012751305475831032
critic loss: 0.0013454077998176217
actor loss: 0.033794231712818146
critic loss: 0.0012019517598673701
actor loss: 0.05096143111586571
critic loss: 0.0005937546957284212
actor loss: 0.0153456

critic loss: 0.0007006743107922375
actor loss: 0.01692952960729599
critic loss: 0.0009846335742622614
actor loss: 0.016544640064239502
critic loss: 0.0011149304918944836
actor loss: 0.015278400853276253
critic loss: 0.00042088187183253467
actor loss: 0.06767179071903229
critic loss: 0.0007294920505955815
actor loss: 0.025028564035892487
critic loss: 0.0011069260071963072
actor loss: 0.03321421518921852
critic loss: 0.0008495838847011328
actor loss: 0.030560432001948357
critic loss: 0.0003109285025857389
actor loss: 0.012437183409929276
critic loss: 0.0012164739891886711
actor loss: -0.009047901257872581
critic loss: 0.0010529108112677932
actor loss: 0.022526925429701805
critic loss: 0.0009746566647663713
actor loss: 0.06219101324677467
critic loss: 0.001563697587698698
actor loss: 0.026140619069337845
critic loss: 0.0002924835425801575
actor loss: 0.039059899747371674
critic loss: 0.0018056759145110846
actor loss: 0.010601876303553581
critic loss: 0.0007319044088944793
actor loss: 0.01

critic loss: 0.0008293632417917252
actor loss: 0.032786883413791656
critic loss: 0.0012597660534083843
actor loss: 0.018542464822530746
critic loss: 0.0007023644866421819
actor loss: 0.03197944164276123
critic loss: 0.0009052947862073779
actor loss: 0.03901413083076477
critic loss: 0.001627143006771803
actor loss: 0.020125137642025948
critic loss: 0.0006851301295682788
actor loss: 0.032717641443014145
critic loss: 0.0013802861794829369
actor loss: 0.048536576330661774
critic loss: 0.0007312186062335968
actor loss: 0.014774404466152191
critic loss: 0.0010262829018756747
actor loss: 0.004458250477910042
critic loss: 0.0006692857714369893
actor loss: 0.02581534907221794
critic loss: 0.0006372433854267001
actor loss: 0.0010393261909484863
critic loss: 0.000830454402603209
actor loss: 0.039463408291339874
critic loss: 0.0034511766862124205
actor loss: 0.0454399399459362
critic loss: 0.0005414844490587711
actor loss: 0.02128356508910656
critic loss: 0.0005219178856350482
actor loss: 0.022578

critic loss: 0.0004897452890872955
actor loss: 0.022159140557050705
critic loss: 0.0009643173543736339
actor loss: 0.007539363112300634
critic loss: 0.0002889908500947058
actor loss: 0.03917454555630684
critic loss: 0.0006644002860412002
actor loss: 0.0006953859701752663
critic loss: 0.0003495778364595026
actor loss: 0.061256833374500275
critic loss: 0.0005795780452899635
actor loss: 0.007712294347584248
critic loss: 0.0006169096450321376
actor loss: 0.03458733856678009
critic loss: 0.001088024815544486
actor loss: 0.01726105995476246
critic loss: 0.0006994590512476861
actor loss: 0.02984084188938141
critic loss: 0.0004945757682435215
actor loss: 0.032598793506622314
critic loss: 0.0009298513759858906
actor loss: 0.03953401744365692
critic loss: 0.0010530779836699367
actor loss: 0.03220588341355324
critic loss: 0.0006589366821572185
actor loss: 0.03606066107749939
critic loss: 0.0014303082134574652
actor loss: -0.01545975636690855
critic loss: 0.0012822350254282355
actor loss: 0.034197

critic loss: 0.0017277783481404185
actor loss: 0.01731090247631073
critic loss: 0.0005377844208851457
actor loss: 0.026911795139312744
critic loss: 0.0013874650467187166
actor loss: 0.02620280720293522
critic loss: 0.0009659478673711419
actor loss: 0.023707130923867226
critic loss: 0.000860815227497369
actor loss: 0.04731512814760208
critic loss: 0.0004923166707158089
actor loss: 0.008207942359149456
critic loss: 0.0009302862454205751
actor loss: 0.03979305922985077
critic loss: 0.0021674870513379574
actor loss: 0.06050892546772957
critic loss: 0.0020952315535396338
actor loss: 0.0460960790514946
critic loss: 0.0010074881138280034
actor loss: 0.06077881157398224
critic loss: 0.000871906231623143
actor loss: 0.025268953293561935
critic loss: 0.0005570622161030769
actor loss: 0.040640972554683685
critic loss: 0.0006682260427623987
actor loss: 0.04907979071140289
critic loss: 0.0013013017596676946
actor loss: -0.006419973447918892
critic loss: 0.000942403741646558
actor loss: 0.0074555221

critic loss: 0.0013521166983991861
actor loss: 0.039209045469760895
critic loss: 0.0012613050639629364
actor loss: 0.01851111836731434
critic loss: 0.0007479432970285416
actor loss: 0.03557481989264488
critic loss: 0.000747411628253758
actor loss: 0.026909489184617996
critic loss: 0.0014715425204485655
actor loss: 0.06958265602588654
critic loss: 0.001413810532540083
actor loss: 0.04063127189874649
critic loss: 0.000724626355804503
actor loss: 0.04002568498253822
critic loss: 0.0006433530943468213
actor loss: 0.019536178559064865
critic loss: 0.0007335948757827282
actor loss: 0.023034319281578064
critic loss: 0.0007045703823678195
actor loss: 0.044465601444244385
critic loss: 0.0005478312959894538
actor loss: 0.04435395449399948
critic loss: 0.0014786612009629607
actor loss: 0.016556723043322563
critic loss: 0.0005764039233326912
actor loss: 0.023647408932447433
critic loss: 0.00043830095091834664
actor loss: 0.02408161200582981
critic loss: 0.0022953879088163376
actor loss: 0.01607303

In [38]:
# test
for episode in range(1):
    run_episode(env, buffer, model, render = True, sigma = 0.0)

env.close()

Creating window glfw
