In [1]:
import numpy as np
from stable_baselines3 import PPO, SAC
from gymnasium import spaces, Env
from scipy.interpolate import interp1d

In [2]:
# Angular sample grid, hard-coded as a literal array.
# The entries rise from 0.0 to 6.2831854820251465 (~2*pi) in increments of
# roughly 0.0062895, so this looks like a uniform grid of angles in radians
# covering one full turn -- presumably ~1000 points; confirm with THETA.size.
# NOTE(review): the digits look like float32 values printed at full precision
# (the last entry matches 2*pi rounded to float32), so the spacing is only
# uniform to about 1e-7. Regenerating the grid with np.linspace would therefore
# not necessarily reproduce these exact numbers.
# Presumably paired index-for-index with SPEED below -- TODO confirm the lengths match.
THETA = np.array([0.0, 0.006289475131779909, 0.012578950263559818, 0.018868425861001015,
 0.025157900527119637, 0.03144737705588341, 0.03773685172200203,
 0.04402632638812065, 0.05031580105423927, 0.056605275720357895,
 0.06289475411176682, 0.06918422877788544, 0.07547370344400406,
 0.08176317811012268, 0.0880526527762413, 0.09434212744235992,
 0.10063160210847855, 0.10692107677459717, 0.11321055144071579,
 0.11950002610683441, 0.12578950822353363, 0.13207897543907166,
 0.13836845755577087, 0.1446579247713089, 0.15094740688800812,
 0.15723687410354614, 0.16352635622024536, 0.1698158234357834,
 0.1761053055524826, 0.18239477276802063, 0.18868425488471985,
 0.19497372210025787, 0.2012632042169571, 0.2075526863336563,
 0.21384215354919434, 0.22013163566589355, 0.22642110288143158,
 0.2327105849981308, 0.23900005221366882, 0.24528953433036804,
 0.25157901644706726, 0.2578684687614441, 0.2641579508781433,
 0.27044743299484253, 0.27673691511154175, 0.2830263674259186,
 0.2893158495426178, 0.295605331659317, 0.30189481377601624,
 0.30818429589271545, 0.3144737482070923, 0.3207632303237915,
 0.3270527124404907, 0.33334219455718994, 0.3396316468715668, 0.345921128988266,
 0.3522106111049652, 0.35850009322166443, 0.36478954553604126,
 0.3710790276527405, 0.3773685097694397, 0.3836579918861389,
 0.38994744420051575, 0.39623692631721497, 0.4025264084339142,
 0.4088158905506134, 0.4151053726673126, 0.42139482498168945,
 0.42768430709838867, 0.4339737892150879, 0.4402632713317871,
 0.44655272364616394, 0.45284220576286316, 0.4591316878795624,
 0.4654211699962616, 0.4717106223106384, 0.47800010442733765,
 0.48428958654403687, 0.4905790686607361, 0.4968685209751129,
 0.5031580328941345, 0.5094475150108337, 0.5157369375228882, 0.5220264196395874,
 0.5283159017562866, 0.5346053838729858, 0.5408948659896851, 0.5471843481063843,
 0.5534738302230835, 0.5597633123397827, 0.5660527348518372, 0.5723422169685364,
 0.5786316990852356, 0.5849211812019348, 0.591210663318634, 0.5975001454353333,
 0.6037896275520325, 0.6100791096687317, 0.6163685917854309, 0.6226580142974854,
 0.6289474964141846, 0.6352369785308838, 0.641526460647583, 0.6478159427642822,
 0.6541054248809814, 0.6603949069976807, 0.6666843891143799, 0.6729738116264343,
 0.6792632937431335, 0.6855527758598328, 0.691842257976532, 0.6981317400932312,
 0.7044212222099304, 0.7107107043266296, 0.7170001864433289, 0.7232896685600281,
 0.7295790910720825, 0.7358685731887817, 0.742158055305481, 0.7484475374221802,
 0.7547370195388794, 0.7610265016555786, 0.7673159837722778, 0.773605465888977,
 0.7798948884010315, 0.7861843705177307, 0.7924738526344299, 0.7987633347511292,
 0.8050528168678284, 0.8113422989845276, 0.8176317811012268, 0.823921263217926,
 0.8302107453346252, 0.8365001678466797, 0.8427896499633789, 0.8490791320800781,
 0.8553686141967773, 0.8616580963134766, 0.8679475784301758, 0.874237060546875,
 0.8805265426635742, 0.8868159651756287, 0.8931054472923279, 0.8993949294090271,
 0.9056844115257263, 0.9119738936424255, 0.9182633757591248, 0.924552857875824,
 0.9308423399925232, 0.9371318221092224, 0.9434212446212769, 0.9497107267379761,
 0.9560002088546753, 0.9622896909713745, 0.9685791730880737, 0.974868655204773,
 0.9811581373214722, 0.9874476194381714, 0.9937370419502258, 1.0000265836715698,
 1.006316065788269, 1.0126055479049683, 1.0188950300216675, 1.0251843929290771,
 1.0314738750457764, 1.0377633571624756, 1.0440528392791748, 1.050342321395874,
 1.0566318035125732, 1.0629212856292725, 1.0692107677459717, 1.075500249862671,
 1.0817897319793701, 1.0880792140960693, 1.0943686962127686, 1.1006581783294678,
 1.106947660446167, 1.1132371425628662, 1.1195266246795654, 1.1258161067962646,
 1.1321054697036743, 1.1383949518203735, 1.1446844339370728, 1.150973916053772,
 1.1572633981704712, 1.1635528802871704, 1.1698423624038696, 1.1761318445205688,
 1.182421326637268, 1.1887108087539673, 1.1950002908706665, 1.2012897729873657,
 1.207579255104065, 1.2138687372207642, 1.2201582193374634, 1.2264477014541626,
 1.2327371835708618, 1.2390265464782715, 1.2453160285949707, 1.25160551071167,
 1.2578949928283691, 1.2641844749450684, 1.2704739570617676, 1.2767634391784668,
 1.283052921295166, 1.2893424034118652, 1.2956318855285645, 1.3019213676452637,
 1.308210849761963, 1.314500331878662, 1.3207898139953613, 1.3270792961120605,
 1.3333687782287598, 1.339658260345459, 1.3459476232528687, 1.3522371053695679,
 1.358526587486267, 1.3648160696029663, 1.3711055517196655, 1.3773950338363647,
 1.383684515953064, 1.3899739980697632, 1.3962634801864624, 1.4025529623031616,
 1.4088424444198608, 1.41513192653656, 1.4214214086532593, 1.4277108907699585,
 1.4340003728866577, 1.440289855003357, 1.4465793371200562, 1.4528687000274658,
 1.459158182144165, 1.4654476642608643, 1.4717371463775635, 1.4780266284942627,
 1.484316110610962, 1.4906055927276611, 1.4968950748443604, 1.5031845569610596,
 1.5094740390777588, 1.515763521194458, 1.5220530033111572, 1.5283424854278564,
 1.5346319675445557, 1.5409214496612549, 1.547210931777954, 1.5535004138946533,
 1.559789776802063, 1.5660792589187622, 1.5723687410354614, 1.5786582231521606,
 1.5849477052688599, 1.591237187385559, 1.5975266695022583, 1.6038161516189575,
 1.6101056337356567, 1.616395115852356, 1.6226845979690552, 1.6289740800857544,
 1.6352635622024536, 1.6415530443191528, 1.647842526435852, 1.6541320085525513,
 1.6604214906692505, 1.6667108535766602, 1.6730003356933594, 1.6792898178100586,
 1.6855792999267578, 1.691868782043457, 1.6981582641601562, 1.7044477462768555,
 1.7107372283935547, 1.717026710510254, 1.7233161926269531, 1.7296056747436523,
 1.7358951568603516, 1.7421846389770508, 1.74847412109375, 1.7547636032104492,
 1.7610530853271484, 1.7673425674438477, 1.7736319303512573, 1.7799214124679565,
 1.7862108945846558, 1.792500376701355, 1.7987898588180542, 1.8050793409347534,
 1.8113688230514526, 1.8176583051681519, 1.823947787284851, 1.8302372694015503,
 1.8365267515182495, 1.8428162336349487, 1.849105715751648, 1.8553951978683472,
 1.8616846799850464, 1.8679741621017456, 1.8742636442184448, 1.8805530071258545,
 1.8868424892425537, 1.893131971359253, 1.8994214534759521, 1.9057109355926514,
 1.9120004177093506, 1.9182898998260498, 1.924579381942749, 1.9308688640594482,
 1.9371583461761475, 1.9434478282928467, 1.949737310409546, 1.9560267925262451,
 1.9623162746429443, 1.9686057567596436, 1.9748952388763428, 1.981184720993042,
 1.9874740839004517, 1.9937635660171509, 2.0000531673431396, 2.006342649459839,
 2.012632131576538, 2.0189216136932373, 2.0252110958099365, 2.0315005779266357,
 2.037790060043335, 2.044079303741455, 2.0503687858581543, 2.0566582679748535,
 2.0629477500915527, 2.069237232208252, 2.075526714324951, 2.0818161964416504,
 2.0881056785583496, 2.094395160675049, 2.100684642791748, 2.1069741249084473,
 2.1132636070251465, 2.1195530891418457, 2.125842571258545, 2.132132053375244,
 2.1384215354919434, 2.1447110176086426, 2.151000499725342, 2.157289981842041,
 2.1635794639587402, 2.1698689460754395, 2.1761584281921387, 2.182447910308838,
 2.188737392425537, 2.1950268745422363, 2.2013163566589355, 2.2076058387756348,
 2.213895320892334, 2.220184803009033, 2.2264742851257324, 2.2327637672424316,
 2.239053249359131, 2.24534273147583, 2.2516322135925293, 2.2579214572906494,
 2.2642109394073486, 2.270500421524048, 2.276789903640747, 2.2830793857574463,
 2.2893688678741455, 2.2956583499908447, 2.301947832107544, 2.308237314224243,
 2.3145267963409424, 2.3208162784576416, 2.327105760574341, 2.33339524269104,
 2.3396847248077393, 2.3459742069244385, 2.3522636890411377, 2.358553171157837,
 2.364842653274536, 2.3711321353912354, 2.3774216175079346, 2.383711099624634,
 2.390000581741333, 2.3962900638580322, 2.4025795459747314, 2.4088690280914307,
 2.41515851020813, 2.421447992324829, 2.4277374744415283, 2.4340269565582275,
 2.4403164386749268, 2.446605920791626, 2.452895402908325, 2.4591848850250244,
 2.4654743671417236, 2.4717636108398438, 2.478053092956543, 2.484342575073242,
 2.4906320571899414, 2.4969215393066406, 2.50321102142334, 2.509500503540039,
 2.5157899856567383, 2.5220794677734375, 2.5283689498901367, 2.534658432006836,
 2.540947914123535, 2.5472373962402344, 2.5535268783569336, 2.559816360473633,
 2.566105842590332, 2.5723953247070312, 2.5786848068237305, 2.5849742889404297,
 2.591263771057129, 2.597553253173828, 2.6038427352905273, 2.6101322174072266,
 2.616421699523926, 2.622711181640625, 2.629000663757324, 2.6352901458740234,
 2.6415796279907227, 2.647869110107422, 2.654158592224121, 2.6604480743408203,
 2.6667375564575195, 2.6730270385742188, 2.679316520690918, 2.685605764389038,
 2.6918952465057373, 2.6981847286224365, 2.7044742107391357, 2.710763692855835,
 2.717053174972534, 2.7233426570892334, 2.7296321392059326, 2.735921621322632,
 2.742211103439331, 2.7485005855560303, 2.7547900676727295, 2.7610795497894287,
 2.767369031906128, 2.773658514022827, 2.7799479961395264, 2.7862374782562256,
 2.792526960372925, 2.798816442489624, 2.8051059246063232, 2.8113954067230225,
 2.8176848888397217, 2.823974370956421, 2.83026385307312, 2.8365533351898193,
 2.8428428173065186, 2.8491322994232178, 2.855421781539917, 2.861711263656616,
 2.8680007457733154, 2.8742902278900146, 2.880579710006714, 2.886869192123413,
 2.8931586742401123, 2.8994479179382324, 2.9057374000549316, 2.912026882171631,
 2.91831636428833, 2.9246058464050293, 2.9308953285217285, 2.9371848106384277,
 2.943474292755127, 2.949763774871826, 2.9560532569885254, 2.9623427391052246,
 2.968632221221924, 2.974921703338623, 2.9812111854553223, 2.9875006675720215,
 2.9937901496887207, 3.00007963180542, 3.006369113922119, 3.0126585960388184,
 3.0189480781555176, 3.025237560272217, 3.031527042388916, 3.0378165245056152,
 3.0441060066223145, 3.0503954887390137, 3.056684970855713, 3.062974452972412,
 3.0692639350891113, 3.0755534172058105, 3.0818428993225098, 3.088132381439209,
 3.094421863555908, 3.1007113456726074, 3.1070008277893066, 3.1132900714874268,
 3.119579553604126, 3.125869035720825, 3.1321585178375244, 3.1384479999542236,
 3.144737482070923, 3.151026964187622, 3.1573164463043213, 3.1636059284210205,
 3.1698954105377197, 3.176184892654419, 3.182474374771118, 3.1887638568878174,
 3.1950533390045166, 3.201342821121216, 3.207632303237915, 3.2139217853546143,
 3.2202112674713135, 3.2265007495880127, 3.232790231704712, 3.239079713821411,
 3.2453691959381104, 3.2516586780548096, 3.257948160171509, 3.264237642288208,
 3.2705271244049072, 3.2768166065216064, 3.2831060886383057, 3.289395570755005,
 3.295685052871704, 3.3019745349884033, 3.3082640171051025, 3.3145534992218018,
 3.320842981338501, 3.3271324634552, 3.3334217071533203, 3.3397111892700195,
 3.3460006713867188, 3.352290153503418, 3.358579635620117, 3.3648691177368164,
 3.3711585998535156, 3.377448081970215, 3.383737564086914, 3.3900270462036133,
 3.3963165283203125, 3.4026060104370117, 3.408895492553711, 3.41518497467041,
 3.4214744567871094, 3.4277639389038086, 3.434053421020508, 3.440342903137207,
 3.4466323852539062, 3.4529218673706055, 3.4592113494873047, 3.465500831604004,
 3.471790313720703, 3.4780797958374023, 3.4843692779541016, 3.490658760070801,
 3.4969482421875, 3.503237724304199, 3.5095272064208984, 3.5158166885375977,
 3.522106170654297, 3.528395652770996, 3.5346851348876953, 3.5409746170043945,
 3.5472638607025146, 3.553553342819214, 3.559842824935913, 3.5661323070526123,
 3.5724217891693115, 3.5787112712860107, 3.58500075340271, 3.591290235519409,
 3.5975797176361084, 3.6038691997528076, 3.610158681869507, 3.616448163986206,
 3.6227376461029053, 3.6290271282196045, 3.6353166103363037, 3.641606092453003,
 3.647895574569702, 3.6541850566864014, 3.6604745388031006, 3.6667640209198,
 3.673053503036499, 3.6793429851531982, 3.6856324672698975, 3.6919219493865967,
 3.698211431503296, 3.704500913619995, 3.7107903957366943, 3.7170798778533936,
 3.7233693599700928, 3.729658842086792, 3.735948324203491, 3.7422378063201904,
 3.7485272884368896, 3.754816770553589, 3.761106014251709, 3.767395496368408,
 3.7736849784851074, 3.7799744606018066, 3.786263942718506, 3.792553424835205,
 3.7988429069519043, 3.8051323890686035, 3.8114218711853027, 3.817711353302002,
 3.824000835418701, 3.8302903175354004, 3.8365797996520996, 3.842869281768799,
 3.849158763885498, 3.8554482460021973, 3.8617377281188965, 3.8680272102355957,
 3.874316692352295, 3.880606174468994, 3.8868956565856934, 3.8931851387023926,
 3.899474620819092, 3.905764102935791, 3.9120535850524902, 3.9183430671691895,
 3.9246325492858887, 3.930922031402588, 3.937211513519287, 3.9435009956359863,
 3.9497904777526855, 3.9560799598693848, 3.962369441986084, 3.968658924102783,
 3.9749481678009033, 3.9812376499176025, 3.9875271320343018, 3.993816614151001,
 4.000106334686279, 4.0063958168029785, 4.012685298919678, 4.018974781036377,
 4.025264263153076, 4.031553745269775, 4.037843227386475, 4.044132709503174,
 4.050422191619873, 4.056711673736572, 4.0630011558532715, 4.069290637969971,
 4.07558012008667, 4.081869125366211, 4.08815860748291, 4.094448089599609,
 4.100737571716309, 4.107027053833008, 4.113316535949707, 4.119606018066406,
 4.1258955001831055, 4.132184982299805, 4.138474464416504, 4.144763946533203,
 4.151053428649902, 4.157342910766602, 4.163632392883301, 4.169921875,
 4.176211357116699, 4.182500839233398, 4.188790321350098, 4.195079803466797,
 4.201369285583496, 4.207658767700195, 4.2139482498168945, 4.220237731933594,
 4.226527214050293, 4.232816696166992, 4.239106178283691, 4.245395660400391,
 4.25168514251709, 4.257974624633789, 4.264264106750488, 4.2705535888671875,
 4.276843070983887, 4.283132553100586, 4.289422035217285, 4.295711517333984,
 4.302000999450684, 4.308290481567383, 4.314579963684082, 4.320869445800781,
 4.3271589279174805, 4.33344841003418, 4.339737892150879, 4.346027374267578,
 4.352316856384277, 4.358606338500977, 4.364895820617676, 4.371185302734375,
 4.377474784851074, 4.383764266967773, 4.390053749084473, 4.396343231201172,
 4.402632713317871, 4.40892219543457, 4.4152116775512695, 4.421501159667969,
 4.427790641784668, 4.434080123901367, 4.440369606018066, 4.446659088134766,
 4.452948570251465, 4.459238052368164, 4.465527534484863, 4.4718170166015625,
 4.478106498718262, 4.484395980834961, 4.49068546295166, 4.496974945068359,
 4.503264427185059, 4.5095534324646, 4.515842914581299, 4.522132396697998,
 4.528421878814697, 4.5347113609313965, 4.541000843048096, 4.547290325164795,
 4.553579807281494, 4.559869289398193, 4.566158771514893, 4.572448253631592,
 4.578737735748291, 4.58502721786499, 4.5913166999816895, 4.597606182098389,
 4.603895664215088, 4.610185146331787, 4.616474628448486, 4.6227641105651855,
 4.629053592681885, 4.635343074798584, 4.641632556915283, 4.647922039031982,
 4.654211521148682, 4.660501003265381, 4.66679048538208, 4.673079967498779,
 4.6793694496154785, 4.685658931732178, 4.691948413848877, 4.698237895965576,
 4.704527378082275, 4.710816860198975, 4.717106342315674, 4.723395824432373,
 4.729685306549072, 4.7359747886657715, 4.742264270782471, 4.74855375289917,
 4.754843235015869, 4.761132717132568, 4.767422199249268, 4.773711681365967,
 4.780001163482666, 4.786290645599365, 4.7925801277160645, 4.798869609832764,
 4.805159091949463, 4.811448574066162, 4.817738056182861, 4.8240275382995605,
 4.83031702041626, 4.836606502532959, 4.842895984649658, 4.849185466766357,
 4.855474948883057, 4.861764430999756, 4.868053913116455, 4.874343395233154,
 4.8806328773498535, 4.886922359466553, 4.893211841583252, 4.899501323699951,
 4.90579080581665, 4.91208028793335, 4.918369770050049, 4.924659252166748,
 4.930948734283447, 4.9372382164001465, 4.9435272216796875, 4.949816703796387,
 4.956106185913086, 4.962395668029785, 4.968685150146484, 4.974974632263184,
 4.981264114379883, 4.987553596496582, 4.993843078613281, 5.0001325607299805,
 5.00642204284668, 5.012711524963379, 5.019001007080078, 5.025290489196777,
 5.031579971313477, 5.037869453430176, 5.044158935546875, 5.050448417663574,
 5.056737899780273, 5.063027381896973, 5.069316864013672, 5.075606346130371,
 5.08189582824707, 5.0881853103637695, 5.094474792480469, 5.100764274597168,
 5.107053756713867, 5.113343238830566, 5.119632720947266, 5.125922203063965,
 5.132211685180664, 5.138501167297363, 5.1447906494140625, 5.151080131530762,
 5.157369613647461, 5.16365909576416, 5.169948577880859, 5.176238059997559,
 5.182527542114258, 5.188817024230957, 5.195106506347656, 5.2013959884643555,
 5.207685470581055, 5.213974952697754, 5.220264434814453, 5.226553916931152,
 5.232843399047852, 5.239132881164551, 5.24542236328125, 5.251711845397949,
 5.258001327514648, 5.264290809631348, 5.270580291748047, 5.276869773864746,
 5.283159255981445, 5.2894487380981445, 5.295738220214844, 5.302027702331543,
 5.308317184448242, 5.314606666564941, 5.320896148681641, 5.32718563079834,
 5.333475112915039, 5.339764595031738, 5.3460540771484375, 5.352343559265137,
 5.358633041381836, 5.364922523498535, 5.371211528778076, 5.377501010894775,
 5.383790493011475, 5.390079975128174, 5.396369457244873, 5.402658939361572,
 5.4089484214782715, 5.415237903594971, 5.42152738571167, 5.427816867828369,
 5.434106349945068, 5.440395832061768, 5.446685314178467, 5.452974796295166,
 5.459264278411865, 5.4655537605285645, 5.471843242645264, 5.478132724761963,
 5.484422206878662, 5.490711688995361, 5.4970011711120605, 5.50329065322876,
 5.509580135345459, 5.515869617462158, 5.522159099578857, 5.528448581695557,
 5.534738063812256, 5.541027545928955, 5.547317028045654, 5.5536065101623535,
 5.559895992279053, 5.566185474395752, 5.572474956512451, 5.57876443862915,
 5.58505392074585, 5.591343402862549, 5.597632884979248, 5.603922367095947,
 5.6102118492126465, 5.616501331329346, 5.622790813446045, 5.629080295562744,
 5.635369777679443, 5.641659259796143, 5.647948741912842, 5.654238224029541,
 5.66052770614624, 5.6668171882629395, 5.673106670379639, 5.679396152496338,
 5.685685634613037, 5.691975116729736, 5.6982645988464355, 5.704554080963135,
 5.710843563079834, 5.717133045196533, 5.723422527313232, 5.729712009429932,
 5.736001491546631, 5.74229097366333, 5.748580455780029, 5.7548699378967285,
 5.761159420013428, 5.767448902130127, 5.773738384246826, 5.780027866363525,
 5.786317348480225, 5.792606830596924, 5.798895835876465, 5.805185317993164,
 5.811474800109863, 5.8177642822265625, 5.824053764343262, 5.830343246459961,
 5.83663272857666, 5.842922210693359, 5.849211692810059, 5.855501174926758,
 5.861790657043457, 5.868080139160156, 5.8743696212768555, 5.880659103393555,
 5.886948585510254, 5.893238067626953, 5.899527549743652, 5.905817031860352,
 5.912106513977051, 5.91839599609375, 5.924685478210449, 5.930974960327148,
 5.937264442443848, 5.943553924560547, 5.949843406677246, 5.956132888793945,
 5.9624223709106445, 5.968711853027344, 5.975001335144043, 5.981290817260742,
 5.987580299377441, 5.993869781494141, 6.00015926361084, 6.006448745727539,
 6.012738227844238, 6.0190277099609375, 6.025317192077637, 6.031606674194336,
 6.037896156311035, 6.044185638427734, 6.050475120544434, 6.056764602661133,
 6.063054084777832, 6.069343566894531, 6.0756330490112305, 6.08192253112793,
 6.088212013244629, 6.094501495361328, 6.100790977478027, 6.107080459594727,
 6.113369941711426, 6.119659423828125, 6.125948905944824, 6.132238388061523,
 6.138527870178223, 6.144817352294922, 6.151106834411621, 6.15739631652832,
 6.1636857986450195, 6.169975280761719, 6.176264762878418, 6.182554244995117,
 6.188843727111816, 6.195133209228516, 6.201422691345215, 6.207712173461914,
 6.214001655578613, 6.2202911376953125, 6.2265801429748535, 6.232869625091553,
 6.239159107208252, 6.245448589324951, 6.25173807144165, 6.25802755355835,
 6.264317035675049, 6.270606517791748, 6.276895999908447, 6.2831854820251465])

SPEED = np.array([0.0, 0.021216485649347305, 0.04243297129869461, 0.06364946067333221,
 0.08486594259738922, 0.10608243942260742, 0.12729892134666443,
 0.14851540327072144, 0.16973188519477844, 0.19094838201999664,
 0.21216487884521484, 0.23338136076927185, 0.25459784269332886,
 0.27581432461738586, 0.29703080654144287, 0.3182472884654999,
 0.3394637703895569, 0.3606802523136139, 0.3818967640399933, 0.4031132459640503,
 0.4243297576904297, 0.4455462098121643, 0.4667627215385437, 0.4879791736602783,
 0.5091956853866577, 0.5304121375083923, 0.5516286492347717, 0.5728451013565063,
 0.5940616130828857, 0.6152780652046204, 0.6364945769309998, 0.6577110290527344,
 0.6789275407791138, 0.7001440525054932, 0.7213605046272278, 0.7425770163536072,
 0.7637935280799866, 0.785010039806366, 0.8062264919281006, 0.82744300365448,
 0.8486595153808594, 0.8698759078979492, 0.8910924196243286, 0.912308931350708,
 0.9335254430770874, 0.9547418355941772, 0.9759583473205566, 0.997174859046936,
 1.0183913707733154, 1.0396078824996948, 1.0608242750167847, 1.082040786743164,
 1.1032572984695435, 1.1244738101959229, 1.1456902027130127, 1.166906714439392,
 1.1881232261657715, 1.2093397378921509, 1.2305561304092407, 1.2517726421356201,
 1.2729891538619995, 1.294205665588379, 1.3154220581054688, 1.3366385698318481,
 1.3578550815582275, 1.379071593284607, 1.4002881050109863, 1.4215046167373657,
 1.4427210092544556, 1.463937520980835, 1.4851540327072144, 1.5063705444335938,
 1.5275870561599731, 1.5488035678863525, 1.570020079612732, 1.5912364721298218,
 1.6124529838562012, 1.6336694955825806, 1.65488600730896, 1.6761023998260498,
 1.6973190307617188, 1.7185355424880981, 1.7397518157958984, 1.7609683275222778,
 1.7821848392486572, 1.8034013509750366, 1.824617862701416, 1.8458343744277954,
 1.8670508861541748, 1.8882673978805542, 1.9094836711883545, 1.9307001829147339,
 1.9519166946411133, 1.9731332063674927, 1.994349718093872, 2.015566349029541,
 2.036782741546631, 2.0579991340637207, 2.0792157649993896, 2.1004321575164795,
 2.1216485500335693, 2.1428651809692383, 2.164081573486328, 2.185298204421997,
 2.206514596939087, 2.227731227874756, 2.2489476203918457, 2.2701640129089355,
 2.2913804054260254, 2.3125970363616943, 2.333813428878784, 2.4950754642486572,
 2.2637276649475098, 2.26393723487854, 2.2641820907592773, 2.2644574642181396,
 2.2647645473480225, 2.265101671218872, 2.265465259552002, 2.2658560276031494,
 2.7025065422058105, 2.541306734085083, 2.7051496505737305, 2.550431728363037,
 2.554992914199829, 2.5595552921295166, 2.564110517501831, 2.5686819553375244,
 2.5732550621032715, 2.5778369903564453, 2.582432270050049, 2.5870418548583984,
 2.5916693210601807, 2.596317768096924, 2.600991725921631, 2.6056907176971436,
 2.610426187515259, 2.615199089050293, 2.6200175285339355, 2.624885320663452,
 2.6298139095306396, 2.63480544090271, 2.6398651599884033, 2.6449973583221436,
 2.650209426879883, 2.6555068492889404, 2.6608972549438477, 2.666390895843506,
 2.6719956398010254, 2.677725315093994, 2.6835954189300537, 2.689622640609741,
 2.695823907852173, 2.702226400375366, 2.708862781524658, 2.7157750129699707,
 2.7229673862457275, 2.730628728866577, 2.7387406826019287, 2.74747633934021,
 2.757042169570923, 2.7677760124206543, 2.7802655696868896, 2.795698404312134,
 2.816478967666626, 2.8438737392425537, 2.866990089416504, 2.8832757472991943,
 2.8962762355804443, 2.907616376876831, 2.9179294109344482, 2.9275131225585938,
 2.9365482330322266, 2.945148229598999, 2.953382730484009, 2.961308717727661,
 2.9689784049987793, 2.9764111042022705, 2.983635663986206, 2.9906797409057617,
 2.997544288635254, 3.0042576789855957, 3.010819673538208, 3.0172479152679443,
 3.023540496826172, 3.0297152996063232, 3.035769462585449, 3.041710138320923,
 3.047536849975586, 3.053253412246704, 3.0588674545288086, 3.0643703937530518,
 3.0697686672210693, 3.0750246047973633, 3.0802409648895264, 3.0853099822998047,
 3.0902559757232666, 3.0951123237609863, 3.0998449325561523, 3.1044671535491943,
 3.1089768409729004, 3.113379955291748, 3.11767315864563, 3.121868848800659,
 3.1259586811065674, 3.1299567222595215, 3.1338553428649902, 3.137676239013672,
 3.1414101123809814, 3.1450695991516113, 3.148658037185669, 3.152193069458008,
 3.1556708812713623, 3.1591062545776367, 3.1625053882598877, 3.1658785343170166,
 3.1692323684692383, 3.1725826263427734, 3.1759302616119385, 3.1792967319488525,
 3.1826775074005127, 3.1860976219177246, 3.1895527839660645, 3.193052291870117,
 3.1965928077697754, 3.200162410736084, 3.2037572860717773, 3.2073607444763184,
 3.2109739780426025, 3.2145869731903076, 3.2181901931762695, 3.221776247024536,
 3.2253410816192627, 3.228872060775757, 3.232367753982544, 3.2358171939849854,
 3.2392160892486572, 3.2425549030303955, 3.245830774307251, 3.2490334510803223,
 3.2521560192108154, 3.255192756652832, 3.2581377029418945, 3.2609779834747314,
 3.2637133598327637, 3.2663395404815674, 3.268839120864868, 3.2712130546569824,
 3.2734477519989014, 3.2755391597747803, 3.277477502822876, 3.2792603969573975,
 3.280888080596924, 3.2823638916015625, 3.283686399459839, 3.2848682403564453,
 3.285897731781006, 3.286796808242798, 3.287541151046753, 3.2881686687469482,
 3.288658380508423, 3.289015293121338, 3.2892467975616455, 3.2893548011779785,
 3.2893424034118652, 3.289212942123413, 3.2889695167541504, 3.28861665725708,
 3.288154125213623, 3.287588596343994, 3.2869231700897217, 3.2861599922180176,
 3.2853031158447266, 3.284355640411377, 3.283332586288452, 3.2822039127349854,
 3.281006097793579, 3.279731273651123, 3.27838397026062, 3.2769646644592285,
 3.2754836082458496, 3.2739157676696777, 3.2722864151000977, 3.2705862522125244,
 3.26881742477417, 3.2669804096221924, 3.2650725841522217, 3.2630958557128906,
 3.2610509395599365, 3.2589375972747803, 3.256758213043213, 3.2545101642608643,
 3.252194881439209, 3.249811887741089, 3.2473623752593994, 3.244846820831299,
 3.2422661781311035, 3.2396209239959717, 3.2369022369384766, 3.234121799468994,
 3.2312777042388916, 3.228367567062378, 3.2253921031951904, 3.22235107421875,
 3.2192471027374268, 3.2160773277282715, 3.2128443717956543, 3.2095468044281006,
 3.206181764602661, 3.2027502059936523, 3.1992485523223877, 3.1956863403320312,
 3.192035675048828, 3.1883323192596436, 3.184535264968872, 3.1806702613830566,
 3.176732301712036, 3.172718048095703, 3.168621301651001, 3.164447069168091,
 3.1601903438568115, 3.155855178833008, 3.1514313220977783, 3.146928310394287,
 3.142343282699585, 3.137657403945923, 3.1328935623168945, 3.1280369758605957,
 3.1230924129486084, 3.1180529594421387, 3.1129181385040283, 3.1076955795288086,
 3.102375030517578, 3.0969574451446533, 3.0914323329925537, 3.0858092308044434,
 3.0800700187683105, 3.074207067489624, 3.0682084560394287, 3.0620665550231934,
 3.0557751655578613, 3.0493223667144775, 3.0426995754241943, 3.0359034538269043,
 3.028912305831909, 3.021728754043579, 3.0143444538116455, 3.0067391395568848,
 2.9989123344421387, 2.9908549785614014, 2.982558488845825, 2.9740099906921387,
 2.965207815170288, 2.9561328887939453, 2.946777820587158, 2.937142848968506,
 2.927212953567505, 2.916977643966675, 2.90643048286438, 2.895562171936035,
 2.8843631744384766, 2.8728280067443848, 2.860949754714966, 2.8487493991851807,
 2.836236000061035, 2.8234338760375977, 2.8103556632995605, 2.7970192432403564,
 2.7834434509277344, 2.7696428298950195, 2.755634069442749, 2.741431474685669,
 2.727060317993164, 2.712526321411133, 2.697855234146118, 2.6830599308013916,
 2.6681532859802246, 2.6531617641448975, 2.6380929946899414, 2.6229658126831055,
 2.6078004837036133, 2.592611312866211, 2.5774154663085938, 2.5622293949127197,
 2.547070026397705, 2.531954050064087, 2.5168983936309814, 2.5019195079803467,
 2.4870340824127197, 2.4722342491149902, 2.4575772285461426, 2.4430387020111084,
 2.4286186695098877, 2.4143166542053223, 2.40014386177063, 2.386064291000366,
 2.3721137046813965, 2.3582794666290283, 2.344562292098999, 2.330961227416992,
 2.3174757957458496, 2.3041064739227295, 2.2908523082733154, 2.2777132987976074,
 2.2646894454956055, 2.251779794692993, 2.2389845848083496, 2.2263035774230957,
 2.2137365341186523, 2.2012827396392822, 2.1889424324035645, 2.1767148971557617,
 2.164600372314453, 2.1526081562042236, 2.1407082080841064, 2.1289398670196533,
 2.1172635555267334, 2.105699062347412, 2.094266653060913, 2.082933187484741,
 2.071713447570801, 2.060606002807617, 2.0496013164520264, 2.0387279987335205,
 2.0279579162597656, 2.017300605773926, 2.0067570209503174, 1.9963270425796509,
 1.986010193824768, 1.9758073091506958, 1.9657185077667236, 1.9557440280914307,
 1.9458839893341064, 1.93613862991333, 1.9265081882476807, 1.9169930219650269,
 1.907593011856079, 1.8983086347579956, 1.8891323804855347, 1.8800876140594482,
 1.8711512088775635, 1.8623310327529907, 1.8536205291748047, 1.8450413942337036,
 1.8365651369094849, 1.8282146453857422, 1.8199810981750488, 1.811870813369751,
 1.8038848638534546, 1.796025276184082, 1.7882943153381348, 1.7806944847106934,
 1.7732281684875488, 1.765891194343567, 1.7587047815322876, 1.7516520023345947,
 1.7447421550750732, 1.7379772663116455, 1.7313593626022339, 1.7248910665512085,
 1.7185744047164917, 1.7124119997024536, 1.7064058780670166, 1.7005584239959717,
 1.6948719024658203, 1.6893486976623535, 1.6839910745620728, 1.67880117893219,
 1.6737816333770752, 1.6689344644546509, 1.664262294769287, 1.6597669124603271,
 1.6554484367370605, 1.6513113975524902, 1.647347331047058, 1.6435492038726807,
 1.639911413192749, 1.6364243030548096, 1.6330897808074951, 1.6298900842666626,
 1.6268271207809448, 1.6238936185836792, 1.6210782527923584, 1.6183770895004272,
 1.6157817840576172, 1.6132895946502686, 1.610893964767456, 1.6085846424102783,
 1.6063566207885742, 1.604202151298523, 1.6021195650100708, 1.6000972986221313,
 1.5981305837631226, 1.5962125062942505, 1.5943371057510376, 1.5924960374832153,
 1.5906875133514404, 1.588900089263916, 1.5871275663375854, 1.585368037223816,
 1.585368037223816, 1.5871275663375854, 1.588900089263916, 1.5906875133514404,
 1.5924960374832153, 1.5943355560302734, 1.5962127447128296, 1.5981289148330688,
 1.600097417831421, 1.602117896080017, 1.6042039394378662, 1.606354832649231,
 1.6085847616195679, 1.6108921766281128, 1.6132917404174805, 1.6157840490341187,
 1.618375301361084, 1.621075987815857, 1.6238912343978882, 1.6268296241760254,
 1.6298930644989014, 1.6330901384353638, 1.6364275217056274, 1.6399086713790894,
 1.6435494422912598, 1.6473468542099, 1.6513115167617798, 1.6554486751556396,
 1.6597671508789062, 1.664262294769287, 1.6689344644546509, 1.6737816333770752,
 1.67880117893219, 1.6839910745620728, 1.6893486976623535, 1.6948719024658203,
 1.7005584239959717, 1.7064058780670166, 1.7124119997024536, 1.7185745239257812,
 1.724891185760498, 1.7313594818115234, 1.737977385520935, 1.744742751121521,
 1.751652479171753, 1.7587047815322876, 1.7658976316452026, 1.7732223272323608,
 1.780694842338562, 1.7882945537567139, 1.7960253953933716, 1.8038781881332397,
 1.8118644952774048, 1.8199821710586548, 1.8282151222229004, 1.836572289466858,
 1.8450416326522827, 1.8536207675933838, 1.862331509590149, 1.8711514472961426,
 1.8800877332687378, 1.8891403675079346, 1.8983088731765747, 1.9075934886932373,
 1.9169930219650269, 1.9265081882476807, 1.93613862991333, 1.9458839893341064,
 1.9557440280914307, 1.9657185077667236, 1.9758073091506958, 1.986010193824768,
 1.9963270425796509, 2.0067574977874756, 2.017301082611084, 2.027958393096924,
 2.0387279987335205, 2.0496113300323486, 2.0606064796447754, 2.071714162826538,
 2.0829358100891113, 2.094266891479492, 2.105710744857788, 2.1172738075256348,
 2.1289303302764893, 2.1407086849212646, 2.1525986194610596, 2.1646006107330322,
 2.17671537399292, 2.1889426708221436, 2.2012832164764404, 2.2137367725372314,
 2.226304054260254, 2.238985061645508, 2.2517802715301514, 2.2646896839141846,
 2.2777140140533447, 2.2908527851104736, 2.3041064739227295, 2.3174757957458496,
 2.330961227416992, 2.344562292098999, 2.3582794666290283, 2.3721137046813965,
 2.386064291000366, 2.40014386177063, 2.4143166542053223, 2.42863130569458,
 2.443051815032959, 2.45757794380188, 2.4722349643707275, 2.487034797668457,
 2.501920223236084, 2.5168991088867188, 2.531954765319824, 2.5470705032348633,
 2.562229871749878, 2.577415943145752, 2.592611789703369, 2.6078009605407715,
 2.6229684352874756, 2.6380913257598877, 2.6531617641448975, 2.668154001235962,
 2.683058977127075, 2.697856903076172, 2.712529182434082, 2.727058172225952,
 2.7414355278015137, 2.7556347846984863, 2.7696409225463867, 2.783442497253418,
 2.7970192432403564, 2.8103556632995605, 2.8234338760375977, 2.836236000061035,
 2.848749876022339, 2.860950469970703, 2.872828722000122, 2.884363889694214,
 2.8955626487731934, 2.9064314365386963, 2.9169766902923584, 2.927211046218872,
 2.9371418952941895, 2.9467785358428955, 2.956129550933838, 2.9652061462402344,
 2.9740102291107178, 2.982558488845825, 2.9908549785614014, 2.9989123344421387,
 3.0067391395568848, 3.0143444538116455, 3.021728754043579, 3.028912305831909,
 3.0359034538269043, 3.0426995754241943, 3.0493223667144775, 3.0557751655578613,
 3.0620665550231934, 3.0682084560394287, 3.074207067489624, 3.0800700187683105,
 3.0858092308044434, 3.0914323329925537, 3.0969574451446533, 3.102375030517578,
 3.1076955795288086, 3.1129181385040283, 3.1180529594421387, 3.1230924129486084,
 3.1280369758605957, 3.1328935623168945, 3.137657403945923, 3.142343282699585,
 3.1469295024871826, 3.1514337062835693, 3.1558549404144287, 3.1601922512054443,
 3.1644463539123535, 3.1686203479766846, 3.172715663909912, 3.1767327785491943,
 3.180670738220215, 3.1845335960388184, 3.188321828842163, 3.1920361518859863,
 3.195678472518921, 3.199249267578125, 3.2027506828308105, 3.206181764602661,
 3.209547281265259, 3.2128443717956543, 3.2160773277282715, 3.219247341156006,
 3.2223520278930664, 3.225393295288086, 3.2283692359924316, 3.2312774658203125,
 3.2341227531433105, 3.2369022369384766, 3.2396163940429688, 3.242265224456787,
 3.244846820831299, 3.2473626136779785, 3.249811887741089, 3.252195119857788,
 3.254509687423706, 3.256760835647583, 3.2589380741119385, 3.261051893234253,
 3.2630951404571533, 3.265071392059326, 3.2669837474823, 3.26881742477417,
 3.2705869674682617, 3.2722864151000977, 3.2739157676696777, 3.2754836082458496,
 3.2769646644592285, 3.27838397026062, 3.279731273651123, 3.281006097793579,
 3.2822039127349854, 3.283332586288452, 3.284355640411377, 3.2853031158447266,
 3.2861599922180176, 3.2869231700897217, 3.287588596343994, 3.288154125213623,
 3.28861665725708, 3.2889695167541504, 3.289212942123413, 3.289342164993286,
 3.2893543243408203, 3.2892470359802246, 3.289016008377075, 3.288658380508423,
 3.2881710529327393, 3.2875514030456543, 3.2867963314056396, 3.285902976989746,
 3.2848684787750244, 3.28368878364563, 3.2823634147644043, 3.2808878421783447,
 3.2792606353759766, 3.277477741241455, 3.275538921356201, 3.273446798324585,
 3.271211624145508, 3.268841028213501, 3.266340732574463, 3.2637112140655518,
 3.260981321334839, 3.2581372261047363, 3.255187511444092, 3.252155303955078,
 3.249032974243164, 3.2458279132843018, 3.242555856704712, 3.239215850830078,
 3.235807180404663, 3.2323670387268066, 3.2288713455200195, 3.225339889526367,
 3.2217769622802734, 3.2181894779205322, 3.2145538330078125, 3.2109758853912354,
 3.2073609828948975, 3.20375394821167, 3.2001614570617676, 3.196592330932617,
 3.193052053451538, 3.1895499229431152, 3.1860954761505127, 3.1826813220977783,
 3.1792964935302734, 3.1759328842163086, 3.172581911087036, 3.1692276000976562,
 3.1658782958984375, 3.1625051498413086, 3.1591062545776367, 3.1556708812713623,
 3.152193069458008, 3.148658037185669, 3.1450695991516113, 3.1414101123809814,
 3.137676239013672, 3.1338553428649902, 3.1299567222595215, 3.1259586811065674,
 3.121868848800659, 3.11767315864563, 3.113379955291748, 3.1089768409729004,
 3.1044671535491943, 3.0998449325561523, 3.0951123237609863, 3.090266466140747,
 3.085310459136963, 3.080238103866577, 3.075059652328491, 3.0697684288024902,
 3.0643720626831055, 3.058866262435913, 3.053255081176758, 3.0475351810455322,
 3.0417065620422363, 3.035768985748291, 3.0297141075134277, 3.0235421657562256,
 3.0172483921051025, 3.010817766189575, 3.0042543411254883, 2.997544050216675,
 2.9906766414642334, 2.9836373329162598, 2.976412296295166, 2.968975305557251,
 2.9613053798675537, 2.953382968902588, 2.9451451301574707, 2.9365458488464355,
 2.9275097846984863, 2.9179117679595947, 2.9076106548309326, 2.8962769508361816,
 2.8832850456237793, 2.866903305053711, 2.843876361846924, 2.816549777984619,
 2.7957000732421875, 2.780270576477051, 2.767778158187866, 2.7570290565490723,
 2.7474794387817383, 2.7387397289276123, 2.7306289672851562, 2.7229671478271484,
 2.7157747745513916, 2.708862066268921, 2.7022244930267334, 2.695821523666382,
 2.6896204948425293, 2.683595895767212, 2.677724838256836, 2.6719963550567627,
 2.6663901805877686, 2.6608972549438477, 2.6555063724517822, 2.6502091884613037,
 2.6449971199035645, 2.639866352081299, 2.634805917739868, 2.6298139095306396,
 2.624885320663452, 2.6200175285339355, 2.615199089050293, 2.610426187515259,
 2.6056907176971436, 2.600991725921631, 2.596317768096924, 2.5916693210601807,
 2.587042808532715, 2.582430601119995, 2.5778379440307617, 2.573251485824585,
 2.5686800479888916, 2.564110279083252, 2.5595521926879883, 2.554992437362671,
 2.706442356109619, 2.2671773433685303, 2.541306734085083, 2.7025065422058105,
 2.265855073928833, 2.265465259552002, 2.265101671218872, 2.2647645473480225,
 2.264457941055298, 2.264181613922119, 2.263937473297119, 2.2637276649475098,
 2.355029344558716, 2.333812952041626, 2.312596321105957, 2.291379928588867,
 2.2701632976531982, 2.2489469051361084, 2.2277302742004395, 2.2065138816833496,
 2.1852972507476807, 2.164080858230591, 2.142864227294922, 2.121647834777832,
 2.100431203842163, 2.0792148113250732, 2.0579981803894043, 2.0367817878723145,
 2.0155651569366455, 1.9943487644195557, 1.9731322526931763, 1.9519157409667969,
 1.9306992292404175, 1.909482717514038, 1.8882662057876587, 1.8670496940612793,
 1.8458331823349, 1.8246166706085205, 1.8034001588821411, 1.7821836471557617,
 1.7609671354293823, 1.739750623703003, 1.7185341119766235, 1.6973176002502441,
 1.6761010885238647, 1.6548845767974854, 1.6336696147918701, 1.6124531030654907,
 1.5912365913391113, 1.570020079612732, 1.5488035678863525, 1.5275870561599731,
 1.5063705444335938, 1.4851540327072144, 1.463937520980835, 1.4427210092544556,
 1.4215046167373657, 1.4002881050109863, 1.379071593284607, 1.3578550815582275,
 1.3366385698318481, 1.3154220581054688, 1.2942055463790894, 1.27298903465271,
 1.2517725229263306, 1.2305560111999512, 1.2093394994735718, 1.1881229877471924,
 1.166906476020813, 1.1456899642944336, 1.1244734525680542, 1.1032569408416748,
 1.0820404291152954, 1.060823917388916, 1.0396074056625366, 1.0183908939361572,
 0.9971743822097778, 0.9759578704833984, 0.954741358757019, 0.9335248470306396,
 0.9123083353042603, 0.8910918235778809, 0.8698753118515015, 0.8486588001251221,
 0.8274422883987427, 0.8062257766723633, 0.7850092649459839, 0.7637927532196045,
 0.7425762414932251, 0.7213597297668457, 0.7001432180404663, 0.6789267063140869,
 0.6577101945877075, 0.6364936828613281, 0.6152771711349487, 0.5940606594085693,
 0.5728441476821899, 0.5516276359558105, 0.5304111242294312, 0.5091946125030518,
 0.48797813057899475, 0.46676161885261536, 0.44554510712623596,
 0.42432859539985657, 0.4031120836734772, 0.3818955719470978,
 0.3606790602207184, 0.339462548494339, 0.3182460367679596, 0.2970295250415802,
 0.2758130133152008, 0.2545965015888214, 0.2333800047636032,
 0.21216349303722382, 0.19094859063625336, 0.16973207890987396,
 0.14851556718349457, 0.12729905545711517, 0.10608255118131638,
 0.08486603945493698, 0.06364952772855759, 0.04243301972746849,
 0.021216509863734245, 0.0])

# Lookup from heading angle (THETA) to boat speed (SPEED), interpolated with
# interp1d's defaults (linear between samples). MultiMarkEnv.speed_from_heading
# queries it with headings in radians.
# NOTE(review): with the default bounds handling, a query outside
# [THETA.min(), THETA.max()] raises ValueError — confirm THETA spans every
# heading the environment can produce.
speed_interp = interp1d(THETA, SPEED)

In [3]:
from collections import deque

class MultiMarkEnv(Env):
    """Gymnasium environment in which an agent steers a boat towards a mark.

    The boat has a 2-D position and a heading in radians.  A heading of 0
    moves the boat along +y and a heading of pi/2 along +x (see
    `apply_action`).  The speed for a heading comes from the module-level
    `speed_interp` lookup, reduced by a penalty for recent turning.  Each
    `step` applies one steering action and rewards the velocity made good
    (VMG) towards the mark.

    Observation (float32, shape (6,)):
        0. distance to the mark divided by the field diagonal
        1. VMG towards the mark
        2. heading in radians, wrapped to [0, 2*pi)
        3. bearing of the mark in radians, wrapped to [0, 2*pi)
        4. 1.0 if the last step crossed between the two heading half-planes
        5. 1.0 if the episode ended because the mark was reached

    Action (shape (1,), in [-1, 1]): requested heading change as a fraction
    of pi; the applied change is limited by `TURNING_RATE`.

    Config keys read by `__init__`:
        max_remaining_seconds: time budget of one episode, in seconds.
        coords: array whose rows are [start_x, start_y, target_x, target_y];
            one row is drawn at every `reset`.
        plot_fn: callable that receives the trajectory (a list of dicts) of
            the episode that just finished; it is invoked from `reset`.
    """

    # Number of most recent per-step turn penalties summed into the slowdown.
    PENALTY_WINDOW = 3
    # The mark counts as reached once the boat is closer than this distance.
    MARK_RADIUS = 20

    def __init__(self, config):
        """Store the field geometry, boat constants and gym spaces."""
        self.MIN_X = -250
        self.MAX_X = 250
        self.MIN_Y = 0
        self.MAX_Y = 250
        self.MAX_SPEED = 10
        self.TURNING_RATE = 12.5 * np.pi / 180
        self.ITERS_PER_ACTION = 5
        self.SPEED_PENALTY = 0.4
        self.SPEED_RECOVERY_IN_SECONDS = 4
        self.MAX_REMAINING_SECONDS = config['max_remaining_seconds']
        self.target_x = 0
        self.target_y = 0
        # Same window length as in reset(), so both places agree.
        self.penalty_queue = deque(maxlen=self.PENALTY_WINDOW)
        self.DEAD_ZONE_ANGLE = 30 * np.pi / 180

        self.plot_fn = config['plot_fn']

        self.trajectory = []

        # Field diagonal, used to normalise the distance observation.
        self.MAX_DISTANCE = np.sqrt((self.MAX_X - self.MIN_X) ** 2 + (self.MAX_Y - self.MIN_Y) ** 2)

        self.coords = config['coords']

        self.observation = np.zeros((6,), dtype=np.float32)
        self.action_space = spaces.Box(low=-1, high=1, shape=(1,))

        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(6,))
        # Gymnasium documents reward_range as a (min, max) tuple, not a space.
        self.reward_range = (-1.0, float(self.MAX_SPEED))

    def reset(self, seed = None, options = None):
        """Start a new episode and return `(observation, info)`.

        If the previous episode left a trajectory, it is passed to `plot_fn`
        before being cleared.  The start/target row is drawn with the
        environment's own seeded generator, so `seed` makes the draw
        reproducible.
        """
        super().reset(seed=seed)

        self.tack_count = 0
        self.heading = 0
        self.speed = 0
        self.vmg = 0
        self.has_tacked = False
        self.remaining_seconds = self.MAX_REMAINING_SECONDS
        self.delta_t = 0
        self.penalty_queue = deque(maxlen=self.PENALTY_WINDOW)
        self.reward = 0
        self.is_terminal = False
        self.is_truncated = False

        # self.np_random is seeded by super().reset(seed=...).
        coord_idx = self.np_random.integers(self.coords.shape[0])
        coord = self.coords[coord_idx]

        self.x = float(coord[0])
        self.y = float(coord[1])
        self.target_x = coord[2]
        self.target_y = coord[3]

        # Both offsets are taken relative to the start position.
        dx = self.target_x - self.x
        dy = self.target_y - self.y
        self.distance = np.sqrt(dx ** 2 + dy ** 2)
        self.initial_distance = self.distance
        self.angle_to_mark = self.wrap_phase(np.arctan2(dx, dy))

        # Build a fresh observation so nothing from the previous episode's
        # final step leaks into the first observation of this one.
        self.observation = self._build_observation()

        if self.trajectory:
            self.plot_fn(self.trajectory)

        self.trajectory = []
        self.append_to_trajectory()

        return self.observation, {}

    def _build_observation(self):
        """Pack the current state into the float32 observation vector."""
        reached_mark = self.is_terminal and not self.is_truncated
        return np.array([
            self.distance / self.MAX_DISTANCE,
            self.vmg,
            self.heading,
            self.angle_to_mark,
            self.has_tacked,
            reached_mark,
        ], dtype=np.float32)

    def append_to_trajectory(self):
        """Record the current position, VMG, heading and reward; return self."""
        heading_deg = self.heading * 180 / np.pi
        # Store the heading as a signed angle in (-180, 180] degrees.
        if heading_deg > 180:
            heading_deg = heading_deg - 360
        self.trajectory.append({'x': self.x, 'y': self.y, 'meta': {'vmg': self.vmg, 'heading': heading_deg, 'reward': self.reward}})
        return self

    def step(self, action):
        """Apply one action and return `(obs, reward, terminated, truncated, info)`.

        `action` is an array whose first element is the steering command.
        """
        # each action spans 5 seconds
        dt = 5
        self.apply_action(action[0], dt).calculate_reward().is_terminal_state()
        self.observation = self._build_observation()

        self.append_to_trajectory()

        return self.observation, self.reward, self.is_terminal, self.is_truncated, {}

    def render(self, mode='human'):
        """No-op; episodes are visualised through `plot_fn` instead."""
        pass

    def calculate_momentum_penalty(self, heading_change):
        """Return the turn penalty: |heading_change| as a fraction of pi."""
        penalty = abs(heading_change) / np.pi
        return penalty

    def apply_action(self, action, dt):
        """Turn the boat, move it for `dt` seconds and refresh derived state.

        Args:
            action: steering command; `action * pi` is the requested heading
                change, limited in magnitude to `TURNING_RATE * dt`.
            dt: duration of the action in seconds.

        Returns:
            self, so calls can be chained.
        """
        desired_heading_change = action * np.pi
        prev_heading = self.heading

        # Limit the magnitude to what the boat can turn in dt, keep the sign.
        max_heading_change = self.TURNING_RATE * dt
        actual_heading_change = np.minimum(abs(desired_heading_change), max_heading_change)
        actual_heading_change *= 1 if action >= 0 else -1

        heading = self.wrap_phase(self.heading + actual_heading_change)

        # momentum penalty: turning over the last PENALTY_WINDOW steps slows
        # the boat; the summed penalty is capped so speed never goes negative.
        penalty = self.calculate_momentum_penalty(actual_heading_change)

        self.penalty_queue.append(penalty)

        cumulative_penalty = min(1, sum(self.penalty_queue))

        self.speed = self.speed_from_heading(heading).reshape(()) * (1 - cumulative_penalty)

        # Heading is measured from the +y axis, hence sin for x and cos for y.
        self.x += self.speed * np.sin(heading) * dt
        self.y += self.speed * np.cos(heading) * dt

        self.heading = heading
        # A tack is a change of side relative to the heading == pi boundary.
        self.has_tacked = (prev_heading < np.pi) != (heading < np.pi)

        dx = self.target_x - self.x
        dy = self.target_y - self.y

        # Same convention as the heading: bearing measured from the +y axis.
        self.angle_to_mark = self.wrap_phase(np.arctan2(dx, dy))

        target_unit = np.stack([np.cos(self.angle_to_mark), np.sin(self.angle_to_mark)])
        heading_unit = np.stack([np.cos(heading), np.sin(heading)])

        # Speed projected onto the direction of the mark.
        self.vmg = (target_unit @ heading_unit) * self.speed

        self.distance = np.sqrt(dx ** 2 + dy ** 2)
        # NOTE(review): this is a per-step flag (0 or 1), not a running count,
        # despite the name — confirm the intent before anything relies on it.
        self.tack_count = 1 if self.has_tacked else 0
        self.remaining_seconds -= dt
        self.delta_t = dt

        return self

    def wrap_phase(self, angles):
        """Wrap angle(s) in radians into [0, 2*pi)."""
        # The second pass folds back a first-pass result that floating-point
        # rounding can leave at exactly 2*pi (tiny negative inputs).
        return np.remainder(np.remainder(angles, 2 * np.pi) + 2 * np.pi, 2 * np.pi)

    def speed_from_heading(self, headings):
        """Look up the speed for heading(s); zero within the dead zone around pi."""
        speed = speed_interp(headings)
        return np.where(np.abs(headings - np.pi) < self.DEAD_ZONE_ANGLE, 0, speed)

    def is_terminal_state(self):
        """Update `is_terminal` / `is_truncated` from the current state; return self.

        Reaching the mark sets only `is_terminal`.  Leaving the field or
        exhausting the time budget sets both flags.
        """
        if self.distance < self.MARK_RADIUS:
            self.is_terminal = True
            self.is_truncated = False
            return self

        has_collided = self.x < self.MIN_X or self.x > self.MAX_X or self.y < self.MIN_Y or self.y > self.MAX_Y

        if has_collided or self.remaining_seconds < 1:
            self.is_terminal = True
            self.is_truncated = True
            return self

        self.is_terminal = False
        self.is_truncated = False
        return self

    def calculate_reward(self):
        """Set the step reward to a tenth of the current VMG; return self."""
        self.reward = 0.1 * self.vmg
        return self

In [4]:
import plotly.graph_objects as go
import time
import ipywidgets as widgets
from IPython.display import display

def _make_trace_figure(x_range, x_dtick, y_range, y_dtick, height, title=None):
    """Create an 800px-wide FigureWidget holding one 'markers+lines' scatter.

    Returns the figure together with whatever `add_scatter` returned, so the
    caller can keep binding both under the names this cell has always defined.
    """
    figure = go.FigureWidget()
    added = figure.add_scatter(mode='markers+lines')
    figure.update_xaxes(range=x_range, dtick=x_dtick)
    figure.update_yaxes(range=y_range, dtick=y_dtick)
    figure.layout.width = 800
    figure.layout.height = height
    if title is not None:
        figure.layout.title = title
    return figure, added


# Boat track over the sailing field.
fig, scatter = _make_trace_figure([-250, 250], 25, [0, 250], 25, 800)

# Per-step heading (degrees) of the plotted episode.
heading_fig, heading_scatter = _make_trace_figure(
    [0, 100], 25, [-90, 90], 30, 400, title="Heading over time")

# Per-step velocity made good of the plotted episode.
vmg_fig, vmg_scatter = _make_trace_figure(
    [0, 100], 25, [-4, 4], 2, 400, title="VMG over time")

# Text area that plot() rewrites with the episode summary.
out = widgets.Output()

# Show the summary first, then the three live figures.
display(out, fig, heading_fig, vmg_fig)

# Number of episodes handed to plot() so far.
episode = 0

def plot(data):
    """Redraw the live figures and the text summary from one episode.

    `data` is an episode trajectory: a list of dicts with keys 'x', 'y' and
    'meta', where 'meta' holds 'vmg', 'heading' and 'reward'.  Every call
    advances the global episode counter, but only every 50th call redraws.
    """
    global episode
    episode += 1
    if episode % 50:
        return

    # Collect every series in a single pass over the trajectory.
    xs, ys, hover_rows, heading_series, vmg_series = [], [], [], [], []
    total_reward = 0
    for point in data:
        meta = point['meta']
        xs.append(point['x'])
        ys.append(point['y'])
        hover_rows.append({key: '%.3f' % value for (key, value) in meta.items()})
        heading_series.append(meta['heading'])
        vmg_series.append(meta['vmg'])
        total_reward = total_reward + meta['reward']

    summary = {
        'final_position': (xs[-1], ys[-1]),
        'reward': total_reward,
        'iters': len(data),
        'episode': episode,
    }

    # Push the new series into the existing traces.
    track_trace = fig.data[0]
    track_trace.x = xs
    track_trace.y = ys
    track_trace.hovertext = hover_rows

    heading_trace = heading_fig.data[0]
    heading_trace.x = list(range(len(heading_series)))
    heading_trace.y = heading_series

    vmg_trace = vmg_fig.data[0]
    vmg_trace.x = list(range(len(vmg_series)))
    vmg_trace.y = vmg_series

    # Re-apply the fixed layout of the track figure.
    fig.update_xaxes(range=[-250, 250], dtick=25)
    fig.update_yaxes(range=[0, 250], dtick=25)
    fig.layout.width = 800
    fig.layout.height = 800

    # Heading figure: only the y axis and the size are re-applied; the x axis
    # is left as it is.
    heading_fig.update_yaxes(range=[-90, 90], dtick=30)
    heading_fig.layout.width = 800
    heading_fig.layout.height = 400

    # VMG figure: only the size is re-applied; both axes are left as they are.
    vmg_fig.layout.width = 800
    vmg_fig.layout.height = 400

    # Replace the previous summary line instead of appending to it.
    with out:
        out.clear_output(wait=True)
        print(f"Global Meta: {summary}")

None

Output()

FigureWidget({
    'data': [{'mode': 'markers+lines', 'type': 'scatter', 'uid': '2cb6246e-77c2-4648-87a1-93a849d89b41'}],
    'layout': {'height': 800,
               'template': '...',
               'width': 800,
               'xaxis': {'dtick': 25, 'range': [-250, 250]},
               'yaxis': {'dtick': 25, 'range': [0, 250]}}
})

FigureWidget({
    'data': [{'mode': 'markers+lines', 'type': 'scatter', 'uid': 'a9e17480-fb1a-44de-bebe-c24ca87a7ca7'}],
    'layout': {'height': 400,
               'template': '...',
               'title': {'text': 'Heading over time'},
               'width': 800,
               'xaxis': {'dtick': 25, 'range': [0, 100]},
               'yaxis': {'dtick': 30, 'range': [-90, 90]}}
})

FigureWidget({
    'data': [{'mode': 'markers+lines', 'type': 'scatter', 'uid': '4252125c-792f-427c-ae50-56506bd19dde'}],
    'layout': {'height': 400,
               'template': '...',
               'title': {'text': 'VMG over time'},
               'width': 800,
               'xaxis': {'dtick': 25, 'range': [0, 100]},
               'yaxis': {'dtick': 2, 'range': [-4, 4]}}
})

In [5]:
# Configuration

# y coordinate of the mark; the boat starts at the origin.
r = 250

config = {
    # NOTE(review): 'max_tacks' is not read by MultiMarkEnv.__init__; it has
    # no effect on the environment as written.
    'max_tacks': 2,
    # Each row is [start_x, start_y, target_x, target_y].
    # Two-leg alternative (out and back):
    # 'coords': np.array([[0, 0, 0, r], [0, r, 0, 0]]),
    'coords': np.array([[0, 0, 0, r]]),
    # Time budget of one episode, in seconds.
    'max_remaining_seconds': 500,
    # Called by the environment with each finished episode's trajectory.
    'plot_fn': plot
}

# Initialize Environment
env = MultiMarkEnv(config)

# Initialize PPO model.
# device='auto' uses the GPU when one is available and otherwise falls back
# to the CPU, so this cell also runs on machines without CUDA.
model = PPO("MlpPolicy", env, verbose=1, device='auto')

# Train the model
model.learn(total_timesteps=1_000_000)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 60.6     |
|    ep_rew_mean     | 0.128    |
| time/              |          |
|    fps             | 2551     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 61.7         |
|    ep_rew_mean          | 0.142        |
| time/                   |              |
|    fps                  | 1907         |
|    iterations           | 2            |
|    time_elapsed         | 2            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0040748306 |
|    clip_fraction        | 0.0281       |
|    clip_range           | 0.2          |
|    e

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 95.2         |
|    ep_rew_mean          | 0.931        |
| time/                   |              |
|    fps                  | 1825         |
|    iterations           | 11           |
|    time_elapsed         | 12           |
|    total_timesteps      | 22528        |
| train/                  |              |
|    approx_kl            | 0.0043028425 |
|    clip_fraction        | 0.026        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.16        |
|    explained_variance   | 0.107        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0392       |
|    n_updates            | 100          |
|    policy_gradient_loss | -0.00352     |
|    std                  | 0.759        |
|    value_loss           | 0.0163       |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 93.5        |
|    ep_rew_mean          | 1.32        |
| time/                   |             |
|    fps                  | 1826        |
|    iterations           | 20          |
|    time_elapsed         | 22          |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.003217151 |
|    clip_fraction        | 0.0396      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.886      |
|    explained_variance   | 0.625       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00797    |
|    n_updates            | 190         |
|    policy_gradient_loss | -0.00294    |
|    std                  | 0.581       |
|    value_loss           | 0.0148      |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 96.2         |
|    ep_rew_mean          | 1.98         |
| time/                   |              |
|    fps                  | 1822         |
|    iterations           | 29           |
|    time_elapsed         | 32           |
|    total_timesteps      | 59392        |
| train/                  |              |
|    approx_kl            | 0.0065228515 |
|    clip_fraction        | 0.0548       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.558       |
|    explained_variance   | 0.837        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.00522      |
|    n_updates            | 280          |
|    policy_gradient_loss | -0.00437     |
|    std                  | 0.416        |
|    value_loss           | 0.0194       |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 99          |
|    ep_rew_mean          | 3.16        |
| time/                   |             |
|    fps                  | 1821        |
|    iterations           | 38          |
|    time_elapsed         | 42          |
|    total_timesteps      | 77824       |
| train/                  |             |
|    approx_kl            | 0.008595312 |
|    clip_fraction        | 0.0799      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.148      |
|    explained_variance   | 0.947       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00763    |
|    n_updates            | 370         |
|    policy_gradient_loss | -0.00529    |
|    std                  | 0.273       |
|    value_loss           | 0.0137      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 94.1        |
|    ep_rew_mean          | 4.36        |
| time/                   |             |
|    fps                  | 1819        |
|    iterations           | 47          |
|    time_elapsed         | 52          |
|    total_timesteps      | 96256       |
| train/                  |             |
|    approx_kl            | 0.007141703 |
|    clip_fraction        | 0.0978      |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.258       |
|    explained_variance   | 0.982       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0155     |
|    n_updates            | 460         |
|    policy_gradient_loss | -0.00337    |
|    std                  | 0.183       |
|    value_loss           | 0.0107      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 71.7        |
|    ep_rew_mean          | 4.52        |
| time/                   |             |
|    fps                  | 1818        |
|    iterations           | 56          |
|    time_elapsed         | 63          |
|    total_timesteps      | 114688      |
| train/                  |             |
|    approx_kl            | 0.017245356 |
|    clip_fraction        | 0.127       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.629       |
|    explained_variance   | 0.996       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00427     |
|    n_updates            | 550         |
|    policy_gradient_loss | -0.00508    |
|    std                  | 0.126       |
|    value_loss           | 0.00337     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 55.8        |
|    ep_rew_mean          | 4.55        |
| time/                   |             |
|    fps                  | 1816        |
|    iterations           | 65          |
|    time_elapsed         | 73          |
|    total_timesteps      | 133120      |
| train/                  |             |
|    approx_kl            | 0.012117287 |
|    clip_fraction        | 0.147       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.03        |
|    explained_variance   | 0.998       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00365     |
|    n_updates            | 640         |
|    policy_gradient_loss | -0.00682    |
|    std                  | 0.084       |
|    value_loss           | 0.00258     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 43          |
|    ep_rew_mean          | 4.58        |
| time/                   |             |
|    fps                  | 1816        |
|    iterations           | 74          |
|    time_elapsed         | 83          |
|    total_timesteps      | 151552      |
| train/                  |             |
|    approx_kl            | 0.023533963 |
|    clip_fraction        | 0.178       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.42        |
|    explained_variance   | 0.998       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0161     |
|    n_updates            | 730         |
|    policy_gradient_loss | 0.00446     |
|    std                  | 0.0575      |
|    value_loss           | 0.00191     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 40.4        |
|    ep_rew_mean          | 4.54        |
| time/                   |             |
|    fps                  | 1813        |
|    iterations           | 83          |
|    time_elapsed         | 93          |
|    total_timesteps      | 169984      |
| train/                  |             |
|    approx_kl            | 0.015379546 |
|    clip_fraction        | 0.219       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.67        |
|    explained_variance   | 0.999       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0161     |
|    n_updates            | 820         |
|    policy_gradient_loss | 0.00429     |
|    std                  | 0.0452      |
|    value_loss           | 0.00219     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 36.3        |
|    ep_rew_mean          | 4.62        |
| time/                   |             |
|    fps                  | 1812        |
|    iterations           | 92          |
|    time_elapsed         | 103         |
|    total_timesteps      | 188416      |
| train/                  |             |
|    approx_kl            | 0.026499782 |
|    clip_fraction        | 0.243       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.95        |
|    explained_variance   | 0.999       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0278      |
|    n_updates            | 910         |
|    policy_gradient_loss | 0.0133      |
|    std                  | 0.0339      |
|    value_loss           | 0.00223     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 38.1        |
|    ep_rew_mean          | 4.56        |
| time/                   |             |
|    fps                  | 1812        |
|    iterations           | 101         |
|    time_elapsed         | 114         |
|    total_timesteps      | 206848      |
| train/                  |             |
|    approx_kl            | 0.025193749 |
|    clip_fraction        | 0.284       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.12        |
|    explained_variance   | 1           |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0133      |
|    n_updates            | 1000        |
|    policy_gradient_loss | 0.0207      |
|    std                  | 0.0287      |
|    value_loss           | 0.000681    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 38.6        |
|    ep_rew_mean          | 4.59        |
| time/                   |             |
|    fps                  | 1811        |
|    iterations           | 110         |
|    time_elapsed         | 124         |
|    total_timesteps      | 225280      |
| train/                  |             |
|    approx_kl            | 0.040319644 |
|    clip_fraction        | 0.294       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.28        |
|    explained_variance   | 0.999       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0211      |
|    n_updates            | 1090        |
|    policy_gradient_loss | 0.0167      |
|    std                  | 0.0244      |
|    value_loss           | 0.00102     |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 36.2       |
|    ep_rew_mean          | 4.59       |
| time/                   |            |
|    fps                  | 1810       |
|    iterations           | 119        |
|    time_elapsed         | 134        |
|    total_timesteps      | 243712     |
| train/                  |            |
|    approx_kl            | 0.05447334 |
|    clip_fraction        | 0.365      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.37       |
|    explained_variance   | 0.999      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0127     |
|    n_updates            | 1180       |
|    policy_gradient_loss | 0.0343     |
|    std                  | 0.0226     |
|    value_loss           | 0.00114    |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 35.4        |
|    ep_rew_mean          | 4.56        |
| time/                   |             |
|    fps                  | 1808        |
|    iterations           | 128         |
|    time_elapsed         | 144         |
|    total_timesteps      | 262144      |
| train/                  |             |
|    approx_kl            | 0.033118322 |
|    clip_fraction        | 0.309       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.42        |
|    explained_variance   | 0.999       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00124    |
|    n_updates            | 1270        |
|    policy_gradient_loss | 0.0219      |
|    std                  | 0.0216      |
|    value_loss           | 0.00111     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 34.9        |
|    ep_rew_mean          | 4.59        |
| time/                   |             |
|    fps                  | 1808        |
|    iterations           | 137         |
|    time_elapsed         | 155         |
|    total_timesteps      | 280576      |
| train/                  |             |
|    approx_kl            | 0.037884243 |
|    clip_fraction        | 0.346       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.45        |
|    explained_variance   | 1           |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00427    |
|    n_updates            | 1360        |
|    policy_gradient_loss | 0.027       |
|    std                  | 0.0209      |
|    value_loss           | 0.00113     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 35.5        |
|    ep_rew_mean          | 4.61        |
| time/                   |             |
|    fps                  | 1806        |
|    iterations           | 147         |
|    time_elapsed         | 166         |
|    total_timesteps      | 301056      |
| train/                  |             |
|    approx_kl            | 0.032189522 |
|    clip_fraction        | 0.341       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.55        |
|    explained_variance   | 0.999       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0123      |
|    n_updates            | 1460        |
|    policy_gradient_loss | 0.0283      |
|    std                  | 0.0187      |
|    value_loss           | 0.00134     |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 40.2       |
|    ep_rew_mean          | 4.34       |
| time/                   |            |
|    fps                  | 1806       |
|    iterations           | 157        |
|    time_elapsed         | 177        |
|    total_timesteps      | 321536     |
| train/                  |            |
|    approx_kl            | 0.10632736 |
|    clip_fraction        | 0.369      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.59       |
|    explained_variance   | 0.993      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.00681   |
|    n_updates            | 1560       |
|    policy_gradient_loss | 0.026      |
|    std                  | 0.0182     |
|    value_loss           | 0.00548    |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 37.2        |
|    ep_rew_mean          | 4.56        |
| time/                   |             |
|    fps                  | 1806        |
|    iterations           | 167         |
|    time_elapsed         | 189         |
|    total_timesteps      | 342016      |
| train/                  |             |
|    approx_kl            | 0.040262707 |
|    clip_fraction        | 0.317       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.46        |
|    explained_variance   | 0.998       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00884     |
|    n_updates            | 1660        |
|    policy_gradient_loss | 0.0145      |
|    std                  | 0.0207      |
|    value_loss           | 0.00275     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 36.8       |
|    ep_rew_mean          | 4.56       |
| time/                   |            |
|    fps                  | 1805       |
|    iterations           | 176        |
|    time_elapsed         | 199        |
|    total_timesteps      | 360448     |
| train/                  |            |
|    approx_kl            | 0.03015726 |
|    clip_fraction        | 0.293      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.35       |
|    explained_variance   | 0.996      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.019     |
|    n_updates            | 1750       |
|    policy_gradient_loss | -0.000396  |
|    std                  | 0.0233     |
|    value_loss           | 0.00439    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 32.4       |
|    ep_rew_mean          | 4.62       |
| time/                   |            |
|    fps                  | 1805       |
|    iterations           | 186        |
|    time_elapsed         | 210        |
|    total_timesteps      | 380928     |
| train/                  |            |
|    approx_kl            | 0.02370812 |
|    clip_fraction        | 0.279      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.36       |
|    explained_variance   | 0.999      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0093     |
|    n_updates            | 1850       |
|    policy_gradient_loss | 0.0186     |
|    std                  | 0.0228     |
|    value_loss           | 0.00169    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 32.3       |
|    ep_rew_mean          | 4.62       |
| time/                   |            |
|    fps                  | 1805       |
|    iterations           | 196        |
|    time_elapsed         | 222        |
|    total_timesteps      | 401408     |
| train/                  |            |
|    approx_kl            | 0.05045715 |
|    clip_fraction        | 0.347      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.48       |
|    explained_variance   | 0.998      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.00022   |
|    n_updates            | 1950       |
|    policy_gradient_loss | 0.0263     |
|    std                  | 0.0202     |
|    value_loss           | 0.00179    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 32          |
|    ep_rew_mean          | 4.63        |
| time/                   |             |
|    fps                  | 1804        |
|    iterations           | 205         |
|    time_elapsed         | 232         |
|    total_timesteps      | 419840      |
| train/                  |             |
|    approx_kl            | 0.018881245 |
|    clip_fraction        | 0.285       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.54        |
|    explained_variance   | 0.999       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.004       |
|    n_updates            | 2040        |
|    policy_gradient_loss | 0.0231      |
|    std                  | 0.019       |
|    value_loss           | 0.00146     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 32.2        |
|    ep_rew_mean          | 4.64        |
| time/                   |             |
|    fps                  | 1800        |
|    iterations           | 214         |
|    time_elapsed         | 243         |
|    total_timesteps      | 438272      |
| train/                  |             |
|    approx_kl            | 0.025681788 |
|    clip_fraction        | 0.287       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.6         |
|    explained_variance   | 0.997       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00482     |
|    n_updates            | 2130        |
|    policy_gradient_loss | 0.0239      |
|    std                  | 0.0181      |
|    value_loss           | 0.0022      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 36.7       |
|    ep_rew_mean          | 1.42       |
| time/                   |            |
|    fps                  | 1799       |
|    iterations           | 224        |
|    time_elapsed         | 255        |
|    total_timesteps      | 458752     |
| train/                  |            |
|    approx_kl            | 0.30219352 |
|    clip_fraction        | 0.382      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.59       |
|    explained_variance   | 0.869      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0369     |
|    n_updates            | 2230       |
|    policy_gradient_loss | 0.0214     |
|    std                  | 0.0182     |
|    value_loss           | 0.106      |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 35          |
|    ep_rew_mean          | 4.23        |
| time/                   |             |
|    fps                  | 1797        |
|    iterations           | 233         |
|    time_elapsed         | 265         |
|    total_timesteps      | 477184      |
| train/                  |             |
|    approx_kl            | 0.023803264 |
|    clip_fraction        | 0.308       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.56        |
|    explained_variance   | 0.918       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0299      |
|    n_updates            | 2320        |
|    policy_gradient_loss | 0.0161      |
|    std                  | 0.0185      |
|    value_loss           | 0.111       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 34.1       |
|    ep_rew_mean          | 4.57       |
| time/                   |            |
|    fps                  | 1796       |
|    iterations           | 242        |
|    time_elapsed         | 275        |
|    total_timesteps      | 495616     |
| train/                  |            |
|    approx_kl            | 0.07938046 |
|    clip_fraction        | 0.303      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.6        |
|    explained_variance   | 0.998      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0016    |
|    n_updates            | 2410       |
|    policy_gradient_loss | 0.0171     |
|    std                  | 0.0178     |
|    value_loss           | 0.00584    |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 33.7       |
|    ep_rew_mean          | 4.63       |
| time/                   |            |
|    fps                  | 1796       |
|    iterations           | 251        |
|    time_elapsed         | 286        |
|    total_timesteps      | 514048     |
| train/                  |            |
|    approx_kl            | 0.10700264 |
|    clip_fraction        | 0.314      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.59       |
|    explained_variance   | 0.998      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0297     |
|    n_updates            | 2500       |
|    policy_gradient_loss | 0.0321     |
|    std                  | 0.0181     |
|    value_loss           | 0.00517    |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 32.8        |
|    ep_rew_mean          | 4.64        |
| time/                   |             |
|    fps                  | 1795        |
|    iterations           | 260         |
|    time_elapsed         | 296         |
|    total_timesteps      | 532480      |
| train/                  |             |
|    approx_kl            | 0.025165793 |
|    clip_fraction        | 0.293       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.64        |
|    explained_variance   | 0.999       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0395      |
|    n_updates            | 2590        |
|    policy_gradient_loss | 0.0169      |
|    std                  | 0.0173      |
|    value_loss           | 0.0032      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 33.3       |
|    ep_rew_mean          | 4.65       |
| time/                   |            |
|    fps                  | 1795       |
|    iterations           | 269        |
|    time_elapsed         | 306        |
|    total_timesteps      | 550912     |
| train/                  |            |
|    approx_kl            | 0.04436531 |
|    clip_fraction        | 0.281      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.7        |
|    explained_variance   | 0.999      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0809     |
|    n_updates            | 2680       |
|    policy_gradient_loss | 0.0228     |
|    std                  | 0.0162     |
|    value_loss           | 0.00126    |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 34.7        |
|    ep_rew_mean          | 4.66        |
| time/                   |             |
|    fps                  | 1795        |
|    iterations           | 278         |
|    time_elapsed         | 317         |
|    total_timesteps      | 569344      |
| train/                  |             |
|    approx_kl            | 0.031761605 |
|    clip_fraction        | 0.299       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.7         |
|    explained_variance   | 0.999       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0473      |
|    n_updates            | 2770        |
|    policy_gradient_loss | 0.0187      |
|    std                  | 0.0163      |
|    value_loss           | 0.00102     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 32.3        |
|    ep_rew_mean          | 4.56        |
| time/                   |             |
|    fps                  | 1795        |
|    iterations           | 287         |
|    time_elapsed         | 327         |
|    total_timesteps      | 587776      |
| train/                  |             |
|    approx_kl            | 0.041976854 |
|    clip_fraction        | 0.349       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.74        |
|    explained_variance   | 0.999       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0388      |
|    n_updates            | 2860        |
|    policy_gradient_loss | 0.0169      |
|    std                  | 0.0155      |
|    value_loss           | 0.00122     |
-----------------------------------------
---------------------------------------
| rollout/                |         

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 32.1       |
|    ep_rew_mean          | 4.66       |
| time/                   |            |
|    fps                  | 1794       |
|    iterations           | 297        |
|    time_elapsed         | 338        |
|    total_timesteps      | 608256     |
| train/                  |            |
|    approx_kl            | 0.04625818 |
|    clip_fraction        | 0.253      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.82       |
|    explained_variance   | 0.999      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0778     |
|    n_updates            | 2960       |
|    policy_gradient_loss | 0.0206     |
|    std                  | 0.0144     |
|    value_loss           | 0.00127    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 32.1        |
|    ep_rew_mean          | 4.63        |
| time/                   |             |
|    fps                  | 1794        |
|    iterations           | 306         |
|    time_elapsed         | 349         |
|    total_timesteps      | 626688      |
| train/                  |             |
|    approx_kl            | 0.030841606 |
|    clip_fraction        | 0.357       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.85        |
|    explained_variance   | 0.997       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0187      |
|    n_updates            | 3050        |
|    policy_gradient_loss | 0.0411      |
|    std                  | 0.0141      |
|    value_loss           | 0.00205     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 76.7      |
|    ep_rew_mean          | 2.4       |
| time/                   |           |
|    fps                  | 1794      |
|    iterations           | 316       |
|    time_elapsed         | 360       |
|    total_timesteps      | 647168    |
| train/                  |           |
|    approx_kl            | 4.6533837 |
|    clip_fraction        | 0.348     |
|    clip_range           | 0.2       |
|    entropy_loss         | 2.79      |
|    explained_variance   | 0.981     |
|    learning_rate        | 0.0003    |
|    loss                 | -0.0407   |
|    n_updates            | 3150      |
|    policy_gradient_loss | 0.0457    |
|    std                  | 0.0149    |
|    value_loss           | 0.00259   |
---------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 74        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 64.1        |
|    ep_rew_mean          | 4.4         |
| time/                   |             |
|    fps                  | 1795        |
|    iterations           | 326         |
|    time_elapsed         | 371         |
|    total_timesteps      | 667648      |
| train/                  |             |
|    approx_kl            | 0.019806363 |
|    clip_fraction        | 0.288       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.7         |
|    explained_variance   | 0.995       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0138      |
|    n_updates            | 3250        |
|    policy_gradient_loss | 0.0153      |
|    std                  | 0.0163      |
|    value_loss           | 0.00459     |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 60.5       |
|    ep_rew_mean          | 4.6        |
| time/                   |            |
|    fps                  | 1795       |
|    iterations           | 335        |
|    time_elapsed         | 382        |
|    total_timesteps      | 686080     |
| train/                  |            |
|    approx_kl            | 0.02485303 |
|    clip_fraction        | 0.291      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.67       |
|    explained_variance   | 0.997      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0429     |
|    n_updates            | 3340       |
|    policy_gradient_loss | 0.0194     |
|    std                  | 0.0168     |
|    value_loss           | 0.00152    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 54.1        |
|    ep_rew_mean          | 4.61        |
| time/                   |             |
|    fps                  | 1795        |
|    iterations           | 344         |
|    time_elapsed         | 392         |
|    total_timesteps      | 704512      |
| train/                  |             |
|    approx_kl            | 0.054134227 |
|    clip_fraction        | 0.287       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.7         |
|    explained_variance   | 0.999       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00268     |
|    n_updates            | 3430        |
|    policy_gradient_loss | 0.0162      |
|    std                  | 0.0163      |
|    value_loss           | 0.000751    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 51.3        |
|    ep_rew_mean          | 4.61        |
| time/                   |             |
|    fps                  | 1795        |
|    iterations           | 353         |
|    time_elapsed         | 402         |
|    total_timesteps      | 722944      |
| train/                  |             |
|    approx_kl            | 0.044947367 |
|    clip_fraction        | 0.269       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.73        |
|    explained_variance   | 0.999       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0105     |
|    n_updates            | 3520        |
|    policy_gradient_loss | 0.0162      |
|    std                  | 0.0156      |
|    value_loss           | 0.000662    |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 47.8        |
|    ep_rew_mean          | 4.61        |
| time/                   |             |
|    fps                  | 1796        |
|    iterations           | 362         |
|    time_elapsed         | 412         |
|    total_timesteps      | 741376      |
| train/                  |             |
|    approx_kl            | 0.050600164 |
|    clip_fraction        | 0.297       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.79        |
|    explained_variance   | 1           |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0305      |
|    n_updates            | 3610        |
|    policy_gradient_loss | 0.0209      |
|    std                  | 0.0147      |
|    value_loss           | 0.000567    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 46.1       |
|    ep_rew_mean          | 4.6        |
| time/                   |            |
|    fps                  | 1796       |
|    iterations           | 371        |
|    time_elapsed         | 423        |
|    total_timesteps      | 759808     |
| train/                  |            |
|    approx_kl            | 0.05063448 |
|    clip_fraction        | 0.319      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.8        |
|    explained_variance   | 1          |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0389     |
|    n_updates            | 3700       |
|    policy_gradient_loss | 0.0229     |
|    std                  | 0.0147     |
|    value_loss           | 0.000527   |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 40.3        |
|    ep_rew_mean          | 4.6         |
| time/                   |             |
|    fps                  | 1796        |
|    iterations           | 380         |
|    time_elapsed         | 433         |
|    total_timesteps      | 778240      |
| train/                  |             |
|    approx_kl            | 0.049717538 |
|    clip_fraction        | 0.308       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.82        |
|    explained_variance   | 0.999       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0259     |
|    n_updates            | 3790        |
|    policy_gradient_loss | 0.0262      |
|    std                  | 0.0144      |
|    value_loss           | 0.000697    |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 41.6       |
|    ep_rew_mean          | 4.6        |
| time/                   |            |
|    fps                  | 1796       |
|    iterations           | 389        |
|    time_elapsed         | 443        |
|    total_timesteps      | 796672     |
| train/                  |            |
|    approx_kl            | 0.03672215 |
|    clip_fraction        | 0.317      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.84       |
|    explained_variance   | 0.999      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0355    |
|    n_updates            | 3880       |
|    policy_gradient_loss | 0.0179     |
|    std                  | 0.0142     |
|    value_loss           | 0.00116    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 32.9       |
|    ep_rew_mean          | 4.58       |
| time/                   |            |
|    fps                  | 1795       |
|    iterations           | 398        |
|    time_elapsed         | 453        |
|    total_timesteps      | 815104     |
| train/                  |            |
|    approx_kl            | 0.06747741 |
|    clip_fraction        | 0.31       |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.84       |
|    explained_variance   | 0.999      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0173     |
|    n_updates            | 3970       |
|    policy_gradient_loss | 0.0193     |
|    std                  | 0.0141     |
|    value_loss           | 0.00136    |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 32.1       |
|    ep_rew_mean          | 4.63       |
| time/                   |            |
|    fps                  | 1795       |
|    iterations           | 407        |
|    time_elapsed         | 464        |
|    total_timesteps      | 833536     |
| train/                  |            |
|    approx_kl            | 0.09274177 |
|    clip_fraction        | 0.329      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.88       |
|    explained_variance   | 0.999      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0152     |
|    n_updates            | 4060       |
|    policy_gradient_loss | 0.0306     |
|    std                  | 0.0136     |
|    value_loss           | 0.00139    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 42.3       |
|    ep_rew_mean          | 4.63       |
| time/                   |            |
|    fps                  | 1796       |
|    iterations           | 416        |
|    time_elapsed         | 474        |
|    total_timesteps      | 851968     |
| train/                  |            |
|    approx_kl            | 0.03551004 |
|    clip_fraction        | 0.348      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.84       |
|    explained_variance   | 0.999      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0216    |
|    n_updates            | 4150       |
|    policy_gradient_loss | 0.0217     |
|    std                  | 0.0142     |
|    value_loss           | 0.0016     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 33.2        |
|    ep_rew_mean          | 4.66        |
| time/                   |             |
|    fps                  | 1796        |
|    iterations           | 426         |
|    time_elapsed         | 485         |
|    total_timesteps      | 872448      |
| train/                  |             |
|    approx_kl            | 0.028399881 |
|    clip_fraction        | 0.31        |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.94        |
|    explained_variance   | 0.999       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0243      |
|    n_updates            | 4250        |
|    policy_gradient_loss | 0.0264      |
|    std                  | 0.0128      |
|    value_loss           | 0.00306     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 32.8       |
|    ep_rew_mean          | 4.65       |
| time/                   |            |
|    fps                  | 1796       |
|    iterations           | 435        |
|    time_elapsed         | 495        |
|    total_timesteps      | 890880     |
| train/                  |            |
|    approx_kl            | 0.04811806 |
|    clip_fraction        | 0.288      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.93       |
|    explained_variance   | 0.999      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0825     |
|    n_updates            | 4340       |
|    policy_gradient_loss | 0.0198     |
|    std                  | 0.0129     |
|    value_loss           | 0.00136    |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 32          |
|    ep_rew_mean          | 4.64        |
| time/                   |             |
|    fps                  | 1795        |
|    iterations           | 444         |
|    time_elapsed         | 506         |
|    total_timesteps      | 909312      |
| train/                  |             |
|    approx_kl            | 0.026236242 |
|    clip_fraction        | 0.286       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.9         |
|    explained_variance   | 0.999       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0453      |
|    n_updates            | 4430        |
|    policy_gradient_loss | 0.0246      |
|    std                  | 0.0134      |
|    value_loss           | 0.00138     |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 56.6        |
|    ep_rew_mean          | 4.49        |
| time/                   |             |
|    fps                  | 1795        |
|    iterations           | 453         |
|    time_elapsed         | 516         |
|    total_timesteps      | 927744      |
| train/                  |             |
|    approx_kl            | 0.046614025 |
|    clip_fraction        | 0.333       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.85        |
|    explained_variance   | 0.998       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0246      |
|    n_updates            | 4520        |
|    policy_gradient_loss | 0.0248      |
|    std                  | 0.0141      |
|    value_loss           | 0.00188     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 52.8       |
|    ep_rew_mean          | 4.6        |
| time/                   |            |
|    fps                  | 1795       |
|    iterations           | 463        |
|    time_elapsed         | 527        |
|    total_timesteps      | 948224     |
| train/                  |            |
|    approx_kl            | 0.02511001 |
|    clip_fraction        | 0.294      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.78       |
|    explained_variance   | 0.999      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.006      |
|    n_updates            | 4620       |
|    policy_gradient_loss | 0.00853    |
|    std                  | 0.015      |
|    value_loss           | 0.00108    |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 48.1       |
|    ep_rew_mean          | 4.59       |
| time/                   |            |
|    fps                  | 1796       |
|    iterations           | 473        |
|    time_elapsed         | 539        |
|    total_timesteps      | 968704     |
| train/                  |            |
|    approx_kl            | 0.03783292 |
|    clip_fraction        | 0.284      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.78       |
|    explained_variance   | 1          |
|    learning_rate        | 0.0003     |
|    loss                 | 0.00829    |
|    n_updates            | 4720       |
|    policy_gradient_loss | 0.0107     |
|    std                  | 0.0149     |
|    value_loss           | 0.000341   |
----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 49.4       |
|    ep_rew_mean          | 4.6        |
| time/                   |            |
|    fps                  | 1796       |
|    iterations           | 482        |
|    time_elapsed         | 549        |
|    total_timesteps      | 987136     |
| train/                  |            |
|    approx_kl            | 0.02946841 |
|    clip_fraction        | 0.333      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.79       |
|    explained_variance   | 1          |
|    learning_rate        | 0.0003     |
|    loss                 | 2.98e-05   |
|    n_updates            | 4810       |
|    policy_gradient_loss | 0.0283     |
|    std                  | 0.0148     |
|    value_loss           | 0.000323   |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

<stable_baselines3.ppo.ppo.PPO at 0x7fa09aa20ee0>

In [6]:
# model.learn(total_timesteps=1_000_000)