In [1]:
import sys, time, random, math, cv2, os
import numpy as np

sys.path.insert(0, '../src')
from robot import Robot

In [2]:
N_STATE_GTG = 120
actions_gtg = np.array([(3,3), (1,-1), (-1,1)])

dimensions = (N_STATE_GTG, len(actions_gtg))

path = "Q_matrix_gtg.txt"
on_disk = path in os.listdir()

if on_disk:
    f = open(path, "r")
    
    t = f.read().split()
    iterations = int(t[0])
    random_rate = float(t[1])
    learning_rate = float(t[2])
    
    Q = np.reshape(np.array([float(i) for i in t[3:]]), dimensions)

    f.close()
else:
    iterations = 0
    random_rate = 0.9
    learning_rate = 0.6
    Q = np.zeros(dimensions)

In [3]:
def cicle(ang):
    return (ang + 2 * math.pi) % (2*math.pi)

def get_distance_from_goal(a,b):
    return np.linalg.norm(a-b)

def get_angular_distance(v1,v2):

    v1_aux = v1 / np.linalg.norm(v1)
    v2_aux = v2 / np.linalg.norm(v2)

    theta = np.arccos(np.clip(np.dot(v1_aux, v2_aux), -1.0, 1.0))
    rot_theta = np.array([[np.cos(theta),-np.sin(theta)],[np.sin(theta),np.cos(theta)]])
    theta = theta*180.0/math.pi
    theta = (360.0+theta)%360.0

    if abs(np.dot(np.dot(rot_theta,v2_aux),v1_aux)-1.0) > 1e-2:
        theta = 360.0 - theta

    return theta

def get_state_go_to_goal(current_position, current_angle, goal):
    angle_distance = get_angular_distance(goal - current_position, np.array([math.cos(current_angle),math.sin(current_angle)]))
    if angle_distance <= (180 / N_STATE_GTG) or angle_distance >= (360 - (180 / N_STATE_GTG)):
        r = 0
    else:
        r = (angle_distance - (180 / N_STATE_GTG)) / (360 / N_STATE_GTG) + 1
    return int(r)

def get_action_go_to_goal(Q, state, random_rate):
    a = random.random()
    if a < random_rate:
        i = random.randint(0, len(actions_gtg) - 1)
    else:
        i = 0
        for j in range(len(actions_gtg)):
            if Q[state][j] > Q[state][i]:
                i = j
    return i
    
def get_reward_go_to_goal(current_position, current_angle, new_position, new_angle, goal):
    angle_distance = get_angular_distance(goal - current_position, np.array([math.cos(current_angle),math.sin(current_angle)]))
    new_angle_distance = get_angular_distance(goal - new_position, np.array([math.cos(new_angle),math.sin(new_angle)]))
    if angle_distance > 180: angle_distance = 360 - angle_distance
    if new_angle_distance > 180: new_angle_distance = 360 - new_angle_distance
    print("nad: " + str(new_angle_distance))
    print("oad: " + str(angle_distance))
    delta_angle = angle_distance - new_angle_distance

    distance = get_distance_from_goal(current_position, goal)
    new_distance = get_distance_from_goal(new_position, goal)
    print("nd: " + str(new_distance))
    print("od: " + str(distance))
    delta_distance = distance - new_distance

    return delta_angle + 50 * delta_distance
    

In [None]:
robot = Robot("#0")
goal = np.array([13,0])

max_iterations = 1000

goal = [(13,0), (-13,0)]
i = 0

while robot.get_connection_status() != -1 and iterations < max_iterations:
    current_position = np.array(robot.get_current_position()[:-1])
    current_angle = robot.get_current_orientation()[2]
    
    state = get_state_go_to_goal(current_position, current_angle, goal[i])
    
    ac = get_action_go_to_goal(Q, state, random_rate)
    
    robot.set_left_velocity(actions_gtg[ac][0])
    robot.set_right_velocity(actions_gtg[ac][1])
    time.sleep(3)
    robot.set_left_velocity(0)
    robot.set_right_velocity(0)

    new_position = np.array(robot.get_current_position()[:-1])
    new_angle = robot.get_current_orientation()[2]
    
    reward = get_reward_go_to_goal(current_position, current_angle, new_position, new_angle, goal[i])
    
    print("state: " + str(state) + " action: " + str(ac) + " reward: " + str(reward))
    
    best_q = np.amax(Q[state])

    Q[state][ac] = learning_rate * (reward + 0.99 * best_q - Q[state][ac])
    
    if get_distance_from_goal(new_position, goal[i]):
        i = (i + 1) % 2
    
    random_rate = max(0.1, random_rate * 0.9995)
    learning_rate = max(0.1, learning_rate * 0.9999)

    f = open(path, "w")
    f.write(str(iterations) + "\n")
    f.write(str(random_rate) + "\n")
    f.write(str(learning_rate) + "\n")

    for q in Q:
        for j in q:
            f.write(str(j) + " ")
        f.write("\n")
    f.close()

    iterations += 1


Connected to remoteApi server.
[92m Pioneer_p3dx_ultrasonicSensor1#0 connected.
[92m Pioneer_p3dx_ultrasonicSensor2#0 connected.
[92m Pioneer_p3dx_ultrasonicSensor3#0 connected.
[92m Pioneer_p3dx_ultrasonicSensor4#0 connected.
[92m Pioneer_p3dx_ultrasonicSensor5#0 connected.
[92m Pioneer_p3dx_ultrasonicSensor6#0 connected.
[92m Pioneer_p3dx_ultrasonicSensor7#0 connected.
[92m Pioneer_p3dx_ultrasonicSensor8#0 connected.
[92m Pioneer_p3dx_ultrasonicSensor9#0 connected.
[92m Pioneer_p3dx_ultrasonicSensor10#0 connected.
[92m Pioneer_p3dx_ultrasonicSensor11#0 connected.
[92m Pioneer_p3dx_ultrasonicSensor12#0 connected.
[92m Pioneer_p3dx_ultrasonicSensor13#0 connected.
[92m Pioneer_p3dx_ultrasonicSensor14#0 connected.
[92m Pioneer_p3dx_ultrasonicSensor15#0 connected.
[92m Pioneer_p3dx_ultrasonicSensor16#0 connected.
[92m Vision sensor connected.
[92m Laser connected.
[92m Left motor connected.
[92m Right motor connected.
[92m Robot connected.
nad: 48.93030130876002
oad: 

nad: 57.50062100193003
oad: 11.141846851306752
nd: 21.196210845333255
od: 21.27010170336723
state: 4 action: 1 reward: -42.66423124892444
nad: 89.82128775691058
oad: 146.92346620158992
nd: 5.218872191009742
od: 5.178914892828138
state: 71 action: 1 reward: 55.10431353559913
nad: 171.77846690954652
oad: 114.42535493393086
nd: 21.12613816628889
od: 21.1530206001542
state: 38 action: 1 reward: -56.00899028235014
nad: 24.79524508329291
oad: 32.786434009465154
nd: 4.824763059709918
od: 5.257692857371589
state: 109 action: 0 reward: 29.637678809255817
nad: 131.2350191037653
oad: 177.97250960444137
nd: 21.6699860286753
od: 21.603039989358077
state: 59 action: 1 reward: 43.390188534814925
nad: 24.549457685252946
oad: 21.152121193126106
nd: 4.765075525306254
od: 4.779883841594384
state: 7 action: 2 reward: -2.6569206777203256
nad: 134.0163806377662
oad: 177.47168253009204
nd: 21.67733153833028
od: 21.666591728793982
state: 59 action: 1 reward: 42.91831141551101
nad: 75.10813898581091
oad: 18.41

nad: 38.24498800268361
oad: 21.604448405350183
nd: 20.44857410709194
od: 20.46634103013111
state: 7 action: 2 reward: -15.752193445374822
nad: 97.35473220823684
oad: 104.56160701389211
nd: 6.637633019273128
od: 6.5841954759281665
state: 35 action: 0 reward: 4.534997638407193
nad: 3.372774921151745
oad: 50.421400448945235
nd: 20.120659620643654
od: 20.14272020725027
state: 103 action: 1 reward: 48.15165485812435
nad: 155.73574880576882
oad: 145.28322544046813
nd: 6.606433622781636
od: 6.616119712574976
state: 48 action: 1 reward: -9.968218875633683
nad: 113.45608981557126
oad: 55.10025317427238
nd: 20.064082675005896
od: 20.105996656078535
state: 18 action: 1 reward: -56.26013758766692
nad: 38.67873654803947
oad: 96.9989760852394
nd: 6.676676226787785
od: 6.63279282148833
state: 88 action: 1 reward: 56.12606927222721
nad: 127.74652710208983
oad: 171.88823106205382
nd: 20.04797614124139
od: 20.028486912385794
state: 57 action: 2 reward: 43.167242517184285
nad: 96.71948547128716
oad: 82.6

nad: 165.57634132891008
oad: 165.10612090239727
nd: 16.95154405698219
od: 16.428100742006084
state: 65 action: 0 reward: -26.64238617531811
nad: 45.15097794607101
oad: 2.8344948703876867
nd: 9.25755872133253
od: 9.320626516803856
state: 1 action: 1 reward: -39.163093302117005
nad: 108.15539726391762
oad: 117.28948290497806
nd: 17.179373701583152
od: 17.023357976091045
state: 81 action: 0 reward: 1.3332993664550727
nad: 9.426401879750415
oad: 58.393990516857116
nd: 8.968950638994105
od: 8.982923755675413
state: 19 action: 2 reward: 49.66624447117211
nad: 110.96635978884177
oad: 157.91077684288905
nd: 17.196492268862166
od: 17.174756234876597
state: 67 action: 1 reward: 45.85761535476884
nad: 120.86643364860907
oad: 56.042397904541815
nd: 8.907612191191031
od: 8.954661788809338
state: 19 action: 1 reward: -62.47155586315195
nad: 93.27856538056488
oad: 46.013979367859406
nd: 17.211978775615965
od: 17.246155313856647
state: 105 action: 2 reward: -45.555759100671345
nad: 67.78479217631548
o

nad: 138.64085496308314
oad: 157.85898524315905
nd: 18.4348871109432
od: 18.42624869752773
state: 53 action: 1 reward: 18.78620960930231
nad: 52.57316279990664
oad: 3.3057704900317617
nd: 9.417913674484042
od: 9.428850293277236
state: 1 action: 2 reward: -48.720561370215194
nad: 137.12124671928086
oad: 171.76217307752836
nd: 18.432310212818454
od: 18.416862400170153
state: 57 action: 1 reward: 33.868535725832466
nad: 50.94840877754666
oad: 1.7764966332865129
nd: 9.422758728978785
od: 9.430809968602683
state: 1 action: 2 reward: -48.76935016306527
nad: 163.76000996051675
oad: 173.3854801564895
nd: 18.915703915660636
od: 18.412388929347056
state: 58 action: 0 reward: -15.540279119706234
nad: 67.097011287469
oad: 63.82557767206197
nd: 8.935763086948436
od: 9.172687379758713
state: 99 action: 0 reward: 8.57478102510684
nad: 144.93031763702925
oad: 164.2412528248559
nd: 19.528082961859788
od: 19.463893515814025
state: 55 action: 1 reward: 16.101462885538517
nad: 6.193086680244221
oad: 16.99

nad: 109.97799867928859
oad: 159.25490454443195
nd: 23.48118698875773
od: 23.412782976063195
state: 67 action: 1 reward: 45.85670523041655
nad: 8.58849996604215
oad: 53.11911065485697
nd: 2.6286196713940253
od: 2.6208681802396883
state: 18 action: 2 reward: 44.143036131097965
nad: 145.76682980487863
oad: 155.4048679305597
nd: 23.477995690229054
od: 23.46318685405969
state: 68 action: 2 reward: 8.897596317212816
nad: 68.49870247428464
oad: 49.30411669357153
nd: 2.3795645967064143
od: 2.602716912986227
state: 104 action: 0 reward: -8.036969966722472
nad: 87.31619188188216
oad: 137.20200821280457
nd: 23.913329888380417
od: 23.83535464611714
state: 46 action: 2 reward: 45.9870542177585
nad: 178.16892217810584
oad: 119.9951328451101
nd: 2.3079296782614835
od: 2.32384513287404
state: 80 action: 2 reward: -57.37801660236791
nad: 19.88023602682506
oad: 30.23079810379943
nd: 23.607550897461163
od: 23.946645346809323
state: 10 action: 0 reward: 27.30528454438236
nad: 133.96725679055072
oad: 172.

nad: 105.9265487839466
oad: 161.1886241731935
nd: 23.39486075000307
od: 23.366575018740427
state: 66 action: 1 reward: 53.847788826114794
nad: 31.33903382901684
oad: 75.97349182816924
nd: 2.6321585360571618
od: 2.6064286655304816
state: 25 action: 2 reward: 43.34796447281839
nad: 151.87095610824156
oad: 151.145063559633
nd: 23.37857151594998
od: 23.37006464067189
state: 70 action: 2 reward: -1.151236312512907
nad: 15.39821582420467
oad: 24.595229626810408
nd: 2.631238100931344
od: 2.62591761973333
state: 112 action: 1 reward: 8.930989742705034
nad: 156.7716954331895
oad: 165.8826750522395
nd: 23.787498144817476
od: 23.371792936955824
state: 65 action: 0 reward: -11.674280774032617
nad: 40.0216118513743
oad: 31.30077820742565
nd: 1.8037786731605265
od: 2.2327349879275658
state: 10 action: 0 reward: 12.726982094403308
nad: 111.02797345585435
oad: 157.2619191003174
nd: 24.347412119063815
od: 24.272145202349066
state: 68 action: 1 reward: 42.47059980872559
nad: 49.25348514813447
oad: 86.87

nad: 173.29481162402283
oad: 173.33164447354102
nd: 2.4359339577595116
od: 2.435844028757698
state: 62 action: 1 reward: 0.0323363994275061
nad: 79.6774987703289
oad: 79.66512137907239
nd: 25.181598228405143
od: 25.181650874328625
state: 27 action: 0 reward: -0.009745095082408284
nad: 173.1904658359955
oad: 173.28144735304795
nd: 2.436216663496963
od: 2.4359699336159912
state: 62 action: 1 reward: 0.07864502300386267
nad: 79.7667961592112
oad: 79.7618351334259
nd: 25.181222535903718
od: 25.181242012656114
state: 27 action: 0 reward: -0.003987188165503142
nad: 173.1700074547972
oad: 173.18513123616344
nd: 2.4362709563122777
od: 2.4362290833584948
state: 62 action: 0 reward: 0.01303013367709216
nad: 79.83754617643388
oad: 79.78078004957985
nd: 25.180928688746988
od: 25.181161597877498
state: 27 action: 1 reward: -0.0451206703285294
nad: 173.07585795331124
oad: 173.1088599163861
nd: 2.4365027883256642
od: 2.4364283962705846
state: 62 action: 1 reward: 0.029282360320884493
nad: 79.86719265

nad: 0.22788414675483182
oad: 52.74987829662365
nd: 21.056271952607034
od: 21.03480326651063
state: 18 action: 2 reward: 51.448559845048564
nad: 157.2169700351402
oad: 164.8746293269537
nd: 5.518200024207078
od: 5.090439915408097
state: 55 action: 0 reward: -13.730346148135574
nad: 58.310961588269265
oad: 9.544814683516506
nd: 20.526829042756212
od: 20.597711544458797
state: 117 action: 2 reward: -45.222021819623535
nad: 48.55492530894401
oad: 108.31306187470136
nd: 5.635951444236439
od: 5.592672460865142
state: 36 action: 2 reward: 57.59418739719249
nad: 75.7330847618772
oad: 118.1328596787597
nd: 20.51576094686446
od: 20.483081448049997
state: 81 action: 1 reward: 40.765799976159336
nad: 148.65931872299655
oad: 90.83822590320437
nd: 5.56728172775884
od: 5.604875044223531
state: 30 action: 1 reward: -55.94142699655762
nad: 8.637720653499741
oad: 18.18290999325842
nd: 20.089283201020113
od: 20.547972600497072
state: 114 action: 0 reward: 32.47965931360666
nad: 114.28863683109677
oad: 1

nad: 109.95117822716816
oad: 158.14442311721325
nd: 25.607724090801458
od: 25.52596332569109
state: 53 action: 2 reward: 44.10520663452674
nad: 6.985630144553397
oad: 4.480821510989472
nd: 0.8817615002204839
od: 1.3487377789820005
state: 1 action: 0 reward: 20.844005304511903
nad: 151.08104246655262
oad: 101.54505664944958
nd: 25.686197619730294
od: 25.706319914794822
state: 34 action: 1 reward: -48.529871063876634
nad: 84.38774715545799
oad: 39.349239254974236
nd: 0.6085472853318941
od: 0.8175512623539682
state: 13 action: 0 reward: -34.588309049380044
nad: 161.1993988125264
oad: 160.83222657139004
nd: 26.619864746463175
od: 26.135904526668806
state: 54 action: 0 reward: -24.565183230854814
nad: 179.0740361696096
oad: 127.63487303434988
nd: 0.7801102173982374
od: 0.7647131280834176
state: 43 action: 1 reward: -52.20901760100071
nad: 95.59932120584233
oad: 150.76177193211635
nd: 26.7176228389657
od: 26.68415754309634
state: 70 action: 1 reward: 53.48918593280607
nad: 65.17664210831526


nad: 33.66730639501344
oad: 17.982517065369336
nd: 0.8863922886005609
od: 0.9012076308552944
state: 6 action: 2 reward: -14.944022216907426
nad: 18.946950586210335
oad: 76.30847252116149
nd: 25.712140593833784
od: 25.683838614070076
state: 25 action: 2 reward: 55.946422946765786
nad: 49.476128841257434
oad: 90.07529249721358
nd: 0.8755843766126142
od: 0.845475321417461
state: 90 action: 1 reward: 39.09371089619848
nad: 16.051204099510755
oad: 60.48510592648171
nd: 25.707655438803336
od: 25.688060980726878
state: 20 action: 2 reward: 43.45417892314804
nad: 152.3206842425161
oad: 93.38474738878546
nd: 0.80493124165985
od: 0.8421091560153979
state: 89 action: 2 reward: -57.07704113595325
nad: 97.01645182580529
oad: 41.295311707016594
nd: 25.66299628078667
od: 25.70034742359902
state: 106 action: 2 reward: -53.853582978171126
nad: 88.85511864290663
oad: 148.9118338599443
nd: 0.8371666611099151
od: 0.8010097203067488
state: 50 action: 2 reward: 58.24886817687935
nad: 110.60259798734779
oad:

nad: 141.15539921459344
oad: 128.90158722369176
nd: 7.003659704089703
od: 6.674957608971432
state: 43 action: 0 reward: -28.688916746815202
nad: 52.052894020284896
oad: 50.77040413744078
nd: 18.769704666454643
od: 19.10719529514236
state: 103 action: 0 reward: 15.592041551541769
nad: 146.0374949157009
oad: 143.76927296424668
nd: 7.863615241032613
od: 7.432804077165158
state: 48 action: 0 reward: -23.808780144826972
nad: 5.864159796975741
oad: 53.341351501406564
nd: 18.43140355893953
od: 18.45063825177248
state: 102 action: 1 reward: 48.43892634607846
nad: 157.09706979073178
oad: 165.75621919451987
nd: 8.336489650450579
od: 7.908087386109624
state: 65 action: 0 reward: -12.76096381325964
nad: 43.1725999296969
oad: 4.115634343710212
nd: 17.8840681221495
od: 17.954216661149196
state: 1 action: 2 reward: -35.549538636001884
nad: 147.2235969335402
oad: 155.07123462323307
nd: 8.823933214307088
od: 8.404936576283376
state: 52 action: 0 reward: -13.102194211492726
nad: 5.2622969554161045
oad: 

nad: 87.40719006613074
oad: 131.34305489168287
nd: 13.22350486133141
od: 13.191897786107349
state: 44 action: 2 reward: 42.35551106434909
nad: 27.996234996145347
oad: 78.53382821632505
nd: 13.008650710209555
od: 12.973391529281743
state: 94 action: 1 reward: 48.77463417378914
nad: 149.08226429681713
oad: 138.06132709800943
nd: 13.601723048609934
od: 13.184983620622152
state: 46 action: 0 reward: -31.857908598196794
nad: 67.99618318141881
oad: 18.857529251366202
nd: 12.469930773587299
od: 12.542700241970556
state: 114 action: 2 reward: -45.50018051088978
nad: 40.86055021519894
oad: 99.90720689218
nd: 13.714892860205492
od: 13.675298638365923
state: 33 action: 2 reward: 57.06694558500265
nad: 78.16242463904388
oad: 127.15986753819993
nd: 12.460885337632007
od: 12.427484256523368
state: 78 action: 1 reward: 47.32738884372409
nad: 42.33379443026479
oad: 89.65233997315221
nd: 13.716894999911267
od: 13.686479316900055
state: 30 action: 2 reward: 45.797761392326834
nad: 178.60004441787146
oad

nad: 9.460462915708888
oad: 54.25671971300659
nd: 11.73617755887017
od: 11.719374888575139
state: 18 action: 2 reward: 43.95612328254619
nad: 138.7159366767724
oad: 161.3662574828819
nd: 14.357960976358891
od: 14.346512959642673
state: 66 action: 2 reward: 22.077919970298634
nad: 0.02666561375394849
oad: 50.090316561255804
nd: 11.744546064406727
od: 11.718211310973933
state: 103 action: 1 reward: 48.74691327586217
nad: 140.70582635269284
oad: 170.87887664072832
nd: 14.350046092360264
od: 14.336737215860603
state: 63 action: 2 reward: 29.507606463052447
nad: 59.54569744004601
oad: 48.10092586457745
nd: 11.442926575413683
od: 11.726144333481756
state: 104 action: 0 reward: 2.716116327935076
nad: 79.97857559522288
oad: 132.97338230500196
nd: 14.786421430913656
od: 14.710554158929533
state: 44 action: 2 reward: 49.20144311057294
nad: 62.38906457108948
oad: 112.91955242417578
nd: 11.416572918618842
od: 11.376312276108996
state: 82 action: 1 reward: 48.51745572759399
nad: 80.02736893757094
o

nad: 61.97989384731295
oad: 10.399737772663684
nd: 6.9362522888982445
od: 7.0136191960964815
state: 117 action: 2 reward: -47.71181071473742
nad: 125.80951207315314
oad: 134.60864120282918
nd: 19.57140708016078
od: 19.27691341193773
state: 45 action: 0 reward: -5.9255542814763515
nad: 129.05938031174355
oad: 75.92790077970744
nd: 6.739616839311158
od: 6.789510656576503
state: 95 action: 2 reward: -50.63678866876885
nad: 115.66664547917821
oad: 73.31610604505232
nd: 19.607931225724627
od: 19.640923117090473
state: 24 action: 1 reward: -40.70094486583359
nad: 80.49347887323648
oad: 86.59531009239475
nd: 6.68023926773378
od: 6.769881225868441
state: 91 action: 0 reward: 10.583929125891311
nad: 77.6442938878264
oad: 127.0750443582105
nd: 19.971208266344842
od: 19.89377776153806
state: 42 action: 2 reward: 45.559225230045065
nad: 81.13560939944773
oad: 130.7377083260546
nd: 6.668923371227957
od: 6.634284972915424
state: 76 action: 1 reward: 47.87017901098019
nad: 139.27323163953417
oad: 127