In [47]:
from replay_reader import read_snoopy, parse_bullets, width, height, player_r, bullet_r

In [103]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [177]:
def feats(row, player):
    pos = np.array([row[f'x{player}'] / width, row[f'y{player}'] / height])
    pos_score = (1 - 4 * (pos[0] - 0.5) ** 2 +
                 np.log(2 / 3 - abs(pos[1] - 2 / 3) + 0.01))
    
    bullets = parse_bullets(row['bullets'])
    if bullets.empty:
        bullet_score = 0
    else:
        bullets['x'] /= width
        bullets['y'] /= height
        leeway = (player_r + bullet_r) / max(width, height)
        bullet_score = sum(np.log(np.linalg.norm(vec - pos) - leeway) for vec in bullets.query(f'player != "Player {player}"')[['x', 'y']].values)
    
    curr_angle = np.arctan2(row[f'y{3-player}'] - row[f'y{player}'],
                            row[f'x{3-player}'] - row[f'x{player}'])
    orientation_score = np.pi * 2 - abs(row[f'a{player}'] - curr_angle)
    
    return (pos_score, bullet_score, orientation_score)

In [178]:
def feat_df(game_df):
    rows = []
    for row in game_df.to_dict(orient='records'):
        pos, bullets, angle = feats(row, 1)
        pos2, bullets2, angle2 = feats(row, 2)
        rows.append({
            'pos1': pos,
            'bullet1': bullets,
            'orient1': angle,
            'pos2': pos2,
            'bullet2': bullets2,
            'orient2': angle2
        })
    return pd.DataFrame(rows)

In [214]:
class QLearner:
    def __init__(self, pos=0.0, bullets=0.0, angle=0.0, courage=1.0):
        self.pos = pos
        self.bullets = bullets
        self.angle = angle
        self.courage = courage
        
    def learn(self, f_df, winner, alpha=0.01, gamma=0.9):
        final_time = max(f_df.index)
        s = {}
        for index in f_df[~f_df.isna().any(axis=1)].index:
            for i in (1, 2):
                s[i] = np.sum(np.array([self.pos, self.bullets, self.angle]) * f_df.loc[index, [f'pos{i}', f'bullet{i}', f'orient{i}']].values)
            for i in (1, 2):
                real_q = gamma ** (final_time - index) * (1 if winner == i else -1)
                q = self.courage * s[i] - s[3 - i]
                print(real_q, q, self)
                delta = real_q - q
                # courage is really just adding new features with the same weights: it's a hyperparameter, not a parameter
                self.pos += alpha * delta * self.courage * f_df.loc[index, f'pos{i}']
                self.pos -= alpha * delta * f_df.loc[index, f'pos{3-i}']
                self.bullets += alpha * delta * self.courage * f_df.loc[index, f'bullet{i}']
                self.bullets -= alpha * delta * f_df.loc[index, f'bullet{3-i}']
                self.angle += alpha * delta * self.courage * f_df.loc[index, f'orient{i}']
                self.angle -= alpha * delta * f_df.loc[index, f'orient{3-i}']
                
        return (self.pos, self.bullets, self.angle)
    
    def __repr__(self):
        return f"QLearner(pos={self.pos:.2}, bullets={self.bullets:.2}, angle={self.angle:.2}, courage={self.courage:.2})"

In [215]:
q = QLearner()
for n_round, i in tqdm(enumerate(range(1, 41))):
    print(f"Game {i}")
    g = read_snoopy(f'../replays/game{i}.snoopy')
    if g.result != 0:
        winner = 1 if g.result == 1 else -1
        q.learn(feat_df(g.to_df()), winner, alpha=1/(10 + n_round))
        print(q)

0it [00:00, ?it/s]

Game 1


1it [00:00,  2.73it/s]

0.0011790184577738603 0.0 QLearner(pos=0.0, bullets=0.0, angle=0.0, courage=1.0)
-0.0011790184577738603 0.0 QLearner(pos=7.1e-05, bullets=0.0, angle=-0.00028, courage=1.0)
0.0013100205086376223 0.0014032911934899735 QLearner(pos=0.00014, bullets=0.0, angle=-0.00056, courage=1.0)
-0.0013100205086376223 -0.0014032911934899735 QLearner(pos=0.00014, bullets=0.0, angle=-0.00054, courage=1.0)
0.0014555783429306916 0.0012895691893319349 QLearner(pos=0.00013, bullets=0.0, angle=-0.00051, courage=1.0)
-0.0014555783429306916 -0.0012895691893319349 QLearner(pos=0.00014, bullets=0.0, angle=-0.00055, courage=1.0)
0.0016173092699229906 0.0014807961171382413 QLearner(pos=0.00015, bullets=0.0, angle=-0.00059, courage=1.0)
-0.0016173092699229906 -0.0014807961171382413 QLearner(pos=0.00016, bullets=0.0, angle=-0.00062, courage=1.0)
0.001797010299914434 0.0016336425403777409 QLearner(pos=0.00017, bullets=0.0, angle=-0.00066, courage=1.0)
-0.001797010299914434 -0.0016336425403777409 QLearner(pos=0.00018, 

2it [00:00,  2.72it/s]

0.0011790184577738603 2.5965151021785408e+17 QLearner(pos=8e+16, bullets=-1.3e+17, angle=-8.9e+16, courage=1.0)
-0.0011790184577738603 -2.5965151021785408e+17 QLearner(pos=6.6e+16, bullets=-1.3e+17, angle=-3.4e+16, courage=1.0)
0.0013100205086376223 -2.1405825411801136e+16 QLearner(pos=5.1e+16, bullets=-1.3e+17, angle=2.2e+16, courage=1.0)
-0.0013100205086376223 2.1405825411801136e+16 QLearner(pos=5.3e+16, bullets=-1.3e+17, angle=1.8e+16, courage=1.0)
0.0014555783429306916 2101525633431976.0 QLearner(pos=5.4e+16, bullets=-1.3e+17, angle=1.3e+16, courage=1.0)
-0.0014555783429306916 -2101525633431976.0 QLearner(pos=5.4e+16, bullets=-1.3e+17, angle=1.4e+16, courage=1.0)
0.0016173092699229906 416453173726836.0 QLearner(pos=5.4e+16, bullets=-1.3e+17, angle=1.4e+16, courage=1.0)
-0.0016173092699229906 -416453173726836.0 QLearner(pos=5.4e+16, bullets=-1.3e+17, angle=1.4e+16, courage=1.0)
0.001797010299914434 705756370935044.0 QLearner(pos=5.4e+16, bullets=-1.3e+17, angle=1.4e+16, courage=1.0)

3it [00:01,  2.41it/s]

0.00017696434542799797 3.1098744618389693e+34 QLearner(pos=1.2e+34, bullets=-1.9e+34, angle=-1.4e+34, courage=1.0)
-0.00017696434542799797 -3.1098744618389693e+34 QLearner(pos=1.2e+34, bullets=-1.9e+34, angle=-7.9e+33, courage=1.0)
0.00019662705047555332 5.915693790188074e+33 QLearner(pos=1.2e+34, bullets=-1.9e+34, angle=-2.2e+33, courage=1.0)
-0.00019662705047555332 -5.915693790188074e+33 QLearner(pos=1.2e+34, bullets=-1.9e+34, angle=-1.1e+33, courage=1.0)
0.00021847450052839255 1.2372150654369833e+33 QLearner(pos=1.2e+34, bullets=-1.9e+34, angle=-5e+30, courage=1.0)
-0.00021847450052839255 -1.2372150654369833e+33 QLearner(pos=1.2e+34, bullets=-1.9e+34, angle=2.2e+32, courage=1.0)
0.0002427494450315473 3.613531096881024e+32 QLearner(pos=1.2e+34, bullets=-1.9e+34, angle=4.4e+32, courage=1.0)
-0.0002427494450315473 -3.613531096881024e+32 QLearner(pos=1.2e+34, bullets=-1.9e+34, angle=5.1e+32, courage=1.0)
0.0002697216055906081 2.1233143127630077e+32 QLearner(pos=1.2e+34, bullets=-1.9e+34

4it [00:01,  2.20it/s]

0.00017696434542799797 -4.684468810385035e+69 QLearner(pos=-1.2e+69, bullets=2.7e+69, angle=2.1e+69, courage=1.0)
-0.00017696434542799797 4.684468810385035e+69 QLearner(pos=-1.2e+69, bullets=2.7e+69, angle=1.3e+69, courage=1.0)
0.00019662705047555332 -1.1791753226017234e+69 QLearner(pos=-1.1e+69, bullets=2.7e+69, angle=4.9e+68, courage=1.0)
-0.00019662705047555332 1.1791753226017234e+69 QLearner(pos=-1.1e+69, bullets=2.7e+69, angle=2.9e+68, courage=1.0)
0.00021847450052839255 -3.1111452810734257e+68 QLearner(pos=-1.1e+69, bullets=2.7e+69, angle=9e+67, courage=1.0)
-0.00021847450052839255 3.1111452810734257e+68 QLearner(pos=-1.1e+69, bullets=2.7e+69, angle=3.8e+67, courage=1.0)
0.0002427494450315473 -9.414991415523917e+67 QLearner(pos=-1.1e+69, bullets=2.7e+69, angle=-1.4e+67, courage=1.0)
-0.0002427494450315473 9.414991415523917e+67 QLearner(pos=-1.1e+69, bullets=2.7e+69, angle=-2.9e+67, courage=1.0)
0.0002697216055906081 -3.967335165195792e+67 QLearner(pos=-1.1e+69, bullets=2.7e+69, a

  bullet_score = sum(np.log(np.linalg.norm(vec - pos) - leeway) for vec in bullets.query(f'player != "Player {player}"')[['x', 'y']].values)
5it [00:02,  2.47it/s]

0.00636268544113595 -2.0622561010370813e+102 QLearner(pos=2.2e+102, bullets=-5e+102, angle=-3.8e+102, courage=1.0)
-0.00636268544113595 2.0622561010370813e+102 QLearner(pos=2.1e+102, bullets=-5e+102, angle=-3.8e+102, courage=1.0)
0.007069650490151055 -1.9181193643153678e+102 QLearner(pos=2e+102, bullets=-5e+102, angle=-3.8e+102, courage=1.0)
-0.007069650490151055 1.9181193643153678e+102 QLearner(pos=1.9e+102, bullets=-5e+102, angle=-3.8e+102, courage=1.0)
0.00785516721127895 -1.7864576806772187e+102 QLearner(pos=1.8e+102, bullets=-5e+102, angle=-3.8e+102, courage=1.0)
-0.00785516721127895 1.7864576806772187e+102 QLearner(pos=1.7e+102, bullets=-5e+102, angle=-3.7e+102, courage=1.0)
0.008727963568087723 -1.6640337477285323e+102 QLearner(pos=1.6e+102, bullets=-5e+102, angle=-3.7e+102, courage=1.0)
-0.008727963568087723 1.6640337477285323e+102 QLearner(pos=1.5e+102, bullets=-5e+102, angle=-3.7e+102, courage=1.0)
0.009697737297875247 -1.5503811213773264e+102 QLearner(pos=1.5e+102, bullets=-

6it [00:02,  2.70it/s]

0.00636268544113595 2.907130597863686e+112 QLearner(pos=-4.3e+112, bullets=3.3e+113, angle=-4.3e+111, courage=1.0)
-0.00636268544113595 -2.907130597863686e+112 QLearner(pos=-4.2e+112, bullets=3.3e+113, angle=-4.6e+111, courage=1.0)
0.007069650490151055 2.7192093288844143e+112 QLearner(pos=-4e+112, bullets=3.3e+113, angle=-4.9e+111, courage=1.0)
-0.007069650490151055 -2.7192093288844143e+112 QLearner(pos=-3.9e+112, bullets=3.3e+113, angle=-5.1e+111, courage=1.0)
0.00785516721127895 2.548956988074274e+112 QLearner(pos=-3.8e+112, bullets=3.3e+113, angle=-5.4e+111, courage=1.0)
-0.00785516721127895 -2.548956988074274e+112 QLearner(pos=-3.7e+112, bullets=3.3e+113, angle=-5.6e+111, courage=1.0)
0.008727963568087723 2.3909827097799127e+112 QLearner(pos=-3.5e+112, bullets=3.3e+113, angle=-5.8e+111, courage=1.0)
-0.008727963568087723 -2.3909827097799127e+112 QLearner(pos=-3.4e+112, bullets=3.3e+113, angle=-6.1e+111, courage=1.0)
0.009697737297875247 2.2452437172264087e+112 QLearner(pos=-3.3e+11

7it [00:02,  2.58it/s]

-0.0007735540101454297 -4.804395118490048e+121 QLearner(pos=1.1e+122, bullets=-8.4e+122, angle=1.1e+121, courage=1.0)
-0.0007735540101454297 4.804395118490048e+121 QLearner(pos=1.1e+122, bullets=-8.4e+122, angle=1.6e+120, courage=1.0)
-0.0008595044557171441 8.548722346414219e+120 QLearner(pos=1.1e+122, bullets=-8.4e+122, angle=-7.6e+120, courage=1.0)
-0.0008595044557171441 -8.548722346414219e+120 QLearner(pos=1.1e+122, bullets=-8.4e+122, angle=-6e+120, courage=1.0)
-0.0009550049507968268 -7.859209585188705e+119 QLearner(pos=1.1e+122, bullets=-8.4e+122, angle=-4.3e+120, courage=1.0)
-0.0009550049507968268 7.859209585188705e+119 QLearner(pos=1.1e+122, bullets=-8.4e+122, angle=-4.5e+120, courage=1.0)
-0.001061116611996474 1.1045549523179602e+120 QLearner(pos=1.1e+122, bullets=-8.4e+122, angle=-4.6e+120, courage=1.0)
-0.001061116611996474 -1.1045549523179602e+120 QLearner(pos=1.1e+122, bullets=-8.4e+122, angle=-4.4e+120, courage=1.0)
-0.0011790184577738603 1.1724073497869181e+120 QLearner(

9it [00:03,  3.23it/s]

-0.0007735540101454297 8.213050152370476e+139 QLearner(pos=2e+139, bullets=-1.3e+140, angle=-2.8e+139, courage=1.0)
-0.0007735540101454297 -8.213050152370476e+139 QLearner(pos=2e+139, bullets=-1.3e+140, angle=-1.3e+139, courage=1.0)
-0.0008595044557171441 -8.226041058767782e+138 QLearner(pos=2.1e+139, bullets=-1.3e+140, angle=1.7e+138, courage=1.0)
-0.0008595044557171441 8.226041058767782e+138 QLearner(pos=2.1e+139, bullets=-1.3e+140, angle=2.7e+137, courage=1.0)
-0.0009550049507968268 9.521568404256623e+137 QLearner(pos=2.1e+139, bullets=-1.3e+140, angle=-1.2e+138, courage=1.0)
-0.0009550049507968268 -9.521568404256623e+137 QLearner(pos=2.1e+139, bullets=-1.3e+140, angle=-1e+138, courage=1.0)
-0.001061116611996474 9.55302120092095e+136 QLearner(pos=2.1e+139, bullets=-1.3e+140, angle=-8.7e+137, courage=1.0)
-0.001061116611996474 -9.55302120092095e+136 QLearner(pos=2.1e+139, bullets=-1.3e+140, angle=-8.5e+137, courage=1.0)
-0.0011790184577738603 2.5503854008800182e+137 QLearner(pos=2.1e

11it [00:03,  3.86it/s]

-0.531441 6.575756098667719e+155 QLearner(pos=4.5e+155, bullets=-4.2e+156, angle=-8.6e+155, courage=1.0)
0.5904900000000001 -2.7973234276398003e+156 QLearner(pos=5e+155, bullets=-4.2e+156, angle=-8.5e+155, courage=1.0)
-0.5904900000000001 2.7973234276398003e+156 QLearner(pos=7.1e+155, bullets=-4.1e+156, angle=-8.2e+155, courage=1.0)
0.6561 -7.49390673190241e+156 QLearner(pos=9.2e+155, bullets=-4e+156, angle=-7.8e+155, courage=1.0)
-0.6561 7.49390673190241e+156 QLearner(pos=1.5e+156, bullets=-3.1e+156, angle=-7.3e+155, courage=1.0)
0.7290000000000001 -3.63946616543525e+156 QLearner(pos=2e+156, bullets=-2.3e+156, angle=-6.8e+155, courage=1.0)
-0.7290000000000001 3.63946616543525e+156 QLearner(pos=2.3e+156, bullets=-1.8e+156, angle=-6.1e+155, courage=1.0)
0.81 -7.113153836042347e+155 QLearner(pos=2.6e+156, bullets=-1.3e+156, angle=-5.5e+155, courage=1.0)
-0.81 7.113153836042347e+155 QLearner(pos=2.7e+156, bullets=-1.2e+156, angle=-5.3e+155, courage=1.0)
0.9 -1.5897457408220173e+156 QLearn

13it [00:03,  4.54it/s]

 QLearner(pos=8.5e+153, bullets=-3.2e+155, angle=-1.6e+156, courage=1.0)
-0.3486784401000001 -1.7344588100862551e+155 QLearner(pos=-2.9e+153, bullets=-3.2e+155, angle=-1.6e+156, courage=1.0)
0.3874204890000001 1.5452585708029713e+155 QLearner(pos=-1.4e+154, bullets=-3.2e+155, angle=-1.6e+156, courage=1.0)
-0.3874204890000001 -1.5452585708029713e+155 QLearner(pos=-2.4e+154, bullets=-3.3e+155, angle=-1.6e+156, courage=1.0)
0.4304672100000001 -5.525548394881908e+154 QLearner(pos=-3.5e+154, bullets=-3.3e+155, angle=-1.6e+156, courage=1.0)
-0.4304672100000001 5.525548394881908e+154 QLearner(pos=-3.1e+154, bullets=-3.3e+155, angle=-1.6e+156, courage=1.0)
0.4782969000000001 -2.3495898179102235e+155 QLearner(pos=-2.7e+154, bullets=-3.3e+155, angle=-1.6e+156, courage=1.0)
-0.4782969000000001 2.3495898179102235e+155 QLearner(pos=-1.1e+154, bullets=-3.3e+155, angle=-1.6e+156, courage=1.0)
0.531441 -4.428533571031843e+155 QLearner(pos=5e+153, bullets=-3.2e+155, angle=-1.6e+156, courage=1.0)
-0.531

14it [00:04,  4.84it/s]

0.027812838944369374 -1.158668460080888e+155 QLearner(pos=3.2e+155, bullets=-1.3e+154, angle=-8e+154, courage=1.0)
-0.027812838944369374 1.158668460080888e+155 QLearner(pos=3.2e+155, bullets=-1.3e+154, angle=-7.2e+154, courage=1.0)
0.030903154382632636 -8.630768646755411e+154 QLearner(pos=3.2e+155, bullets=-1.3e+154, angle=-6.3e+154, courage=1.0)
-0.030903154382632636 8.630768646755411e+154 QLearner(pos=3.2e+155, bullets=-1.3e+154, angle=-5.7e+154, courage=1.0)
0.03433683820292515 -6.403656822270785e+154 QLearner(pos=3.2e+155, bullets=-1.3e+154, angle=-5e+154, courage=1.0)
-0.03433683820292515 6.403656822270785e+154 QLearner(pos=3.2e+155, bullets=-1.3e+154, angle=-4.6e+154, courage=1.0)
0.038152042447694615 -4.707871762402474e+154 QLearner(pos=3.2e+155, bullets=-1.3e+154, angle=-4.1e+154, courage=1.0)
-0.038152042447694615 4.707871762402474e+154 QLearner(pos=3.2e+155, bullets=-1.3e+154, angle=-3.8e+154, courage=1.0)
0.04239115827521624 -3.4097816976988723e+154 QLearner(pos=3.2e+155, bu

17it [00:04,  6.47it/s]

 2.9279194964711203e+153 QLearner(pos=2.7e+155, bullets=-6.2e+153, angle=-3.4e+154, courage=1.0)
-0.81 6.5589101161922985e+153 QLearner(pos=2.7e+155, bullets=-6.7e+153, angle=-3.4e+154, courage=1.0)
-0.81 -6.5589101161922985e+153 QLearner(pos=2.7e+155, bullets=-5.2e+153, angle=-3.3e+154, courage=1.0)
-0.9 -1.2972700671874437e+154 QLearner(pos=2.7e+155, bullets=-3.7e+153, angle=-3.3e+154, courage=1.0)
-0.9 1.2972700671874437e+154 QLearner(pos=2.7e+155, bullets=-7.5e+153, angle=-3.4e+154, courage=1.0)
QLearner(pos=2.7e+155, bullets=-1.1e+154, angle=-3.5e+154, courage=1.0)
Game 17
-0.06461081889226677 1.6340368917467986e+154 QLearner(pos=2.7e+155, bullets=-1.1e+154, angle=-3.5e+154, courage=1.0)
-0.06461081889226677 -1.6340368917467986e+154 QLearner(pos=2.7e+155, bullets=-1.1e+154, angle=-3.6e+154, courage=1.0)
-0.0717897987691853 1.629910673804788e+154 QLearner(pos=2.7e+155, bullets=-1.1e+154, angle=-3.6e+154, courage=1.0)
-0.0717897987691853 -1.629910673804788e+154 QLearner(pos=2.7e+155

19it [00:04,  7.27it/s]

-0.06461081889226677 -5.6290800956408606e+153 QLearner(pos=2.2e+155, bullets=3.4e+154, angle=-4.8e+154, courage=1.0)
-0.06461081889226677 5.6290800956408606e+153 QLearner(pos=2.2e+155, bullets=3.4e+154, angle=-4.8e+154, courage=1.0)
-0.0717897987691853 -3.964150349414876e+153 QLearner(pos=2.2e+155, bullets=3.4e+154, angle=-4.7e+154, courage=1.0)
-0.0717897987691853 3.964150349414876e+153 QLearner(pos=2.2e+155, bullets=3.4e+154, angle=-4.7e+154, courage=1.0)
-0.07976644307687256 -1.3524460550024376e+153 QLearner(pos=2.2e+155, bullets=3.4e+154, angle=-4.7e+154, courage=1.0)
-0.07976644307687256 1.3524460550024376e+153 QLearner(pos=2.2e+155, bullets=3.4e+154, angle=-4.7e+154, courage=1.0)
-0.08862938119652507 2.004190926094804e+153 QLearner(pos=2.2e+155, bullets=3.4e+154, angle=-4.7e+154, courage=1.0)
-0.08862938119652507 -2.004190926094804e+153 QLearner(pos=2.2e+155, bullets=3.4e+154, angle=-4.7e+154, courage=1.0)
-0.09847709021836118 6.0037915713280595e+153 QLearner(pos=2.2e+155, bullet

21it [00:04,  7.77it/s]

0.05233476330273609 -7.214430597506221e+153 QLearner(pos=1.7e+155, bullets=1.5e+154, angle=2.4e+154, courage=1.0)
-0.05233476330273609 7.214430597506221e+153 QLearner(pos=1.7e+155, bullets=1.5e+154, angle=2.4e+154, courage=1.0)
0.058149737003040096 -7.05109605904992e+153 QLearner(pos=1.7e+155, bullets=1.5e+154, angle=2.4e+154, courage=1.0)
-0.058149737003040096 7.05109605904992e+153 QLearner(pos=1.7e+155, bullets=1.5e+154, angle=2.4e+154, courage=1.0)
0.06461081889226677 -7.12551803524878e+153 QLearner(pos=1.7e+155, bullets=1.5e+154, angle=2.5e+154, courage=1.0)
-0.06461081889226677 7.12551803524878e+153 QLearner(pos=1.7e+155, bullets=1.5e+154, angle=2.5e+154, courage=1.0)
0.0717897987691853 -7.475539151567444e+153 QLearner(pos=1.7e+155, bullets=1.5e+154, angle=2.5e+154, courage=1.0)
-0.0717897987691853 7.475539151567444e+153 QLearner(pos=1.7e+155, bullets=1.5e+154, angle=2.5e+154, courage=1.0)
0.07976644307687256 -8.227881504208881e+153 QLearner(pos=1.7e+155, bullets=1.5e+154, angle=2

22it [00:05,  8.00it/s]

-0.06461081889226677 2.307535347497929e+154 QLearner(pos=1.4e+155, bullets=-5.3e+152, angle=-3.6e+154, courage=1.0)
-0.06461081889226677 -2.307535347497929e+154 QLearner(pos=1.5e+155, bullets=-5.3e+152, angle=-3.5e+154, courage=1.0)
-0.0717897987691853 1.9734162322685624e+154 QLearner(pos=1.5e+155, bullets=-5.3e+152, angle=-3.4e+154, courage=1.0)
-0.0717897987691853 -1.9734162322685624e+154 QLearner(pos=1.5e+155, bullets=-5.3e+152, angle=-3.3e+154, courage=1.0)
-0.07976644307687256 1.630707898651735e+154 QLearner(pos=1.5e+155, bullets=-5.3e+152, angle=-3.2e+154, courage=1.0)
-0.07976644307687256 -1.630707898651735e+154 QLearner(pos=1.5e+155, bullets=-5.3e+152, angle=-3.2e+154, courage=1.0)
-0.08862938119652507 1.2987846600239819e+154 QLearner(pos=1.5e+155, bullets=-5.3e+152, angle=-3.1e+154, courage=1.0)
-0.08862938119652507 -1.2987846600239819e+154 QLearner(pos=1.5e+155, bullets=-5.3e+152, angle=-3e+154, courage=1.0)
-0.09847709021836118 9.80044285250655e+153 QLearner(pos=1.5e+155, bu

23it [00:05,  5.04it/s]

-0.0011790184577738603 4.6796218263112114e+154 QLearner(pos=1.5e+155, bullets=-1.8e+153, angle=-3.1e+154, courage=1.0)
-0.0011790184577738603 -4.6796218263112114e+154 QLearner(pos=1.5e+155, bullets=-1.8e+153, angle=-2.9e+154, courage=1.0)
-0.0013100205086376223 3.8213115635995615e+154 QLearner(pos=1.5e+155, bullets=-1.8e+153, angle=-2.6e+154, courage=1.0)
-0.0013100205086376223 -3.8213115635995615e+154 QLearner(pos=1.5e+155, bullets=-1.8e+153, angle=-2.4e+154, courage=1.0)
-0.0014555783429306916 3.1158042250110914e+154 QLearner(pos=1.5e+155, bullets=-1.8e+153, angle=-2.2e+154, courage=1.0)
-0.0014555783429306916 -3.1158042250110914e+154 QLearner(pos=1.5e+155, bullets=-1.8e+153, angle=-2.1e+154, courage=1.0)
-0.0016173092699229906 2.534163539292142e+154 QLearner(pos=1.5e+155, bullets=-1.8e+153, angle=-1.9e+154, courage=1.0)
-0.0016173092699229906 -2.534163539292142e+154 QLearner(pos=1.5e+155, bullets=-1.8e+153, angle=-1.8e+154, courage=1.0)
-0.001797010299914434 2.0614558254279064e+154 

24it [00:05,  3.87it/s]

-0.0011790184577738603 -2.6396776908133152e+166 QLearner(pos=4.5e+165, bullets=2.3e+167, angle=1.5e+166, courage=1.0)
-0.0011790184577738603 2.6396776908133152e+166 QLearner(pos=4.4e+165, bullets=2.3e+167, angle=1.4e+166, courage=1.0)
-0.0013100205086376223 -2.1673215886245382e+166 QLearner(pos=4.4e+165, bullets=2.3e+167, angle=1.3e+166, courage=1.0)
-0.0013100205086376223 2.1673215886245382e+166 QLearner(pos=4.4e+165, bullets=2.3e+167, angle=1.2e+166, courage=1.0)
-0.0014555783429306916 -1.775122762782916e+166 QLearner(pos=4.3e+165, bullets=2.3e+167, angle=1.1e+166, courage=1.0)
-0.0014555783429306916 1.775122762782916e+166 QLearner(pos=4.3e+165, bullets=2.3e+167, angle=9.6e+165, courage=1.0)
-0.0016173092699229906 -1.4537676063758893e+166 QLearner(pos=4.3e+165, bullets=2.3e+167, angle=8.7e+165, courage=1.0)
-0.0016173092699229906 1.4537676063758893e+166 QLearner(pos=4.3e+165, bullets=2.3e+167, angle=8e+165, courage=1.0)
-0.001797010299914434 -1.1926682124719268e+166 QLearner(pos=4.3e

25it [00:06,  4.15it/s]

-0.020275559590445275 6.336115667445258e+179 QLearner(pos=1.8e+179, bullets=9e+180, angle=6.1e+179, courage=1.0)
-0.020275559590445275 -6.336115667445258e+179 QLearner(pos=1.6e+179, bullets=9e+180, angle=6e+179, courage=1.0)
-0.022528399544939195 5.7584600289341e+179 QLearner(pos=1.4e+179, bullets=9e+180, angle=5.8e+179, courage=1.0)
-0.022528399544939195 -5.7584600289341e+179 QLearner(pos=1.2e+179, bullets=9e+180, angle=5.7e+179, courage=1.0)
-0.025031555049932437 5.234620527946241e+179 QLearner(pos=1.1e+179, bullets=9e+180, angle=5.6e+179, courage=1.0)
-0.025031555049932437 -5.234620527946241e+179 QLearner(pos=9e+178, bullets=9e+180, angle=5.4e+179, courage=1.0)
-0.027812838944369374 4.755640915838669e+179 QLearner(pos=7.4e+178, bullets=9e+180, angle=5.3e+179, courage=1.0)
-0.027812838944369374 -4.755640915838669e+179 QLearner(pos=6e+178, bullets=9e+180, angle=5.2e+179, courage=1.0)
-0.030903154382632636 4.317310192685765e+179 QLearner(pos=4.6e+178, bullets=9e+180, angle=5.1e+179, co

26it [00:06,  4.17it/s]


-0.06461081889226677 -2.4818901511117323e+180 QLearner(pos=3.1e+180, bullets=3e+180, angle=-1.7e+180, courage=1.0)
-0.0717897987691853 2.3291787828963755e+180 QLearner(pos=3e+180, bullets=3e+180, angle=-1.7e+180, courage=1.0)
-0.0717897987691853 -2.3291787828963755e+180 QLearner(pos=3e+180, bullets=3e+180, angle=-1.8e+180, courage=1.0)
-0.07976644307687256 2.1809970026849606e+180 QLearner(pos=2.9e+180, bullets=3e+180, angle=-1.8e+180, courage=1.0)
-0.07976644307687256 -2.1809970026849606e+180 QLearner(pos=2.8e+180, bullets=3e+180, angle=-1.9e+180, courage=1.0)
-0.08862938119652507 2.0287785085669565e+180 QLearner(pos=2.7e+180, bullets=3e+180, angle=-1.9e+180, courage=1.0)
-0.08862938119652507 -2.0287785085669565e+180 QLearner(pos=2.7e+180, bullets=3e+180, angle=-1.9e+180, courage=1.0)
-0.09847709021836118 1.8737399376895443e+180 QLearner(pos=2.6e+180, bullets=3e+180, angle=-2e+180, courage=1.0)
-0.09847709021836118 -1.8737399376895443e+180 QLearner(pos=2.5e+180, bullets=3e+180, angle=




KeyboardInterrupt: 