In [47]:
from replay_reader import read_snoopy, parse_bullets, width, height, player_r, bullet_r

In [103]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [220]:
def feats(row, player):
    pos = np.array([row[f'x{player}'] / width, row[f'y{player}'] / height])
    pos_score = (1 - 4 * (pos[0] - 0.5) ** 2 +
                 np.log(2 / 3 - abs(pos[1] - 2 / 3) + 0.01))
    
    bullets = parse_bullets(row['bullets'])
    if bullets.empty:
        bullet_score = 0
    else:
        bullets['x'] /= width
        bullets['y'] /= height
        leeway = (player_r + bullet_r) / max(width, height)
        bullet_score = sum(np.log(np.linalg.norm(vec - pos) - leeway) for vec in bullets.query(f'player != "Player {player}"')[['x', 'y']].values)
    
    curr_angle = np.arctan2(row[f'y{3-player}'] - row[f'y{player}'],
                            row[f'x{3-player}'] - row[f'x{player}'])
    orientation_score = (np.pi * 2) - abs(row[f'a{player}'] - curr_angle)
    
    return (pos_score, bullet_score, orientation_score)

In [221]:
def feat_df(game_df):
    rows = []
    for row in game_df.to_dict(orient='records'):
        pos, bullets, angle = feats(row, 1)
        pos2, bullets2, angle2 = feats(row, 2)
        rows.append({
            'pos1': pos,
            'bullet1': bullets,
            'orient1': angle,
            'pos2': pos2,
            'bullet2': bullets2,
            'orient2': angle2
        })
    return pd.DataFrame(rows)

In [227]:
def add_values(feat_df, winner, gamma=0.9):
    f_df = feat_df[~feat_df.isna().any(axis=1)].copy()
    t_f = max(feat_df.index)
    for i in (1, 2):
        f_df[f'v{i}'] = [gamma ** (t_f - t) * (100 if winner == i else -100) for t in f_df.index]
    return f_df

In [232]:
f_df = add_values(read_snoopy('../replays/game1.snoopy').to_df(), winner=1)
f_df

Unnamed: 0_level_0,t,x1,y1,vx1,vy1,a1,x2,y2,vx2,vy2,a2,bullets,a1_0,a1_1,a2_0,a2_1,v1,v2
t_int,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1,0.03125,649.625,779.875,0.0,-7.5,1.875,774.625,487.0,0.0,-7.5,-1.0,"[[], []]",2.267699,1.482301,-0.607301,-1.392699,1.478088,-1.478088
2,0.0625,649.625,779.5,0.0,-15.0,1.75,774.625,486.75,0.0,-15.0,-0.875,"[[], []]",2.142699,1.357301,-0.482301,-1.267699,1.64232,-1.64232
3,0.09375,649.625,779.0,0.0,-22.375,1.625,774.625,486.125,0.0,-22.375,-0.75,"[[], []]",2.017699,1.232301,-0.357301,-1.142699,1.8248,-1.8248
4,0.125,649.625,778.25,0.0,-29.875,1.5,774.625,485.375,0.0,-29.875,-0.625,"[[], []]",1.892699,1.107301,-0.232301,-1.017699,2.027556,-2.027556
5,0.15625,649.625,777.25,0.0,-37.125,1.375,774.625,484.375,0.0,-37.125,-0.5,"[[], []]",1.767699,0.982301,-0.107301,-0.892699,2.25284,-2.25284
6,0.1875,649.625,776.0,0.0,-44.375,1.25,774.625,483.25,0.0,-44.375,-0.375,"[[], []]",1.642699,0.857301,0.017699,-0.767699,2.503156,-2.503156
7,0.21875,649.625,774.625,0.0,-51.5,1.125,774.625,481.75,0.0,-51.5,-0.25,"[[], []]",1.517699,0.732301,0.142699,-0.642699,2.781284,-2.781284
8,0.25,649.625,773.0,0.0,-58.375,1.0,774.625,480.125,0.0,-58.375,-0.125,"[[], []]",1.392699,0.607301,0.267699,-0.517699,3.090315,-3.090315
9,0.28125,649.625,771.125,0.0,-65.25,0.875,774.625,478.25,0.0,-65.25,0.0,"[[], []]",1.267699,0.482301,0.392699,-0.392699,3.433684,-3.433684
10,0.3125,649.625,769.0,0.0,-71.875,0.75,774.625,476.125,0.0,-71.875,0.125,"[[], []]",1.142699,0.357301,0.517699,-0.267699,3.815204,-3.815204


In [224]:
class QLearner:
    def __init__(self, pos=0.0, bullets=0.0, angle=0.0, courage=1.0):
        self.pos = pos
        self.bullets = bullets
        self.angle = angle
        self.courage = courage
        
    def learn(self, f_df, winner, alpha=0.01, gamma=0.9):
        final_time = max(f_df.index)
        s = {}
        idx = f_df[~f_df.isna().any(axis=1)].index
        last_idx = max(idx)
        for index in idx:
            for i in (1, 2):
                s[i] = np.sum(np.array([self.pos, self.bullets, self.angle]) * f_df.loc[index, [f'pos{i}', f'bullet{i}', f'orient{i}']].values)
            for i in (1, 2):
                win_sign = 1 if winner == i else -1
                v = 100 * win_sign if index == last_index else 0
                real_q = gamma ** (final_time - index) * 
                q = self.courage * s[i] - s[3 - i]
                print(real_q, q, self)
                delta = real_q - q
                # courage is really just adding new features with the same weights: it's a hyperparameter, not a parameter
                self.pos += alpha * delta * self.courage * f_df.loc[index, f'pos{i}']
                self.pos -= alpha * delta * f_df.loc[index, f'pos{3-i}']
                self.bullets += alpha * delta * self.courage * f_df.loc[index, f'bullet{i}']
                self.bullets -= alpha * delta * f_df.loc[index, f'bullet{3-i}']
                self.angle += alpha * delta * self.courage * f_df.loc[index, f'orient{i}']
                self.angle -= alpha * delta * f_df.loc[index, f'orient{3-i}']
                
        return (self.pos, self.bullets, self.angle)
    
    def __repr__(self):
        return f"QLearner(pos={self.pos:.2}, bullets={self.bullets:.2}, angle={self.angle:.2}, courage={self.courage:.2})"

SyntaxError: invalid syntax (<ipython-input-224-4371d5bfa36b>, line 19)

In [250]:
dfs = []
for i in tqdm(range(1, 101)):
    g = read_snoopy(f'../replays/game{i}.snoopy')
    if g.result != 0:
        winner = 1 if g.result == 1 else 2
        dfs.append(add_values(feat_df(g.to_df()), winner))

  bullet_score = sum(np.log(np.linalg.norm(vec - pos) - leeway) for vec in bullets.query(f'player != "Player {player}"')[['x', 'y']].values)
100%|██████████| 100/100 [00:44<00:00,  2.24it/s]


In [251]:
df = pd.concat(dfs)

In [252]:
from sklearn.linear_model import LinearRegression

In [255]:
lr = LinearRegression()
X = df[['bullet1', 'orient1', 'pos1']].values
y = df[['v1']]

lr.fit(X, y)

LinearRegression()

In [256]:
lr.coef_

array([[ 0.27848007,  3.06952571, -0.4701161 ]])