In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 
from itertools import product
import subprocess
import re
import multiprocessing as mp
import time

In [2]:
train_eval = [(100, 600), (1000, 1500), (5000, 5500)]
# train_eval = [(95, 100)]
seed = [4]
learning_rate = [0.001, 0.01, 0.1]
maps = ['smallClassic', 'mediumClassic', 'originalClassic']

In [3]:
def command(train_eval, seed, learning_rate, maps):
    return f'/usr/bin/python3 /home/mmedicina/IMECCGW/Reinforcement+Learning/pacman/pacman.py -l {maps} --pacman RLAgent_final --timeout 10 -q -x {train_eval[0]} -n {train_eval[1]} -f {seed} -a learning_rate={learning_rate}'

In [4]:
def analyze_config(opts):
    com = command(*opts).split()
    com[1] = com[1].replace('+', ' ')

    start_time = time.perf_counter()
    
    sp = subprocess.Popen(com, stdout=subprocess.PIPE)
    stdout = sp.communicate()[0].decode('utf-8')

    end_time = time.perf_counter()
    
    avg_score_re = re.compile(r'Average Score:.* ([0-9.-]+)')
    avg_score = avg_score_re.search(stdout).group(1)
    
    train_score_re = re.compile(r'([0-9-]+) foi')
    train_score = train_score_re.search(stdout).group(1)
    
    win_rate_re = re.compile(r'Win Rate:.+ \(([0-9.]+)\)')
    win_rate = win_rate_re.search(stdout).group(1)
    
    s = pd.DataFrame.from_records({
        'mapa': opts[3],
        'num_jogos': opts[0][1],
        'num_treino': opts[0][0],
        'num_teste': opts[0][1] - opts[0][0],
        'learning_rate': opts[2],
        'pontos_treino': train_score,
        'pontos_teste': avg_score,
        'razao_vitorias': win_rate,
        'tempo_exec': end_time - start_time,
    }, index=[0])

    return s

def driver():
    N_THREADS = 15
    with mp.Pool(N_THREADS) as pool:
        opts = list(product(train_eval, seed, learning_rate, maps))

        threads = [pool.apply_async(analyze_config, (opt, )) for opt in opts]

        res = [t.get() for t in threads]

        return res

runs = driver()

df = pd.DataFrame()
        
for run in runs:
    df = pd.concat([df, run], ignore_index=True)

cols = ['pontos_teste', 'pontos_treino', 'razao_vitorias']
df[cols] = df[cols].apply(pd.to_numeric, axis=1, errors='coerce')

df.to_csv('./benchmark.csv')

In [5]:
for nome, mapaf in df.copy().groupby('mapa', as_index=False, group_keys=False):
    #fname = f'tables/{nome}.txt'#
    mapaf.drop(['mapa','num_jogos'], axis=1, inplace=True)
    # print(mapaf.dtypes)
    print(nome)
    table = ""
    for idx, row in enumerate(mapaf.iterrows()):
        print(row[1])
        s = str(row[1][0])
        s += ''.join(f' & {int(x)}' for x in row[1][1:3])
        s += ''.join(f' & {x:.1f}' for x in row[1][3:5])
        s += ''.join(f' & {x:.2f}' for x in row[1][5:6])
        s += ''.join(f' & {x:.0f}' for x in row[1][6:7])

        s += r'\\'
        if idx % 3 == 2:
            s += '\midrule'
        s += '\n'
        # s += ' & ' + str(row[1][1])
        table += s

    print(table)


mediumClassic
learning_rate          0.001
num_teste                500
num_treino               100
pontos_teste         1459.99
pontos_treino           1148
razao_vitorias          0.92
tempo_exec        322.958407
Name: 1, dtype: object


ValueError: Unknown format code 'f' for object of type 'str'

Unnamed: 0,learning_rate,mapa,num_jogos,num_teste,num_treino,pontos_teste,pontos_treino,razao_vitorias,tempo_exec
0,0.001,smallClassic,600,500,100,571.022,269,0.63,198.838859
1,0.001,mediumClassic,600,500,100,1459.99,1148,0.92,322.958407
2,0.001,originalClassic,600,500,100,2642.596,2521,0.86,1557.090136
3,0.01,smallClassic,600,500,100,700.434,568,0.75,132.806957
4,0.01,mediumClassic,600,500,100,1484.87,1365,0.93,323.564491
5,0.01,originalClassic,600,500,100,2000.286,2406,0.6,2449.049531
6,0.1,smallClassic,600,500,100,-505.576,-500,0.0,74.199035
7,0.1,mediumClassic,600,500,100,-475.05,-415,0.0,171.916575
8,0.1,originalClassic,600,500,100,1086.034,-214,0.18,1153.533584
9,0.001,smallClassic,1500,500,1000,772.192,724,0.81,329.631347


In [7]:
df.dtypes

learning_rate     float64
mapa               object
num_jogos           int64
num_teste           int64
num_treino          int64
pontos_teste       object
pontos_treino      object
razao_vitorias     object
tempo_exec        float64
dtype: object

Unnamed: 0,learning_rate,mapa,num_jogos,num_teste,num_treino,pontos_teste,pontos_treino,razao_vitorias,tempo_exec
0,0.001,smallClassic,600,500,100,571.022,269.0,0.63,198.838859
1,0.001,mediumClassic,600,500,100,1459.99,1148.0,0.92,322.958407
2,0.001,originalClassic,600,500,100,2642.596,2521.0,0.86,1557.090136
3,0.01,smallClassic,600,500,100,700.434,568.0,0.75,132.806957
4,0.01,mediumClassic,600,500,100,1484.87,1365.0,0.93,323.564491
5,0.01,originalClassic,600,500,100,2000.286,2406.0,0.6,2449.049531
6,0.1,smallClassic,600,500,100,-505.576,-500.0,0.0,74.199035
7,0.1,mediumClassic,600,500,100,-475.05,-415.0,0.0,171.916575
8,0.1,originalClassic,600,500,100,1086.034,-214.0,0.18,1153.533584
9,0.001,smallClassic,1500,500,1000,772.192,724.0,0.81,329.631347
