## Exploring Battle Logs

In [4]:
from bs4 import BeautifulSoup


## Constants

In [105]:

INPUT_DIR="../battle_data"
OUTPUT_DIR="../stats_data"

UNKNOWN_POKEMON = '__UNKNOWN_POKEMON__'
NO_POKEMON = '__NO_POKEMON__'
UNKNOWN_RATING = None


## Generating Battle CSV Data

In [106]:
class Player:
    def __init__(self, id, logs):
        self.id = id
        
        for log in logs:
            if log[0] == "player" and log[1] == id and len(log) > 2:
                self.player_name = log[2]
                self.rating = UNKNOWN_RATING if len(log[4]) == 0 else int(log[4])
                break



In [107]:

class Pokemon:
    def __init__(self, player, id, species, logs):
        self.id = id
        self.player = player
        self.species = species
        self._logs = logs



In [108]:
class PokemonTeam:
    def __init__(self, player, logs):
        self.player = player
        self._logs = logs
        self._pokemon_map = {}
        self._pokemon = []
        self._load_pokemon()


    def __len__(self):
        for log in self._logs:
            if log[0] == 'teamsize' and log[1] == self.player.id:
                return int(log[2])

        return 0


    def __getitem__(self, i):
        return self._pokemon[i]


    def __iter__(self):
        for poke in self._pokemon:
            yield poke


    def _load_pokemon(self):
        prefix = self.player.id + "a: "
        count = len(self)
        found_count = 0

        for log in self._logs:
            player_prefix = self.player.id + "a: "
            if log[0] == 'switch' and log[1].startswith(player_prefix):
                p_id = log[1]

                if p_id in self._pokemon_map:
                    continue

                species = p_id.split(" ")[1]

                pokemon = Pokemon(player=self.player, id=p_id, species=species, logs=self._logs)

                self._pokemon_map[p_id] = len(self._pokemon)
                self._pokemon.append(pokemon)
                found_count += 1

        while found_count < count:
            p_id = prefix + UNKNOWN_POKEMON + " " + str(count - found_count)
            pokemon = Pokemon(player=self.player, id=p_id, species=UNKNOWN_POKEMON, logs=self._logs)
            self._pokemon_map[p_id] = len(self._pokemon)
            self._pokemon.append(pokemon)
            found_count += 1




In [121]:
class BattleLogs:
    def __init__(self, logs):
        self._logs = logs
        self.p1 = Player(id="p1", logs=logs)
        self.p2 = Player(id="p2", logs=logs)

 
    @property
    def tier(self):
        for log in self._logs:
            if log[0] == "tier":
                return log[1]

    @property
    def is_rated(self):
        for log in self._logs:
            if log[0] == 'rated':
                return True

        return False

    
    @property
    def rules(self):
        return [x[1] for x in self._logs if x[0] == 'rule']


    @property
    def p1_team(self):
        return PokemonTeam(player=self.p1, logs=self._logs)


    @property
    def p2_team(self):
        return PokemonTeam(player=self.p2, logs=self._logs)


    @property
    def winner(self):
        for log in self._logs:
            if log[0] == "win":
                player_id = log[1]
                break

        if self.p1.id == player_id:
            return self.p1
        elif self.p2.id == player_id:
            return self.p2
        else:
            return None



In [122]:
import pandas as pd

# Number of log files we have in our data.
LOGS_COUNT = 143


In [123]:
with open('../battle_data/0001.html') as file:
    soup = BeautifulSoup(file, "html.parser")
    el = soup.find_all("script", class_="battle-log-data")[0]
    raw_logs = [x.split('|')[1:] for x in el.text.split('\n') if len(x) > 0]

logs = BattleLogs(logs=raw_logs)

for pokemon in logs.p2_team:
    print(pokemon.species)


Duraludon
Archeops
Gothitelle
Regidrago
Darmanitan
Lunala


In [124]:
pokedex = pd.read_csv('../stats_data/dex.csv')


In [125]:
def generate_frame(logs_count):
    idx = [
        'p1_name',
        'p2_name',
        'is_rated_battle',
        'p1_rating',
        'p2_rating',
        'team1_count',
        'team2_count',
        'p1_poke1',
        'p1_poke2',
        'p1_poke3',
        'p1_poke4',
        'p1_poke5',
        'p1_poke6',
        'p2_poke1',
        'p2_poke2',
        'p2_poke3',
        'p2_poke4',
        'p2_poke5',
        'p2_poke6']

    df = pd.DataFrame()
    list = []

    for i in range(logs_count):
        filename = INPUT_DIR + "/" + str(i + 1).zfill(4) + ".html"

        with open(filename) as file:
                soup = BeautifulSoup(file, "html.parser")
                el = soup.find_all("script", class_="battle-log-data")[0]
                raw_logs = [x.split('|')[1:] for x in el.text.split('\n') if len(x) > 0]

        logs = BattleLogs(raw_logs)

        p1_team_len = len(logs.p1_team)
        p2_team_len = len(logs.p2_team)

        list.append(pd.Series([
            logs.p1.player_name,
            logs.p2.player_name,
            logs.is_rated,
            logs.p1.rating,
            logs.p2.rating,
            p1_team_len,
            p2_team_len,
            NO_POKEMON if len(logs.p1_team) <= 0 else logs.p1_team[0].species,
            NO_POKEMON if len(logs.p1_team) <= 1 else logs.p1_team[1].species,
            NO_POKEMON if len(logs.p1_team) <= 2 else logs.p1_team[2].species,
            NO_POKEMON if len(logs.p1_team) <= 3 else logs.p1_team[3].species,
            NO_POKEMON if len(logs.p1_team) <= 4 else logs.p1_team[4].species,
            NO_POKEMON if len(logs.p1_team) <= 5 else logs.p1_team[5].species,
            NO_POKEMON if len(logs.p1_team) <= 0 else logs.p1_team[0].species,
            NO_POKEMON if len(logs.p1_team) <= 1 else logs.p1_team[1].species,
            NO_POKEMON if len(logs.p1_team) <= 2 else logs.p1_team[2].species,
            NO_POKEMON if len(logs.p1_team) <= 3 else logs.p1_team[3].species,
            NO_POKEMON if len(logs.p1_team) <= 4 else logs.p1_team[4].species,
            NO_POKEMON if len(logs.p1_team) <= 5 else logs.p1_team[5].species,
        ]))

    df = pd.concat(list, axis=1).T
    df.columns = idx
    return df



In [127]:
df= generate_frame(143)
df

Unnamed: 0,p1_name,p2_name,is_rated_battle,p1_rating,p2_rating,team1_count,team2_count,p1_poke1,p1_poke2,p1_poke3,p1_poke4,p1_poke5,p1_poke6,p2_poke1,p2_poke2,p2_poke3,p2_poke4,p2_poke5,p2_poke6
0,Rhodes28,sbq1e,True,1391,1440,6,6,Dragapult,Darmanitan,Lycanroc,Lapras,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__,Dragapult,Darmanitan,Lycanroc,Lapras,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__
1,Xebrilag27,Rage69,True,1754,1784,6,6,Arctovish,Salazzle,Jellicent,Blacephalon,Electivire,Sirfetch’d,Arctovish,Salazzle,Jellicent,Blacephalon,Electivire,Sirfetch’d
2,Raccoonis,WhoIsJoeLigma?,True,1085,1058,6,6,Haxorus,Urshifu,Dubwool,Duraludon,Luxray,__UNKNOWN_POKEMON__,Haxorus,Urshifu,Dubwool,Duraludon,Luxray,__UNKNOWN_POKEMON__
3,bembi,SecondaryStomach,True,1269,1315,6,6,Ludicolo,Thundurus,Magearna,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__,Ludicolo,Thundurus,Magearna,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__
4,MrCrunchBar,Primainprime,False,,,6,6,Centiskorch,Scyther,Togedemaru,Venusaur,Solgaleo,Talonflame,Centiskorch,Scyther,Togedemaru,Venusaur,Solgaleo,Talonflame
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
138,Flarelda,Callme8543214589,True,1962,1935,6,6,Snorlax,Delibird,Marshadow,Tapu,Charizard,Sylveon,Snorlax,Delibird,Marshadow,Tapu,Charizard,Sylveon
139,derplorddragon,raceface111,True,1564,1575,6,6,Tangrowth,Blacephalon,Qwilfish,Omastar,Espeon,Darmanitan,Tangrowth,Blacephalon,Qwilfish,Omastar,Espeon,Darmanitan
140,Nasvaah,MaxHasAFax,True,1463,1449,6,6,Copperajah,Giratina,Pheromosa,Blacephalon,Cinccino,Sandslash,Copperajah,Giratina,Pheromosa,Blacephalon,Cinccino,Sandslash
141,rebonack,hop23,True,1650,1698,6,6,Xatu,Jellicent,Tangrowth,Zekrom,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__,Xatu,Jellicent,Tangrowth,Zekrom,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__


In [None]:
df.to_csv(OUTPUT_DIR + '/battle_stats.csv')
