## Exploring Battle Logs

In [8]:
from bs4 import BeautifulSoup


## Constants

In [9]:

INPUT_DIR="../battle_data"
OUTPUT_DIR="../stats_data"


UNKNOWN_POKEMON = '__UNKNOWN_POKEMON__'
NO_POKEMON = '__NO_POKEMON__'
UNKNOWN_RATING = None


## Generating Battle CSV Data

In [10]:
class Player:
    def __init__(self, id, logs):
        self.id = id
        
        for log in logs:
            if log[0] == "player" and log[1] == id and len(log) > 2:
                self.player_name = log[2]
                self.rating = UNKNOWN_RATING if len(log[4]) == 0 else int(log[4])
                break



In [11]:

class Pokemon:
    def __init__(self, player, id, species, logs):
        self.id = id
        self.player = player
        self.species = species
        self._logs = logs



In [86]:
class PokemonTeam:
    def __init__(self, player, logs):
        self.player = player
        self._logs = logs
        self._pokemon_map = {}
        self._pokemon = []
        self._load_pokemon()


    def __len__(self):
        for log in self._logs:
            if log[0] == 'teamsize' and log[1] == self.player.id:
                return int(log[2])

        return 0


    def __getitem__(self, i):
        return self._pokemon[i]


    def __iter__(self):
        for poke in self._pokemon:
            yield poke


    @property
    def has_unknown_pokemon(self):
        for p in self._pokemon:
            if p.species == UNKNOWN_POKEMON:
                return True

        return False


    @property
    def known_count(self):
        count = 0
        for p in self._pokemon:
            if p.species != UNKNOWN_POKEMON:
                count += 1

        return count


    def _load_pokemon(self):
        prefix = self.player.id + "a: "
        count = len(self)
        found_count = 0

        for log in self._logs:
            player_prefix = self.player.id + "a: "
            if log[0] == 'switch' and log[1].startswith(player_prefix):
                p_id = log[1]

                if p_id in self._pokemon_map:
                    continue

                species = p_id.split(" ")[1]

                pokemon = Pokemon(player=self.player, id=p_id, species=species, logs=self._logs)

                self._pokemon_map[p_id] = len(self._pokemon)
                self._pokemon.append(pokemon)
                found_count += 1

        while found_count < count:
            p_id = prefix + UNKNOWN_POKEMON + " " + str(count - found_count)
            pokemon = Pokemon(player=self.player, id=p_id, species=UNKNOWN_POKEMON, logs=self._logs)
            self._pokemon_map[p_id] = len(self._pokemon)
            self._pokemon.append(pokemon)
            found_count += 1




In [34]:
class BattleLogs:
    def __init__(self, logs):
        self._logs = logs
        self.p1 = Player(id="p1", logs=logs)
        self.p2 = Player(id="p2", logs=logs)

 
    @property
    def tier(self):
        for log in self._logs:
            if log[0] == "tier":
                return log[1]

    @property
    def is_rated(self):
        for log in self._logs:
            if log[0] == 'rated':
                return True

        return False

    
    @property
    def rules(self):
        return [x[1] for x in self._logs if x[0] == 'rule']


    @property
    def p1_team(self):
        return PokemonTeam(player=self.p1, logs=self._logs)


    @property
    def p2_team(self):
        return PokemonTeam(player=self.p2, logs=self._logs)


    @property
    def winner(self):
        for log in self._logs:
            if log[0] == "win":
                player_name = log[1]
                break

        if self.p1.player_name == player_name:
            return self.p1
        elif self.p2.player_name == player_name:
            return self.p2
        else:
            return None



In [35]:
import pandas as pd

# Number of log files we have in our data.
LOGS_COUNT = 143


In [36]:
with open('../battle_data/0001.html') as file:
    soup = BeautifulSoup(file, "html.parser")
    el = soup.find_all("script", class_="battle-log-data")[0]
    raw_logs = [x.split('|')[1:] for x in el.text.split('\n') if len(x) > 0]

logs = BattleLogs(logs=raw_logs)

for pokemon in logs.p2_team:
    print(pokemon.species)

print(logs.winner)


Duraludon
Archeops
Gothitelle
Regidrago
Darmanitan
Lunala
<__main__.Player object at 0x7fd7d8961a30>
['win', 'Rhodes28']


'Rhodes28'

In [16]:
pokedex = pd.read_csv('../stats_data/dex.csv')


In [105]:
def generate_frame(logs_count):
    idx = [
        'p1_name',
        'p2_name',
        'p_winner',
        'is_rated_battle',
        'p1_rating',
        'p2_rating',
        'team1_count',
        'team2_count',
        'p1_poke1',
        'p1_poke2',
        'p1_poke3',
        'p1_poke4',
        'p1_poke5',
        'p1_poke6',
        'p2_poke1',
        'p2_poke2',
        'p2_poke3',
        'p2_poke4',
        'p2_poke5',
        'p2_poke6',
        'team1_unknown_count',
        'team2_unknown_count']

    df = pd.DataFrame()
    list = []

    for i in range(logs_count):
        filename = INPUT_DIR + "/" + str(i + 1).zfill(4) + ".html"

        with open(filename) as file:
                soup = BeautifulSoup(file, "html.parser")
                el = soup.find_all("script", class_="battle-log-data")[0]
                raw_logs = [x.split('|')[1:] for x in el.text.split('\n') if len(x) > 0]

        logs = BattleLogs(raw_logs)

        p1_team_len = len(logs.p1_team)
        p2_team_len = len(logs.p2_team)

        list.append(pd.Series([
            logs.p1.player_name,
            logs.p2.player_name,
            logs.winner.id,
            logs.is_rated,
            logs.p1.rating,
            logs.p2.rating,
            p1_team_len,
            p2_team_len,
            NO_POKEMON if len(logs.p1_team) <= 0 else logs.p1_team[0].species,
            NO_POKEMON if len(logs.p1_team) <= 1 else logs.p1_team[1].species,
            NO_POKEMON if len(logs.p1_team) <= 2 else logs.p1_team[2].species,
            NO_POKEMON if len(logs.p1_team) <= 3 else logs.p1_team[3].species,
            NO_POKEMON if len(logs.p1_team) <= 4 else logs.p1_team[4].species,
            NO_POKEMON if len(logs.p1_team) <= 5 else logs.p1_team[5].species,
            NO_POKEMON if len(logs.p1_team) <= 0 else logs.p1_team[0].species,
            NO_POKEMON if len(logs.p1_team) <= 1 else logs.p1_team[1].species,
            NO_POKEMON if len(logs.p1_team) <= 2 else logs.p1_team[2].species,
            NO_POKEMON if len(logs.p1_team) <= 3 else logs.p1_team[3].species,
            NO_POKEMON if len(logs.p1_team) <= 4 else logs.p1_team[4].species,
            NO_POKEMON if len(logs.p1_team) <= 5 else logs.p1_team[5].species,
            p1_team_len - logs.p1_team.known_count,
            p2_team_len - logs.p2_team.known_count,
        ]))

    df = pd.concat(list, axis=1).T
    df.columns = idx
    return df



In [106]:
df= generate_frame(152)
df

Unnamed: 0,p1_name,p2_name,p_winner,is_rated_battle,p1_rating,p2_rating,team1_count,team2_count,p1_poke1,p1_poke2,...,p1_poke5,p1_poke6,p2_poke1,p2_poke2,p2_poke3,p2_poke4,p2_poke5,p2_poke6,team1_unknown_count,team2_unknown_count
0,Rhodes28,sbq1e,p1,True,1391,1440,6,6,Dragapult,Darmanitan,...,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__,Dragapult,Darmanitan,Lycanroc,Lapras,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__,2,0
1,Xebrilag27,Rage69,p2,True,1754,1784,6,6,Arctovish,Salazzle,...,Electivire,Sirfetch’d,Arctovish,Salazzle,Jellicent,Blacephalon,Electivire,Sirfetch’d,0,1
2,Raccoonis,WhoIsJoeLigma?,p1,True,1085,1058,6,6,Haxorus,Urshifu,...,Luxray,__UNKNOWN_POKEMON__,Haxorus,Urshifu,Dubwool,Duraludon,Luxray,__UNKNOWN_POKEMON__,1,0
3,bembi,SecondaryStomach,p1,True,1269,1315,6,6,Ludicolo,Thundurus,...,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__,Ludicolo,Thundurus,Magearna,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__,3,0
4,MrCrunchBar,Primainprime,p2,False,,,6,6,Centiskorch,Scyther,...,Solgaleo,Talonflame,Centiskorch,Scyther,Togedemaru,Venusaur,Solgaleo,Talonflame,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,Yakult_Ako,trying_toget_tough,p2,True,1498,1466,6,6,Barbaracle,Silvally,...,Palkia,Dubwool,Barbaracle,Silvally,Ninetales,Sceptile,Palkia,Dubwool,0,0
148,Piggypigpig,megapupex,p1,True,1119,1067,6,6,Nidoking,Tapu,...,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__,Nidoking,Tapu,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__,4,0
149,Waspolo,Lazurium,p2,True,1394,1429,6,6,Shuckle,Charizard,...,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__,Shuckle,Charizard,Arctozolt,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__,__UNKNOWN_POKEMON__,3,5
150,Korbomemero,Rajsters,p1,True,1300,1357,6,6,Pinsir,Krookodile,...,Buzzwole,Kyurem,Pinsir,Krookodile,Sandslash,Marowak,Buzzwole,Kyurem,0,0


In [73]:
df.to_csv(OUTPUT_DIR + '/battle_stats.csv')


## Exploring the Data

In [74]:
import matplotlib.pyplot as plt


How many rated battles are there?

In [75]:

rated_count = df[df["is_rated_battle"]].shape[0]
total_count = df.shape[0]

(rated_count, total_count)


(134, 152)

Which Pokemon are frequently used?

In [76]:
from collections import Counter


In [77]:
starter_pokemon_counter = Counter()
pokemon_counter = Counter()

starter_pokemon_counter.update(df['p1_poke1'])
starter_pokemon_counter.update(df['p2_poke1'])

del starter_pokemon_counter[UNKNOWN_POKEMON]
del starter_pokemon_counter[NO_POKEMON]

pokemon_counter.update(df['p1_poke1'])
pokemon_counter.update(df['p1_poke2'])
pokemon_counter.update(df['p1_poke3'])
pokemon_counter.update(df['p1_poke4'])
pokemon_counter.update(df['p1_poke5'])
pokemon_counter.update(df['p1_poke6'])
pokemon_counter.update(df['p2_poke1'])
pokemon_counter.update(df['p2_poke2'])
pokemon_counter.update(df['p2_poke3'])
pokemon_counter.update(df['p2_poke4'])
pokemon_counter.update(df['p2_poke5'])
pokemon_counter.update(df['p2_poke6'])

del pokemon_counter[UNKNOWN_POKEMON]
del pokemon_counter[NO_POKEMON]


Number of Different Pokémon Species Used

In [78]:

len(pokemon_counter)


308

20 Most Common Pokémon Used

In [79]:
pokemon_counter.most_common(20)



[('Rotom', 22),
 ('Slowbro', 22),
 ('Ninetales', 16),
 ('Urshifu', 14),
 ('Porygon2', 14),
 ('Salazzle', 14),
 ('Duraludon', 14),
 ('Omastar', 14),
 ('Kyurem', 14),
 ('Ho-Oh', 12),
 ('Talonflame', 12),
 ('Rhyperior', 12),
 ('Mr.', 12),
 ('Marowak', 12),
 ('Darmanitan', 12),
 ('Sandslash', 12),
 ('Gastrodon', 12),
 ('Kingler', 12),
 ('Excadrill', 10),
 ('Butterfree', 10)]

20 Most Common Starter Pokémon Used

In [80]:
starter_pokemon_counter.most_common(20)

[('Ho-Oh', 8),
 ('Aegislash', 6),
 ('Gastrodon', 6),
 ('Urshifu', 6),
 ('Centiskorch', 4),
 ('Claydol', 4),
 ('Farfetch’d', 4),
 ('Frosmoth', 4),
 ('Articuno', 4),
 ('Toxicroak', 4),
 ('Cradily', 4),
 ('Butterfree', 4),
 ('Stonjourner', 4),
 ('Marowak', 4),
 ('Darmanitan', 4),
 ('Zapdos', 4),
 ('Rhydon', 4),
 ('Braviary', 4),
 ('Kingler', 4),
 ('Inteleon', 4)]

Are there any teams that do not have 6 Pokémon total?

In [110]:
not_6_team_battles = df[(df["team1_count"] != 6) | (df["team2_count"] != 6)]
not_6_team_battles


Unnamed: 0,p1_name,p2_name,p_winner,is_rated_battle,p1_rating,p2_rating,team1_count,team2_count,p1_poke1,p1_poke2,...,p1_poke5,p1_poke6,p2_poke1,p2_poke2,p2_poke3,p2_poke4,p2_poke5,p2_poke6,team1_unknown_count,team2_unknown_count
17,Giga㋛Chandelure-,Sinpleto,p1,True,,,3,3,Crustle,Genesect,...,__NO_POKEMON__,__NO_POKEMON__,Crustle,Genesect,Vespiquen,__NO_POKEMON__,__NO_POKEMON__,__NO_POKEMON__,0,0
44,RobinOfYlisse,Mbouchon,p2,True,,,24,24,Goodra,Duraludon,...,Dragapult,Latias,Goodra,Duraludon,Flygon,Rayquaza,Dragapult,Latias,18,20


Let's filter out any battles whether both players do not have 6 Pokémon.

In [111]:
full_team_df = df[(df["team1_count"] == 6) & (df["team2_count"] == 6)]
full_team_df.shape[0]

150

For many battles, we do not get to see the entire team of Pokémon that a player is using. What portion of players do we have perfect information on their teams?


In [112]:
total_players = 2 * full_team_df.shape[0]
total_unknown = sum(full_team_df['team1_unknown_count'] == 0) + sum(full_team_df['team2_unknown_count'] == 0)

(total_unknown, total_players, total_unknown / total_players)



(153, 300, 0.51)

What portion of Pokémon are unknown for the different teams?

In [122]:
unknown_counts = pd.concat([full_team_df["team1_unknown_count"], full_team_df["team2_unknown_count"]])
unknown_counts

(
    ("0.25 Quantile:", unknown_counts.quantile(0.25)),
    ("0.50 Quantile:", unknown_counts.quantile(0.50)),
    ("0.75 Quantile:", unknown_counts.quantile(0.75)),
    ("0.90 Quantile:", unknown_counts.quantile(0.90)),
    ("0.99 Quantile:", unknown_counts.quantile(0.99)),
    ("Mean:",          unknown_counts.mean()),
)



(('0.25 Quantile:', 0.0),
 ('0.50 Quantile:', 0.0),
 ('0.75 Quantile:', 2.0),
 ('0.90 Quantile:', 3.0),
 ('0.99 Quantile:', 5.0),
 ('Mean:', 1.1066666666666667))

TODO: Get statistics on number of turns per battle.