In [63]:
from bs4 import BeautifulSoup
from collections import OrderedDict
import csv
import math
import sys
import urllib.request
import pandas as pd

In [103]:
def get_wiki_page(url):
    # user agent
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    with urllib.request.urlopen(req) as response:
        data = response.read().decode('utf-8')
        return data

def prefetch_base_stats():
    # for Pokemon Go
    url = "https://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_by_base_stats_(GO)"

# PokeGenie current export columns
POKEGENIE_COLUMNS = 'Index,Name,Form,Pokemon Number,Gender,CP,HP,Atk IV,Def IV,Sta IV,IV Avg,Level Min,Level Max,Quick Move,Charge Move,Charge Move 2,Scan Date,Original Scan Date,Catch Date,Weight,Height,Lucky,Shadow/Purified,Favorite,Dust,Rank % (G),Rank # (G),Stat Prod (G),Dust Cost (G),Candy Cost (G),Name (G),Form (G),Sha/Pur (G),Rank % (U),Rank # (U),Stat Prod (U),Dust Cost (U),Candy Cost (U),Name (U),Form (U),Sha/Pur (U),Rank % (L),Rank # (L),Stat Prod (L),Dust Cost (L),Candy Cost (L),Name (L),Form (L),Sha/Pur (L),Marked for PvP use'.split(',')


In [211]:


def fetch_base_stats(data):
    soup = BeautifulSoup(data, 'html.parser')
    body = soup.find('div', class_='mw-parser-output')
    rows = body.findNext('table', class_='sortable').findAll('tr')[1:]
    stats = {}
    for row in rows:
        tds = row.findAll('td')
        mon = tds[2].text.strip()
        stats[mon] = {
            'HP': tds[3].text.strip(),
            'Attack': tds[4].text.strip(),
            'Defense': tds[5].text.strip(),
            'Product': tds[6].text.strip(),
            'Max CP (Lvl 40)': tds[7].text.strip(),
            'Max CP (Lvl 50)': tds[8].text.strip()
        }
    return stats

def fetch_cp_table(data):
    """ Data is the html from the wiki page """
    # get h2#Levels h3.span.text=CP multiplier#<table>
    soup = BeautifulSoup(data, 'html.parser')
    # get class mw-parser-output
    body = soup.find('div', class_='mw-parser-output')
    # get h2#Levels
    levels_bro = None
    h2s = body.findAllNext('h2')
    for h in h2s:
        if h.find('span', id='Levels'):
            levels_bro = h

    if not levels_bro:
        raise Exception("Could not find h2#Levels")
        return

    # get table
    table = levels_bro.find_next_sibling('table')
    # skip first tr; columns are Levels, CP multiplier, Marginal stardust, marginal candy, Cumulative stardust,
    # cumulative candy
    rows = table.findAll('tr')[2:]
    cp_per_level = OrderedDict()
    for row in rows:
        tds = row.findAll('td')
        level = float(tds[0].text)
        if level == 50:
            cum_dust = 'n/a'
            cum_candy = 'n/a'
            marg_dust = 'n/a'
            marg_candy = 'n/a'
        elif level > 50:
            break
        else:
            cum_dust = int(tds[4].contents[1])
            marg_dust = int(tds[2].contents[1])
            marg_candy = int(tds[3].contents[1])
            cum_candy = int(tds[5].contents[1])
        cp_per_level[level] = {
            'cpm': float(tds[1].text),
            'marg_dust': marg_dust,
            'cum_dust': cum_dust,
            'marg_candy': marg_candy,
            'cum_candy': cum_candy
        }

    return cp_per_level

def load_data():
    with open('List_of_Pokémon_by_base_stats_(GO).html') as f:
        data = f.read()
        base_stats = fetch_base_stats(data)

    with open('Power Up - Bulbapedia, the community-driven Pokémon encyclopedia.html') as f:
        cpm_page = f.read()
        cp_table = fetch_cp_table(cpm_page)

    return base_stats, cp_table

In [284]:
bases, cpms = load_data()
base_stats = pd.DataFrame.from_dict(bases, orient='index').astype('int32')
cpm_stats = pd.DataFrame.from_dict(cpms, orient='index')

In [286]:

base_stats['Total stat sum'] = base_stats['HP'] + base_stats['Attack'] + base_stats['Defense']
base_stats.sort_values('Total stat sum', ascending=False)

Unnamed: 0,HP,Attack,Defense,Product,Max CP (Lvl 40),Max CP (Lvl 50),Total stat sum
Groudon(Primal Groudon),218,353,268,20623672,5902,6672,839
Kyogre(Primal Kyogre),218,353,268,20623672,5902,6672,839
Blissey,496,129,169,10813296,2757,3117,794
Salamence(Mega Salamence),216,310,251,16806960,5031,5688,777
Latias(Mega Latias),190,289,297,16308270,4801,5428,776
Latios(Mega Latios),190,335,241,15339650,5007,5661,766
Aggron(Mega Aggron),172,247,331,14062204,4162,4705,750
Slaking,284,290,166,13671760,4431,5010,740
Kyurem(Black Kyurem),245,310,183,13898850,4605,5206,738
Kyurem(White Kyurem),245,310,183,13898850,4605,5206,738


In [298]:
base_stats[base_stats["Max CP (Lvl 50)"] <= 2500].sort_values('Total stat sum', ascending=False)

Unnamed: 0,HP,Attack,Defense,Product,Max CP (Lvl 40),Max CP (Lvl 50),Total stat sum
Chansey,487,60,128,3740160,1255,1418,675
Alomomola,338,138,131,6110364,2169,2452,607
Umbreon,216,126,240,6531840,2137,2416,582
Mandibuzz,242,129,205,6399690,2138,2417,576
Probopass,155,135,275,5754375,2080,2351,565
Stunfisk,240,144,171,5909760,2162,2445,555
Stunfisk(Galarian Form),240,144,171,5909760,2162,2445,555
Lanturn,268,146,137,5360536,2085,2357,551
Wobbuffet,382,60,106,2429520,1026,1160,548
Cradily,200,152,194,5897600,2211,2499,546


In [78]:
cpm_stats

Unnamed: 0,cpm,marg_dust,cum_dust,marg_candy,cum_candy
1.0,0.094000,200,200,1,1
1.5,0.135137,200,400,1,2
2.0,0.166398,200,600,1,3
2.5,0.192651,200,800,1,4
3.0,0.215732,400,1200,1,5
...,...,...,...,...,...
48.0,0.830300,14000,476000,20,236
48.5,0.832804,14000,490000,20,256
49.0,0.835300,15000,505000,20,276
49.5,0.837804,15000,520000,20,296


In [214]:
cpm_stats.at[1.5,  'cpm']

0.13513743

In [174]:
poke_genie_3_9 = pd.read_csv('poke_genie_export2023-03-09.csv')
poke_genie_3_9[['Index', 'Name', 'Form', 'Pokemon Number', 'CP', 'HP', 'Atk IV', 'Def IV', 'Sta IV', 'Level Min',
                'Level Max', 'Quick Move', 'Charge Move', 'Charge Move 2','Lucky','Shadow/Purified']]

Unnamed: 0,Index,Name,Form,Pokemon Number,CP,HP,Atk IV,Def IV,Sta IV,Level Min,Level Max,Quick Move,Charge Move,Charge Move 2,Lucky,Shadow/Purified
0,1,Snover,,459,877,115,14,8,9,28.0,28.0,Ice Shard,Ice Beam,,0,0
1,2,Hippopotas,,449,566,95,14,10,15,15.0,15.0,Tackle,Rock Tomb,,0,0
2,3,Plusle,,311,852,93,1,10,6,19.0,19.0,Quick Attack,Swift,,0,0
3,4,Elgyem,,605,559,81,11,10,11,15.0,15.0,Astonish,Psybeam,,0,0
4,5,Scatterbug,,664,190,67,10,10,14,15.0,15.0,Tackle,Struggle,,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
549,550,Monferno,,391,660,91,15,10,15,15.0,15.0,,,,0,0
550,551,Dunsparce,,206,694,124,11,11,15,15.0,15.0,,,,0,0
551,552,Dodrio,,85,728,73,15,12,12,11.0,11.0,,,,0,0
552,553,Fletchinder,,662,974,113,11,13,15,24.0,24.0,,,,0,0


In [181]:
# double checks that the level data is certain
for entry in poke_genie_3_9.iterrows():
    mp = entry[1]
    if int(mp['Level Max'] - mp['Level Min']) != 0:
        print(mp['Name'], mp['Level Min'], mp['Level Max'])

In [242]:
name_col = POKEGENIE_COLUMNS.index('Name')
atkiv_col = POKEGENIE_COLUMNS.index('Atk IV')
defiv_col = POKEGENIE_COLUMNS.index('Def IV')
staiv_col = POKEGENIE_COLUMNS.index('Sta IV')
for index, row in poke_genie_3_9.iterrows():
    my_pokemon = row.copy()
    mon = my_pokemon[name_col]
    if isinstance(my_pokemon['Form'], str) and my_pokemon['Form'] != 'Normal':
        mon += f"({my_pokemon['Form']})"
        print(f"🚨🚨🚨🚨🚨 {mon} is a different form and i haven't smoothed out form lookups in the table so skipping")
        continue
    a = int(my_pokemon[atkiv_col])
    d = int(my_pokemon[defiv_col])
    s = int(my_pokemon[staiv_col])
    try:
        bhp = int(base_stats['HP'][mon])
        ba =  int(base_stats['Attack'][mon])
        bd =  int(base_stats['Defense'][mon])
    except KeyError:
        print(f"🚨🚨🚨🚨🚨 {mon} has forms in the base stats table and i haven't smoothed out form lookups in the table so skipping")
    bases = f'Base:{ba}/{bd}/{bhp}'
    ivs = f'IVs: {a}/{d}/{s}'
    stored_cp = int(my_pokemon['CP'])
    level = float(mp['Level Min']) # assuming the level max == min above
    cpm = float(cpm_stats.at[level,  'cpm'])

    ratk = math.floor((a + ba) * cpm)
    rdef = math.floor((d + bd) * cpm)
    rhp = math.floor((s + bhp) * cpm)
    reals = f'Reals: {ratk}/{rdef}/{rhp}'

    cp = math.floor(ratk * math.sqrt(rdef) * math.sqrt(rhp) / 10)

    print(f'{mon:<20} {bases:<20} {ivs:<20} {reals:<20} Stored CP: {stored_cp:<5} Computed CP (prob wrong): {cp:<5}')
    poke_genie_3_9.loc[index, 'Total stat sum'] = ratk + rdef + rhp


Snover               Base:115/105/155     IVs: 14/8/9          Reals: 77/67/97      Stored CP: 877   Computed CP (prob wrong): 620  
Hippopotas           Base:124/118/169     IVs: 14/10/15        Reals: 82/76/109     Stored CP: 566   Computed CP (prob wrong): 746  
Plusle               Base:167/129/155     IVs: 1/10/6          Reals: 100/83/96     Stored CP: 852   Computed CP (prob wrong): 892  
Elgyem               Base:148/100/146     IVs: 11/10/11        Reals: 94/65/93      Stored CP: 559   Computed CP (prob wrong): 730  
Scatterbug           Base:63/63/116       IVs: 10/10/14        Reals: 43/43/77      Stored CP: 190   Computed CP (prob wrong): 247  
Spewpa               Base:48/89/128       IVs: 14/11/14        Reals: 37/59/84      Stored CP: 197   Computed CP (prob wrong): 260  
Qwilfish             Base:184/138/163     IVs: 11/10/2         Reals: 116/88/98     Stored CP: 1305  Computed CP (prob wrong): 1077 
Dewott               Base:159/116/181     IVs: 0/1/7           Reals:

In [None]:
poke_genie_3_9

In [None]:
pd.set_option('display.max_rows', None)
poke_genie_3_9.sort_values('Total stat sum', ascending=True)
    #
    # for a in range(0, 16):
    #     for d in range(0, 16):
    #         for s in range(0, 16):
    #             for level in cp_table:
    #                 cpm = cp_table[level]['cpm']
    #                 computed_atk = (base_stats[mon]['atk'] + a) * cpm
    #                 computed_def = (base_stats[mon]['def'] + d) * cpm
    #                 computed_sta = (base_stats[mon]['hp'] + s) * cpm
    #                 computed_atk, computed_def, computed_sta = math.floor(computed_atk), math.floor(computed_def), math.floor(computed_sta)
    #                 cp = math.floor(computed_atk * math.sqrt(computed_def) * math.sqrt(computed_sta) / 10)
    #                 combos[mon][f'{a}/{d}/{s}'] = {
    #                     'computed_atk': computed_atk,
    #                     'computed_def': computed_def,
    #                     'computed_hp': computed_sta,
    #                     'cp': cp
    #                 }

Unnamed: 0,Index,Name,Form,Pokemon Number,Gender,CP,HP,Atk IV,Def IV,Sta IV,...,Rank % (L),Rank # (L),Stat Prod (L),Dust Cost (L),Candy Cost (L),Name (L),Form (L),Sha/Pur (L),Marked for PvP use,Total stat sum
13,14,Wimpod,,767,♂,29,21,3,9,5,...,36.29%,2610.0,91.52%,2800.0,406.0,Golisopod,,0.0,,148.0
455,456,Sunkern,,191,♂,110,48,15,13,13,...,99.39%,26.0,95.16%,510000.0,286.0,Sunkern,,0.0,,149.0
471,472,Sunkern,,191,♀,195,65,13,15,13,...,99.39%,26.0,95.16%,484400.0,256.0,Sunkern,,0.0,,149.0
340,341,Ralts,,280,♂,194,52,7,11,3,...,73.89%,1070.0,91.98%,438400.0,268.0,Ralts,,0.0,,152.0
344,345,Ralts,,280,♀,201,52,12,8,3,...,41.78%,2385.0,88.75%,358400.0,268.0,Ralts,,0.0,,154.0
92,93,Ralts,,280,♂,162,51,13,2,13,...,30.28%,2856.0,87.49%,343400.0,280.0,Ralts,,0.0,,156.0
446,447,Cosmog,,789,,169,70,11,13,11,...,42.98%,2336.0,89.37%,38400.0,61.0,Cosmoem,,0.0,,160.0
89,90,Ralts,,280,♂,206,53,14,15,8,...,62.66%,1530.0,90.76%,202600.0,206.0,Ralts,,0.0,,162.0
477,478,Wimpod,,767,♀,308,67,14,13,15,...,90.92%,373.0,94.48%,122500.0,238.0,Wimpod,,0.0,,163.0
4,5,Scatterbug,,664,♀,190,67,10,10,14,...,95.73%,176.0,95.26%,481400.0,268.0,Scatterbug,,0.0,,163.0


In [270]:
base_stats[['HP', 'Attack', 'Defense']].loc[['Chansey', 'Gyarados']]

Unnamed: 0,HP,Attack,Defense
Chansey,487,60,128
Gyarados,216,237,186


In [274]:
q = poke_genie_3_9.query('CP <= 1500').sort_values('Total stat sum', ascending=False)
print(q)

     Index         Name      Form  Pokemon Number Gender    CP   HP  Atk IV  \
22      23      Chansey       NaN             113      ♀   495  258      10   
299    300      Chansey       NaN             113      ♀   494  258      10   
12      13      Chansey       NaN             113      ♀   654  287      15   
243    244     Vaporeon       NaN             134      ♂   826  122       0   
32      33     Vaporeon       NaN             134      ♂   997  132       8   
525    526      Sylveon       NaN             700      ♂   497   74      15   
110    111       Aggron    Normal             306      ♀  1500  107       5   
214    215     Hariyama       NaN             297      ♂  1498  184       0   
479    480      Steelix    Normal             208      ♂   734   85      11   
308    309      Leafeon       NaN             470      ♂   644   66      11   
524    525      Flareon       NaN             136      ♂   663   64      12   
272    273        Throh       NaN             538   