In [1]:
import requests
from bs4 import BeautifulSoup
import re
import json
import pandas as pd

In [2]:
def capitalize_words(string):
    """Return ``string`` with every whitespace/hyphen-separated piece capitalized.

    Any run of whitespace and/or hyphens counts as one separator and is
    replaced by a single space in the result. Each piece is passed through
    ``str.capitalize``, i.e. its first character is upper-cased and the
    remaining characters are lower-cased.
    """
    pieces = re.split(r"[\s-]+", string)
    return " ".join(map(str.capitalize, pieces))

In [94]:
# ---------------------------------------------------------------------------
# Scraper configuration
# ---------------------------------------------------------------------------
# Root of the site being scraped; page paths are appended to it.
BASE_URL = "https://pokemondb.net"

# Legacy approach, kept for reference only: an earlier revision excluded
# individual form names instead of whole species.
#   "Mega", "Gigantamax",
#   "Ultra", "Dawn Wings", "Dusk Mane"  (Necrozma)
#   "Busted" (Mimikyu), "Noice" (Eiscue), "Hangry" (Morpeko)
#   "Crowned" (Zacian/Zamazenta), "Eternamax" (Eternatus)
#   "Sunshine" (Cherrim), "Ash" (Greninja), "Blade" (Aegislash)
#   "Meteor" (Minior), "School" (Wishiwashi), "Roaming" (Gimmighoul)
#   "Sprinting" (Koraidon), "Drive" (Miraidon)
#   "Gulping", "Gorging" (Cramorant), "Zen", "Galarian Zen" (Darmanitan)
#   "Hero" (Palafin), "Origin" (Dialga, Palkia, Giratina)
#   "Black", "White", "Kyurem", "10%", "Complete" (Zygarde), "Active" (Xerneas)

# Sprite labels that START WITH one of these words are skipped by the scrape
# loop ("Meteor" is there for Minior).
EXCLUDED_WORDS = ["Meteor", "Mega", "Gigantamax", "Hero"]

# Species for which the scrape loop keeps only the first sprite card of each
# table cell, i.e. only the first listed form.
EXCLUDED_FORMS = [
    "Mimikyu", "Eiscue", "Morpeko", "Eternatus",
    "Castform", "Cherrim", "Greninja", "Aegislash",
    "Wishiwashi", "Giratina", "Meloetta", "Arceus",
    "Silvally", "Genesect", "Kyurem", "Xerneas",
    "Zygarde", "Necrozma", "Zamazenta", "Zacian",
    "Calyrex", "Darmanitan", "Cramorant",
]


In [83]:
# Download the sprite index page.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() aborts on an HTTP error instead of silently parsing an
# error page into an empty/incorrect card list.
response = requests.get(BASE_URL + "/sprites", timeout=30)
response.raise_for_status()
html_content = response.content

# Parse the HTML and collect every <a> element carrying the "infocard" class;
# the scrape loop later follows each card's href and uses its text as the name.
soup = BeautifulSoup(html_content, "html.parser")
info_cards = soup.find_all("a", attrs={"class": "infocard"})

In [95]:
# Scrape the sprite page behind every card in `info_cards` and collect, per
# Pokémon, its number, its name and a list of
# {"Variant": ..., "Sprite URL": {"Normal": ..., "Shiny": ...}} records.
processed = 0  # number of cards handled by this run
pkm = {
    "No.": [],
    "Name": [],
    "Sprite": []
}

# NOTE(review): the window below restricts the run to the single card at index
# 963 (a debugging leftover). For the complete list use:
# start_pkm = 0
# end_pkm = len(info_cards)
start_pkm = 963
end_pkm = 964

for i in range(start_pkm, end_pkm):
    card = info_cards[i]
    sprite_url = card["href"]
    base_name = card.text.strip()

    # Timeout + status check: fail fast with a clear error rather than hanging
    # or parsing an error page.
    s_response = requests.get(BASE_URL + sprite_url, timeout=30)
    s_response.raise_for_status()
    s_html_content = s_response.content

    # Parse the HTML content using BeautifulSoup
    s_soup = BeautifulSoup(s_html_content, "html.parser")
    # The first <em> holds the number as its first space-separated token; the
    # leading character is dropped before converting (presumably a "#" prefix).
    pkm_no = int(s_soup.find_all("em")[0].text.split(" ")[0][1:])
    pkm["No."].append(pkm_no)
    pkm["Name"].append(base_name)

    s_table = s_soup.find_all("table", attrs={"class": "data-table sprites-table block-wide"})
    sprite = {}
    # Only the first two sprite tables are inspected.
    for tbl in s_table[:2]:
        s_td = tbl.find_all("tr")[1].find_all("td")
        # Exact comparison. The previous `in ("Home")` tested against a plain
        # string (no trailing comma -> not a tuple), i.e. it was a substring
        # check that also accepted "", "H", "me", ...
        if s_td[0].text.strip() != "Home":
            continue

        # Cells 1 and 2 of the row are scanned for sprite cards.
        for td in s_td[1:3]:
            s_span = td.find_all("span", attrs={"class": "sprites-table-card"})
            if base_name in EXCLUDED_FORMS:
                # Keep only the first listed form for these species.
                s_span = s_span[:1]

            for s in s_span:
                if any(s.text.startswith(w) for w in EXCLUDED_WORDS):
                    continue
                href = s.find("a")["href"]
                p_variant = capitalize_words(s.text)
                urls = sprite.setdefault(p_variant, {"Normal": "", "Shiny": ""})
                # Anything that is not under /normal/ is stored as the shiny URL.
                if "/normal/" in href:
                    urls["Normal"] = href
                else:
                    urls["Shiny"] = href

    # Handle Minior shiny url: copy the shared "Core (all Colors)" shiny URL to
    # every variant whose name ends with "Core", then drop the shared entry.
    if pkm_no == 774:
        shiny_url = sprite.pop("Core (all Colors)")["Shiny"]
        for k in sprite:
            if k.endswith("Core"):
                sprite[k]["Shiny"] = shiny_url

    sprite_list = [{"Variant": k.replace(f" {base_name}", ""), "Sprite URL": v} for k, v in sprite.items()]
    # sort pumpkaboo/gourgeist by size instead of size name
    if pkm_no in (710, 711):
        sort = [2, 0, 1, 3]
        sprite_list = [sprite_list[idx] for idx in sort]
    pkm["Sprite"].append(sprite_list)

    # Logging every 100 pkms (and once at the end of the window); the number
    # reported is the 1-based position in `info_cards`.
    processed += 1
    position = i + 1
    if position % 100 == 0 or position == end_pkm:
        print("Processed %s pkms" % position)

Processed 964 pkms


In [97]:
# Inspect the scraped result as the cell output. A single entry's sprite list
# can be viewed instead with e.g.: pkm["Sprite"][214]
pkm

{'No.': [964],
 'Name': ['Palafin'],
 'Sprite': [[{'Variant': 'Zero Form',
    'Sprite URL': {'Normal': 'https://img.pokemondb.net/sprites/home/normal/palafin-zero.png',
     'Shiny': 'https://img.pokemondb.net/sprites/home/shiny/palafin-zero.png'}}]]}

In [90]:
# Flatten the nested scrape result into one row per (Pokémon, variant).

# One row per entry of each Pokémon's "Sprite" list.
exploded = pd.DataFrame(pkm).explode("Sprite")

# Expand each sprite record into its "Variant" / "Sprite URL" columns ...
variant_columns = exploded["Sprite"].apply(pd.Series)
with_variants = pd.concat([exploded, variant_columns], axis=1)

# ... and each "Sprite URL" record into its "Normal" / "Shiny" columns.
url_columns = with_variants["Sprite URL"].apply(pd.Series)
flattened = pd.concat([with_variants, url_columns], axis=1)

# Append the variant to the name; strip() removes the separator again when the
# variant is empty.
flattened["Name"] = (flattened["Name"] + " " + flattened["Variant"]).apply(str.strip)

# Drop the helper columns and turn embedded newlines into spaces.
df = flattened.drop(["Sprite", "Variant", "Sprite URL"], axis=1).replace(r"\n", " ", regex=True)
df

Unnamed: 0,No.,Name,Normal,Shiny
0,1,Bulbasaur,https://img.pokemondb.net/sprites/home/normal/...,https://img.pokemondb.net/sprites/home/shiny/b...
1,2,Ivysaur,https://img.pokemondb.net/sprites/home/normal/...,https://img.pokemondb.net/sprites/home/shiny/i...
2,3,Venusaur Female,https://img.pokemondb.net/sprites/home/normal/...,https://img.pokemondb.net/sprites/home/shiny/v...
2,3,Venusaur Male,https://img.pokemondb.net/sprites/home/normal/...,https://img.pokemondb.net/sprites/home/shiny/v...
3,4,Charmander,https://img.pokemondb.net/sprites/home/normal/...,https://img.pokemondb.net/sprites/home/shiny/c...
...,...,...,...,...
1005,1006,Iron Valiant,https://img.pokemondb.net/sprites/home/normal/...,https://img.pokemondb.net/sprites/home/shiny/i...
1006,1007,Koraidon,https://img.pokemondb.net/sprites/home/normal/...,https://img.pokemondb.net/sprites/home/shiny/k...
1007,1008,Miraidon,https://img.pokemondb.net/sprites/home/normal/...,https://img.pokemondb.net/sprites/home/shiny/m...
1008,1009,Walking Wake,https://img.pokemondb.net/sprites/home/normal/...,https://img.pokemondb.net/sprites/home/shiny/w...


In [91]:
# Write the flattened sprite table to a CSV file in the working directory,
# without the DataFrame index.
df.to_csv(
    "./national_list.csv",
    index=False,
    encoding="utf-8",
)

In [11]:
import pandas as pd
import json

In [13]:
def create_filter(nat_id, name, **overrides):
    """Build one filter entry (a plain, JSON-serialisable dict).

    Args:
        nat_id: Value stored under "Species".
        name: Value stored under "Name".
        **overrides: Optional replacements for any of the default fields,
            keyed by the exact field name, e.g. ``Gender=1`` or
            ``Shiny=False``. "Name" and "Species" cannot be overridden here;
            they always come from the positional arguments.

    Returns:
        A new dict with the fields in a fixed order. Without overrides the
        result is the default shiny-only filter.

    Raises:
        TypeError: If an override names an unknown field, or "Name"/"Species".
    """
    entry = {
        "Name": name,
        "Species": nat_id,
        "RareVariant": False,
        "Form": None,
        "Stars": None,
        "StarsComp": 0,
        "Shiny": True,
        "Square": False,
        "Nature": None,
        "TeraType": None,
        "Gender": None,
        "Size": None,
        "IVBin": 0,
        "IVComps": 0,
        "IVVals": 1073741823,
        "Enabled": True,
        "RewardItems": None,
        "RewardsComp": -1,
        "RewardsCount": 0,
        "BatchFilters": None
    }

    # Reject typos early instead of silently emitting an extra JSON key.
    rejected = sorted(
        key for key in overrides
        if key not in entry or key in ("Name", "Species")
    )
    if rejected:
        raise TypeError(
            "create_filter() got unsupported override(s): " + ", ".join(rejected)
        )

    # Updating existing keys keeps the original field order in the output.
    entry.update(overrides)
    return entry

In [14]:
# Load the filter list and attach one filter dict per row.
df = pd.read_csv("./RC filter.csv")


def _filter_for_row(row):
    """Build the filter dict for one CSV row from its "Nat #" and "Name" cells."""
    return create_filter(row["Nat #"], row["Name"])


df["filter"] = df.apply(_filter_for_row, axis=1)

In [15]:
# Collect the per-row filter dicts and show them serialised as one JSON array.
filter_column = df["filter"]
filters = filter_column.tolist()
json.dumps(filters)

'[{"Name": "Jigglypuff", "Species": 39, "RareVariant": false, "Form": null, "Stars": null, "StarsComp": 0, "Shiny": true, "Square": false, "Nature": null, "TeraType": null, "Gender": null, "Size": null, "IVBin": 0, "IVComps": 0, "IVVals": 1073741823, "Enabled": true, "RewardItems": null, "RewardsComp": -1, "RewardsCount": 0, "BatchFilters": null}, {"Name": "Wigglytuff", "Species": 40, "RareVariant": false, "Form": null, "Stars": null, "StarsComp": 0, "Shiny": true, "Square": false, "Nature": null, "TeraType": null, "Gender": null, "Size": null, "IVBin": 0, "IVComps": 0, "IVVals": 1073741823, "Enabled": true, "RewardItems": null, "RewardsComp": -1, "RewardsCount": 0, "BatchFilters": null}, {"Name": "Venonat", "Species": 48, "RareVariant": false, "Form": null, "Stars": null, "StarsComp": 0, "Shiny": true, "Square": false, "Nature": null, "TeraType": null, "Gender": null, "Size": null, "IVBin": 0, "IVComps": 0, "IVVals": 1073741823, "Enabled": true, "RewardItems": null, "RewardsComp": -1,

'[{"Name": "Jigglypuff", "Species": 39, "RareVariant": false, "Form": null, "Stars": null, "StarsComp": 0, "Shiny": true, "Square": false, "Nature": null, "TeraType": null, "Gender": 1, "Size": null, "IVBin": 0, "IVComps": 0, "IVVals": 1073741823, "Enabled": true, "RewardItems": null, "RewardsComp": -1, "RewardsCount": 0, "BatchFilters": null}, {"Name": "Wigglytuff", "Species": 40, "RareVariant": false, "Form": null, "Stars": null, "StarsComp": 0, "Shiny": true, "Square": false, "Nature": null, "TeraType": null, "Gender": 1, "Size": null, "IVBin": 0, "IVComps": 0, "IVVals": 1073741823, "Enabled": true, "RewardItems": null, "RewardsComp": -1, "RewardsCount": 0, "BatchFilters": null}, {"Name": "Venonat", "Species": 48, "RareVariant": false, "Form": null, "Stars": null, "StarsComp": 0, "Shiny": true, "Square": false, "Nature": null, "TeraType": null, "Gender": 1, "Size": null, "IVBin": 0, "IVComps": 0, "IVVals": 1073741823, "Enabled": true, "RewardItems": null, "RewardsComp": -1, "Rewards