In [1]:
import os
import pandas as pd

In [2]:
# -------------------------------
# 1️⃣ Load the Pokémon CSV with normalised names
# -------------------------------
path_to_file = './archive/pokemon-data.csv'

# Build the frame in a single chain: read the file, then rewrite the "Name"
# column with surrounding whitespace trimmed and every literal space removed
# (regex=False makes the replacement a plain substring match).
df = pd.read_csv(path_to_file).assign(
    Name=lambda frame: frame["Name"].str.strip().str.replace(" ", "", regex=False)
)

In [3]:
# -------------------------------
# 2️⃣ Collect image folder names from disk
# -------------------------------
path = "./archive/additional_images"

folder_list = []
for entry in os.listdir(path):
    # Only sub-directories count; plain files in the image root are skipped.
    if not os.path.isdir(os.path.join(path, entry)):
        continue
    # Trim surrounding whitespace and drop every space from the folder name.
    folder_list.append(entry.strip().replace(" ", ""))


In [5]:
# -------------------------------
# 3️⃣ Find missing folders
# -------------------------------
# Build a set once so every membership test below is a hash lookup instead of
# a linear scan of the folder list (avoids rows × folders comparisons).
known_folders = set(folder_list)

# Names from the CSV that have no matching folder, in CSV row order.
# A name that occurs on several rows is reported once per row.
missing_folders = [name for name in df["Name"].tolist() if name not in known_folders]
print(missing_folders)


['MegaVenusaur', 'MegaCharizardX', 'MegaCharizardY', 'MegaBlastoise', 'MegaBeedrill', 'MegaPidgeot', 'AlolanRattata', 'AlolanRaticate', 'PartnerPikachu', 'AlolanRaichu', 'AlolanSandshrew', 'AlolanSandslash', 'AlolanVulpix', 'AlolanNinetales', 'AlolanDiglett', 'AlolanDugtrio', 'AlolanMeowth', 'GalarianMeowth', 'AlolanPersian', 'MegaAlakazam', 'AlolanGeodude', 'AlolanGraveler', 'AlolanGolem', 'GalarianPonyta', 'GalarianRapidash', 'GalarianSlowpoke', 'MegaSlowbro', 'GalarianSlowbro', "Farfetch'D", "GalarianFarfetch'D", 'AlolanGrimer', 'AlolanMuk', 'MegaGengar', 'AlolanExeggutor', 'AlolanMarowak', 'GalarianWeezing', 'MegaKangaskhan', 'GalarianMr.Mime', 'MegaPinsir', 'MegaGyarados', 'PartnerEevee', 'MegaAerodactyl', 'GalarianArticuno', 'GalarianZapdos', 'GalarianMoltres', 'MegaMewtwoX', 'MegaMewtwoY', 'MegaAmpharos', 'GalarianSlowking', 'MegaSteelix', 'MegaScizor', 'MegaHeracross', 'GalarianCorsola', 'MegaHoundoom', 'MegaTyranitar', 'Ho-Oh', 'MegaSceptile', 'MegaBlaziken', 'MegaSwampert', '

In [7]:
# -------------------------------
# 4️⃣ Keep only rows whose name has a folder
# -------------------------------
# Boolean mask: True for rows whose "Name" is NOT in the missing list.
has_folder = ~df["Name"].isin(missing_folders)

# Select the matching rows; the original index labels are preserved.
df_valid = df.loc[has_folder]
print(df_valid)


      Pokedex No.        Name      Type Other Type     HP  Attack  Defense  \
0               1   Bulbasaur     Grass     Poison   45.0    49.0     49.0   
1               2     Ivysaur     Grass     Poison   60.0    62.0     63.0   
2               3    Venusaur     Grass     Poison   80.0    82.0     83.0   
4               4  Charmander      Fire        NaN   39.0    52.0     43.0   
5               5  Charmeleon      Fire        NaN   58.0    64.0     58.0   
...           ...         ...       ...        ...    ...     ...      ...   
1039          895   Regidrago    Dragon        NaN  200.0   100.0     50.0   
1040          896   Glastrier       Ice        NaN  100.0   145.0    130.0   
1041          897   Spectrier     Ghost        NaN  100.0    65.0     60.0   
1042          898     Calyrex   Psychic      Grass  100.0    80.0     80.0   
1045          899      MimeJr   Psychic      Fairy    NaN     NaN      NaN   

      Special Attack  Special Defense  Speed  Total  Generation

In [8]:
# -------------------------------
# 5️⃣ Write the filtered rows to a new CSV
# -------------------------------
output_file = './archive/pokemon_with_folders.csv'

# index=False leaves the DataFrame index out of the written file.
df_valid.to_csv(path_or_buf=output_file, index=False)

# Report how many rows were written and where.
print(f"New CSV created with {len(df_valid)} names. Saved to {output_file}")

New CSV created with 891 names. Saved to ./archive/pokemon_with_folders.csv
