In [6]:
from pathlib import Path
import pandas as pd

# Set to True to also print the image count of every single pokemon.
PRINT_RES_PER_POKEMON = False

# Root folder; each sub-directory is treated as one pokemon and is named after it.
sprites_dir = Path('pokemon_sprites')


def _count_pngs_per_subdir(root):
    """Count the ``*.png`` files directly inside each sub-directory of ``root``.

    Entries of ``root`` that are not directories, and directories whose name
    starts with a dot, are ignored. Only files matching ``*.png`` at the top
    level of each sub-directory are counted (no recursion).

    Args:
        root: ``pathlib.Path`` of the folder to scan.

    Returns:
        dict mapping sub-directory name -> number of matching files, in the
        order ``root.iterdir()`` yields the sub-directories.

    Raises:
        Whatever ``Path.iterdir`` raises when ``root`` cannot be listed
        (e.g. ``FileNotFoundError`` if it does not exist).
    """
    return {
        entry.name: sum(1 for _ in entry.glob('*.png'))
        for entry in root.iterdir()
        if entry.is_dir() and not entry.name.startswith('.')
    }


pokemon_image_counts = _count_pngs_per_subdir(sprites_dir)
total_images = sum(pokemon_image_counts.values())

# Name-sorted view, used for the optional listing and for the DataFrame below.
sorted_pokemon = dict(sorted(pokemon_image_counts.items()))

if PRINT_RES_PER_POKEMON:
    print("\n" + "="*50)
    print("Images per pokemon:")
    print("="*50)
    for pokemon, count in sorted_pokemon.items():
        print(f"{pokemon}: {count} images")

print(f"Total number of images: {total_images}")
print(f"Total number of pokemon: {len(pokemon_image_counts)}")

df = pd.DataFrame(list(sorted_pokemon.items()), columns=['Pokemon', 'Image_Count'])

if df.empty:
    # Without this guard the average below divides by zero and idxmax/idxmin
    # raise on the empty column, hiding the real problem (nothing was found).
    print(f"No pokemon sub-directories found in '{sprites_dir}'; skipping statistics.")
else:
    print(f"Average images per pokemon: {total_images / len(pokemon_image_counts):.2f}")

    image_counts = df['Image_Count']
    min_count = image_counts.min()
    max_count = image_counts.max()

    print("\n" + "="*50)
    print("Summary Statistics:")
    print("="*50)
    print(f"Min images per pokemon: {min_count}")
    print(f"Max images per pokemon: {max_count}")
    print(f"Median images per pokemon: {image_counts.median()}")
    print(f"Standard deviation: {image_counts.std():.2f}")

    # idxmax/idxmin return the first matching row; df is sorted by name, so a
    # tie is reported as the alphabetically first pokemon with that count.
    most_name = df.loc[image_counts.idxmax(), 'Pokemon']
    least_name = df.loc[image_counts.idxmin(), 'Pokemon']
    print(f"\nPokemon with most images: {most_name} ({max_count} images)")
    print(f"Pokemon with least images: {least_name} ({min_count} images)")

Total number of images: 4504
Total number of pokemon: 864
Average images per pokemon: 5.21

Summary Statistics:
Min images per pokemon: 2
Max images per pokemon: 9
Median images per pokemon: 5.0
Standard deviation: 2.33

Pokemon with most images: arbok (9 images)
Pokemon with least images: aegislash (2 images)


In [7]:
import pandas as pd

# Read the pokemon attribute table from the CSV next to the notebook
df = pd.read_csv('pokemon.csv')

# Report the table size: the full (rows, columns) tuple, then each axis separately
csv_rows, csv_cols = df.shape
print(f"Pokemon dataset dimensions: {df.shape}")
print(f"Number of rows: {csv_rows}")
print(f"Number of columns: {csv_cols}")

# List every column name as a plain Python list
print(f"\nColumn names:")
print(df.columns.tolist())

# Preview the leading rows to get a feel for the data layout
print(f"\nFirst 5 rows:")
print(df.head())

Pokemon dataset dimensions: (801, 41)
Number of rows: 801
Number of columns: 41

Column names:
['abilities', 'against_bug', 'against_dark', 'against_dragon', 'against_electric', 'against_fairy', 'against_fight', 'against_fire', 'against_flying', 'against_ghost', 'against_grass', 'against_ground', 'against_ice', 'against_normal', 'against_poison', 'against_psychic', 'against_rock', 'against_steel', 'against_water', 'attack', 'base_egg_steps', 'base_happiness', 'base_total', 'capture_rate', 'classfication', 'defense', 'experience_growth', 'height_m', 'hp', 'japanese_name', 'name', 'percentage_male', 'pokedex_number', 'sp_attack', 'sp_defense', 'speed', 'type1', 'type2', 'weight_kg', 'generation', 'is_legendary']

First 5 rows:
                     abilities  against_bug  against_dark  against_dragon  \
0  ['Overgrow', 'Chlorophyll']          1.0           1.0             1.0   
1  ['Overgrow', 'Chlorophyll']          1.0           1.0             1.0   
2  ['Overgrow', 'Chlorophyll']    