In [4]:
import pandas as pd  

In [47]:
# Location of the pokemon dataset, relative to the current working directory.
path = 'pokemon_data.csv'
# The `abilities` column is converted cell by cell while the file is read, so each
# entry ends up as a Python list rather than the raw string from the CSV.
# NOTE(review): pd.eval is an expression evaluator, not a literal parser; if this
# CSV could ever come from an untrusted source, prefer ast.literal_eval here.
pokemon = pd.read_csv(path, converters=dict(abilities=pd.eval))

In [110]:
def move_features(abilities):
    """Build the feature dict for one pokemon from its abilities.

    Parameters
    ----------
    abilities : sequence of str
        The pokemon's abilities, in order. Only the first two are used.

    Returns
    -------
    dict
        ``{'first_move': ..., 'second_move': ...}``. A slot with no matching
        ability is filled with the placeholder string ``'None'``.
    """
    # Pad a copy rather than appending to the argument: the lists passed in are
    # the very objects stored in the `pokemon` DataFrame, so mutating them would
    # silently rewrite the loaded data. Padding twice also covers an empty list.
    first_move, second_move = (list(abilities) + ['None', 'None'])[:2]
    return {'first_move': first_move, 'second_move': second_move}

# Quick check with a two-ability pokemon: both slots should be filled.
move_features(abilities=['Overgrow', 'Chlorophyll'])

{'first_move': 'Overgrow', 'second_move': 'Chlorophyll'}

In [91]:
# Goal: one DataFrame of pokemon per type (all water pokemon, all bug pokemon, ...).
# Grouping the full table on the `type1` column produces that split.
types = pokemon.groupby(by='type1')
# The keys of the group mapping are the distinct `type1` values.
types.groups.keys()

dict_keys(['bug', 'dark', 'dragon', 'electric', 'fairy', 'fighting', 'fire', 'flying', 'ghost', 'grass', 'ground', 'ice', 'normal', 'poison', 'psychic', 'rock', 'steel', 'water'])

In [92]:
# Pull out a single group to see every pokemon of that type.
types.get_group(name='water')

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
6,"[Torrent, Rain Dish]",1.0,1.0,1.0,2.0,1.0,1.0,0.5,1.0,1.0,...,88.1,7,50,64,43,water,,9.0,1,0
7,"[Torrent, Rain Dish]",1.0,1.0,1.0,2.0,1.0,1.0,0.5,1.0,1.0,...,88.1,8,65,80,58,water,,22.5,1,0
8,"[Torrent, Rain Dish]",1.0,1.0,1.0,2.0,1.0,1.0,0.5,1.0,1.0,...,88.1,9,135,115,78,water,,85.5,1,0
53,"[Damp, Cloud Nine, Swift Swim]",1.0,1.0,1.0,2.0,1.0,1.0,0.5,1.0,1.0,...,50.0,54,65,50,55,water,,19.6,1,0
54,"[Damp, Cloud Nine, Swift Swim]",1.0,1.0,1.0,2.0,1.0,1.0,0.5,1.0,1.0,...,50.0,55,95,80,85,water,,76.6,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
750,"[Water Bubble, Water Absorb]",1.0,1.0,1.0,2.0,1.0,0.5,1.0,2.0,1.0,...,50.0,751,40,72,27,water,bug,4.0,7,0
751,"[Water Bubble, Water Absorb]",1.0,1.0,1.0,2.0,1.0,0.5,1.0,2.0,1.0,...,50.0,752,50,132,42,water,bug,82.0,7,0
770,"[Innards Out, Unaware]",1.0,1.0,1.0,2.0,1.0,1.0,0.5,1.0,1.0,...,50.0,771,30,130,5,water,,1.2,7,0
778,"[Dazzling, Strong Jaw, Wonder Skin ]",2.0,2.0,1.0,2.0,1.0,0.5,0.5,1.0,2.0,...,50.0,779,70,70,92,water,psychic,19.0,7,0


In [94]:
# Only the name and abilities are needed from here on, so regroup and keep
# just those two columns in every group.
types = pokemon.groupby(by='type1')[['abilities', 'name']]

In [95]:
# Same group as before, now limited to the `abilities` and `name` columns.
types.get_group(name='water')

Unnamed: 0,abilities,name
6,"[Torrent, Rain Dish]",Squirtle
7,"[Torrent, Rain Dish]",Wartortle
8,"[Torrent, Rain Dish]",Blastoise
53,"[Damp, Cloud Nine, Swift Swim]",Psyduck
54,"[Damp, Cloud Nine, Swift Swim]",Golduck
...,...,...
750,"[Water Bubble, Water Absorb]",Dewpider
751,"[Water Bubble, Water Absorb]",Araquanid
770,"[Innards Out, Unaware]",Pyukumuku
778,"[Dazzling, Strong Jaw, Wonder Skin ]",Bruxish


In [96]:
# `groups` is a dictionary: each key is a type and each value holds the row
# labels (index values in `pokemon`) of the pokemon belonging to that type.
types.groups

{'bug': [9, 10, 11, 12, 13, 14, 45, 46, 47, 48, 122, 126, 164, 165, 166, 167, 192, 203, 204, 211, 212, 213, 264, 265, 266, 267, 268, 282, 283, 289, 290, 291, 312, 313, 400, 401, 411, 412, 413, 414, 415, 468, 539, 540, 541, 542, 543, 544, 556, 557, 587, 588, 594, 595, 615, 616, 631, 635, 636, 648, 663, 664, 665, 735, 736, 737, 741, 742, 766, 767, 793, 794], 'dark': [196, 197, 214, 227, 228, 260, 261, 301, 358, 429, 460, 490, 508, 509, 558, 559, 569, 570, 623, 624, 628, 629, 632, 633, 634, 685, 686, 716, 798], 'dragon': [146, 147, 148, 333, 370, 371, 372, 379, 380, 383, 442, 443, 444, 609, 610, 611, 620, 642, 643, 645, 703, 704, 705, 717, 781, 782, 783], 'electric': [24, 25, 80, 81, 99, 100, 124, 134, 144, 171, 178, 179, 180, 238, 242, 308, 309, 310, 311, 402, 403, 404, 416, 461, 465, 478, 521, 522, 586, 601, 602, 603, 641, 693, 694, 701, 776, 784, 795], 'fairy': [34, 35, 172, 174, 175, 208, 209, 467, 668, 669, 670, 681, 682, 683, 684, 699, 715, 763], 'fighting': [55, 56, 65, 66, 67, 105

In [111]:
# Build one (features, label) pair per pokemon, where the label is its type.
# This mirrors the shape of the reference example:
#   featuresets = [(gender_features(n), gender) for (n, gender) in labeled_names]

featuresets = []

for p_type in types.groups:
    # All pokemon of this type; only their `abilities` column feeds the features.
    curr_type_df = types.get_group(p_type)
    featuresets.extend(
        (move_features(abilities), p_type)
        for abilities in curr_type_df['abilities']
    )

featuresets

[({'first_move': 'Shield Dust', 'second_move': 'Run Away'}, 'bug'),
 ({'first_move': 'Shed Skin', 'second_move': 'None'}, 'bug'),
 ({'first_move': 'Compoundeyes', 'second_move': 'Tinted Lens'}, 'bug'),
 ({'first_move': 'Shield Dust', 'second_move': 'Run Away'}, 'bug'),
 ({'first_move': 'Shed Skin', 'second_move': 'None'}, 'bug'),
 ({'first_move': 'Swarm', 'second_move': 'Sniper'}, 'bug'),
 ({'first_move': 'Effect Spore', 'second_move': 'Dry Skin'}, 'bug'),
 ({'first_move': 'Effect Spore', 'second_move': 'Dry Skin'}, 'bug'),
 ({'first_move': 'Compoundeyes', 'second_move': 'Tinted Lens'}, 'bug'),
 ({'first_move': 'Shield Dust', 'second_move': 'Tinted Lens'}, 'bug'),
 ({'first_move': 'Swarm', 'second_move': 'Technician'}, 'bug'),
 ({'first_move': 'Hyper Cutter', 'second_move': 'Mold Breaker'}, 'bug'),
 ({'first_move': 'Swarm', 'second_move': 'Early Bird'}, 'bug'),
 ({'first_move': 'Swarm', 'second_move': 'Early Bird'}, 'bug'),
 ({'first_move': 'Swarm', 'second_move': 'Insomnia'}, 'bug'),


In [None]:
# Build (name, gender) pairs from the two name files, male entries first.
# Each line of a file becomes one name, with surrounding whitespace stripped.
# The files are opened in `with` blocks so each handle is closed as soon as it
# has been read, instead of being left open until garbage collection.
labeled_names = []
for names_path, gender in (('../datasets/male.txt', 'male'),
                           ('../datasets/female.txt', 'female')):
    with open(names_path) as names_file:
        labeled_names.extend((name.strip(), gender) for name in names_file)