In [1]:
import pandas as pd  

In [2]:
path = 'pokemon_data.csv'
# the converters argument will let Python read the ability column as a list, not a string
pokemon = pd.read_csv(path, converters={'abilities' : pd.eval})

In [3]:
def move_features(abilities):
    if len(abilities) == 1:
        abilities.append('None')
    return {'first_move': abilities[0], 'second_move': abilities[1]}

move_features(['Overgrow', 'Chlorophyll'])

{'first_move': 'Overgrow', 'second_move': 'Chlorophyll'}

In [8]:
# now we want to combine abilities with type
# we can do this with the zip function - which will produce an iterator of tuples
# each tuple will have the format: (['ability1', 'ability2'], type)
zipped_features = zip(pokemon.abilities, pokemon.type1)

In [9]:
# I know need the ability list to have the syntax needed for a classifier
# for this, we can use our move_feature function
featuresets = [(move_features(abilities), p_type) for abilities, p_type in zipped_features ]
featuresets

[({'first_move': 'Overgrow', 'second_move': 'Chlorophyll'}, 'grass'),
 ({'first_move': 'Overgrow', 'second_move': 'Chlorophyll'}, 'grass'),
 ({'first_move': 'Overgrow', 'second_move': 'Chlorophyll'}, 'grass'),
 ({'first_move': 'Blaze', 'second_move': 'Solar Power'}, 'fire'),
 ({'first_move': 'Blaze', 'second_move': 'Solar Power'}, 'fire'),
 ({'first_move': 'Blaze', 'second_move': 'Solar Power'}, 'fire'),
 ({'first_move': 'Torrent', 'second_move': 'Rain Dish'}, 'water'),
 ({'first_move': 'Torrent', 'second_move': 'Rain Dish'}, 'water'),
 ({'first_move': 'Torrent', 'second_move': 'Rain Dish'}, 'water'),
 ({'first_move': 'Shield Dust', 'second_move': 'Run Away'}, 'bug'),
 ({'first_move': 'Shed Skin', 'second_move': 'None'}, 'bug'),
 ({'first_move': 'Compoundeyes', 'second_move': 'Tinted Lens'}, 'bug'),
 ({'first_move': 'Shield Dust', 'second_move': 'Run Away'}, 'bug'),
 ({'first_move': 'Shed Skin', 'second_move': 'None'}, 'bug'),
 ({'first_move': 'Swarm', 'second_move': 'Sniper'}, 'bug'),

In [10]:
import random
# best to shuffle before training
# or else the earlier classifications will get more training than the classifications that come later
# ie, all the bugs get trained, none of the water types 
random.shuffle(featuresets)

In [11]:
import nltk
import math

split_num = math.floor(len(featuresets)*.8)

# split feature sets into training and test sets (here we'll try 80% train, 20% test)
train_set, test_set = featuresets[:split_num], featuresets[split_num+1:]

In [12]:
# build a classifier based on the training set
classifier = nltk.NaiveBayesClassifier.train(train_set)

In [15]:
classifier.classify(move_features(['Stench', 'Sticky Hold', 'Aftermath']))

'poison'

In [16]:
# Lets evaluate the classifier on a much larger quantity of unseen data - robustness of our model
print(nltk.classify.accuracy(classifier, test_set))

0.70625


In [None]:
# Precision: the number of True Positives divided by the number of True Positives and False Positives
# --> it is the number of positive predictions divided by the total number of positive class values predicted
# --> effectively a measure of a classifier's exactness (low precision indicates a large number of False Positives)


# Recall: the number of True Positives divided by the number of True Positives and the number of False Negatives
# --> number of positive predictions divided by the number of positive class values in the test data.
# --> effectively a measure of a classifier's completeness (low recall indicates many False Negatives)


# F1 Score: (also called F measure) conveys the balance between the precision and recall

In [17]:
# Finally, we can examine the classifier to determine which features it found most 
# effective for distinguishing the pokemone type

# Return a list of the 'most informative' features used by this
#         classifier.  For the purpose of this function, the
#         informativeness of a feature ``(fname,fval)`` is equal to the
#         highest value of P(fname=fval|label), for any label, divided by
#         the lowest value of P(fname=fval|label), for any label:

classifier.show_most_informative_features(12)

Most Informative Features
              first_move = 'Chlorophyll'   grass : normal =     13.2 : 1.0
              first_move = 'Swift Swim'    water : bug    =     10.1 : 1.0
             second_move = 'None'         psychi : fairy  =      6.9 : 1.0
             second_move = 'Frisk'         ghost : normal =      6.3 : 1.0
              first_move = 'Static'       electr : ground =      5.4 : 1.0
              first_move = 'Guts'         fighti : rock   =      4.9 : 1.0
             second_move = 'Sturdy'         rock : water  =      4.8 : 1.0
              first_move = 'Run Away'     normal : fire   =      4.6 : 1.0
              first_move = 'Keen Eye'     normal : water  =      4.6 : 1.0
              first_move = 'Poison Point' poison : water  =      4.6 : 1.0
              first_move = 'Hustle'        fairy : water  =      4.5 : 1.0
              first_move = 'Oblivious'       ice : bug    =      4.4 : 1.0
