In [None]:
# import ast (standard library) to safely parse list literals stored as text
import ast

# import pandas and rename as pd
import pandas as pd

In [None]:
# set the path to our file
path = 'pokemon_data.csv'
# convert our csv file to a pandas DataFrame
# converters tells pandas how to turn the raw text of a column into a value:
# here the 'abilities' text (presumably something like "['Overgrow', 'Chlorophyll']")
# becomes a real Python list instead of staying a string
# ast.literal_eval only accepts Python literals (lists, strings, numbers, ...),
# so nothing else found in the file can be evaluated
pokemon = pd.read_csv(path, converters={'abilities': ast.literal_eval})

In [None]:
# display our dataset
# (a bare name on the last line of a cell is rendered as that cell's output)
pokemon

In [None]:
# select the 'abilities' column of the DataFrame
# we can use dot notation, because we are using the pandas library AND
# there is no space in the column name
abilities = pokemon.abilities

In [None]:
# show the abilities stored for the first pokemon
abilities[0]

In [None]:
# check what data type this is
# (the converter passed to read_csv should have produced a list, not a string)
type(abilities[0])

In [None]:
# count the distinct values in the type1 column
# (dropna=False keeps a missing value in the count, exactly like len(unique()) does)
pokemon.type1.nunique(dropna=False)

In [None]:
# establishing a baseline for our algorithm:
# the chance of being right when guessing one of the types at random
# computed from the data so it stays correct if the dataset changes
1 / len(pokemon.type1.unique())

In [None]:
# defining our features
# each pokemon's features are stored as key-value pairs (a dictionary)
# the keys will be the same for every pokemon; only the values will differ
# JUST AN EXAMPLE
# {'first_move': 'Overgrow', 'second_move': 'Chlorophyll'}

In [None]:
# our first function!
def create_features(pk_moves):
    """Build the feature dictionary for one pokemon from its abilities.

    Only the first two entries of ``pk_moves`` are used. Missing entries are
    filled with the placeholder string 'Nothing'. The list passed in is never
    modified.

    Parameters
    ----------
    pk_moves : list (or tuple) of str
        The abilities of a single pokemon; may be empty.

    Returns
    -------
    dict
        A dictionary with the keys 'first_move' and 'second_move'.
    """
    # work on a padded copy instead of appending to pk_moves: appending would
    # change the caller's list (for example the lists stored in the DataFrame)
    padded = list(pk_moves[:2]) + ['Nothing'] * (2 - len(pk_moves))
    return {'first_move': padded[0], 'second_move': padded[1]}

In [None]:
# zip combines our two columns
# we are making tuples
# syntax for tuple (thing1, thing2)
# ([Overgrow, Chlorophyll], grass)
# zip() on its own gives a one-shot iterator: it displays as <zip object ...>
# and is empty after the first loop over it, so we store the pairs in a list
# that can be inspected and reused when cells are re-run
features_and_labels = list(zip(pokemon.abilities, pokemon.type1))
# peek at the first few pairs
features_and_labels[:5]

In [None]:
# list comprehension: turn every (abilities, type) pair into a
# (feature dictionary, type) tuple and collect the tuples in a list
feature_sets = [
    (create_features(moves), label)
    for moves, label in features_and_labels
]
feature_sets

In [None]:
# always want to shuffle your dataset before classifying
# a fixed seed makes the shuffle (and so the train/test split and the accuracy
# reported later) the same every time the notebook is run from the top
import random
random.seed(42)  # any fixed number works
random.shuffle(feature_sets)

In [None]:
# let's look at how many feature sets we have
# (one was built for every (abilities, type) pair)
len(feature_sets)

In [None]:
# determine what 80% of our number of feature sets would look like
# (use the real length instead of typing the number in by hand)
len(feature_sets) * .8

In [None]:
# splitting our data into a training set and a testing set with an 80/20 ratio
# training is about 'learning'
# testing is to determine accuracy
# the cut-off is computed from the dataset size instead of being hard-coded;
# int() rounds down, so 801 feature sets give 640 training examples
split_index = int(len(feature_sets) * .8)
training_set = feature_sets[:split_index]
testing_set = feature_sets[split_index:]

In [None]:
import nltk

# build our first classifier!
# train NLTK's Naive Bayes classifier on the (features, label) tuples in training_set
classifier = nltk.NaiveBayesClassifier.train(training_set)

In [None]:
# test out our classifier on a hand-written pair of abilities
# create_features builds the feature dictionary; classify returns the predicted label
classifier.classify(create_features(['Blaze', 'Solar Power']))

In [None]:
# how accurate is our classifier?
# measured on testing_set, the examples that were held out from training
print(nltk.classify.accuracy(classifier, testing_set))

In [None]:
# show which features were most important for the algorithm
# (the argument asks for the top 12)
classifier.show_most_informative_features(12)