In [149]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

In [150]:
#load in the data frame

df = pd.read_csv('C:/Users/ryhen/OneDrive - UCB-O365/Machine Learning/Project/Part 3/Naive_Bayes/Naive_Bayes_Data.csv')
#change the rank_category 0 to bad, 1 to okay and 2 to good
df['rank_category'] = df['rank_category'].replace(0, 'bad')
df['rank_category'] = df['rank_category'].replace(1, 'okay')
df['rank_category'] = df['rank_category'].replace(2, 'good')
df.head()

Unnamed: 0,rank_category,pokemon,item,ability,tera type,move_1,move_2,move_3,move_4,tournament
0,good,miraidon,choice specs,hadron engine,fairy,electro-drift,draco-meteor,dazzling-gleam,volt-switch,worlds
1,good,ogerpon,hearthflame mask,mold breaker,fire,spiky-shield,ivy-cudgel,wood-hammer,follow-me,worlds
2,good,urshifu,focus sash,unseen fist,water,detect,surging-strikes,close-combat,aqua-jet,worlds
3,good,iron-hands,assault vest,quark drive,bug,drain-punch,wild-charge,fake-out,low-kick,worlds
4,good,whimsicott,covert cloak,prankster,dark,moonblast,encore,light-screen,tailwind,worlds


# Tree 1

In [151]:
# Select features and target variable
features = ['pokemon','item', 'ability', 'tera type', 'move_1', 'move_2', 'move_3', 'move_4']
target = 'rank_category'

features = df[features]
target = df[target]

# Encode the categorical features
features = features.apply(LabelEncoder().fit_transform)
features

Unnamed: 0,pokemon,item,ability,tera type,move_1,move_2,move_3,move_4
0,80,16,48,4,44,38,32,198
1,84,41,68,6,157,103,226,62
2,130,38,137,18,31,188,26,6
3,58,3,91,0,39,216,56,102
4,135,22,82,1,110,52,114,178
...,...,...,...,...,...,...,...,...
4447,124,38,82,8,10,99,198,131
4448,130,12,137,1,192,25,193,196
4449,117,49,17,16,38,38,129,131
4450,25,80,134,9,190,127,46,131


In [152]:
# Define X (features) and y (target)
X = features
y = target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=106)


In [153]:
##Instantiate the decision tree using the defaults.
MyDT_Classifier = DecisionTreeClassifier(criterion='gini')
## Use fit to create the decision tree model
MyDT_Classifier = MyDT_Classifier.fit(X_train, y_train)

FeatureNames=X_train.columns.values ## get all the feature/variable names
print(FeatureNames)
ClassNames=MyDT_Classifier.classes_ ## Get the class names
print(ClassNames)

['pokemon' 'item' 'ability' 'tera type' 'move_1' 'move_2' 'move_3'
 'move_4']
['bad' 'good' 'okay']


In [154]:
##Tree Plot Option 1
# cut off the tree at the 3rd level
plt.figure(figsize=(35,20))
MyPlot=tree.plot_tree(MyDT_Classifier, 
                   feature_names=FeatureNames,  
                   class_names=ClassNames,
                   filled=True,
                   max_depth=3)
## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
## To see the tree, open this file on your computer :)
## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
plt.savefig("PokemonTree1.jpg")
plt.close()

In [155]:
# Use the Tree to make predictions
y_pred = MyDT_Classifier.predict(X_test)

# Calculate the accuracy of the model

accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

report = classification_report(y_test, y_pred)
print(report)

# Calculate the confusion matrix

conf_matrix = confusion_matrix(y_test, y_pred)
print(conf_matrix)




Accuracy: 0.4208754208754209
              precision    recall  f1-score   support

         bad       0.25      0.27      0.26       222
        good       0.18      0.17      0.18       153
        okay       0.57      0.56      0.57       516

    accuracy                           0.42       891
   macro avg       0.33      0.33      0.33       891
weighted avg       0.42      0.42      0.42       891

[[ 60  36 126]
 [ 37  26  90]
 [146  81 289]]


# Tree 2

In [156]:
##Instantiate the decision tree using the defaults.
MyDT_Classifier = DecisionTreeClassifier(criterion='entropy')
## Use fit to create the decision tree model
MyDT_Classifier = MyDT_Classifier.fit(X_train, y_train)

FeatureNames=X_train.columns.values ## get all the feature/variable names
print(FeatureNames)
ClassNames=MyDT_Classifier.classes_ ## Get the class names
print(ClassNames)

##Tree Plot 
# cut off the tree at the 4th level
plt.figure(figsize=(40,25))
MyPlot=tree.plot_tree(MyDT_Classifier, 
                   feature_names=FeatureNames,  
                   class_names=ClassNames,
                   filled=True,
                   max_depth=3)

plt.savefig("PokemonTree2.jpg")
plt.close()

# Use the Tree to make predictions
y_pred = MyDT_Classifier.predict(X_test)

# Calculate the accuracy of the model

accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

report = classification_report(y_test, y_pred)
print(report)

# Calculate the confusion matrix

conf_matrix = confusion_matrix(y_test, y_pred)
print(conf_matrix)


['pokemon' 'item' 'ability' 'tera type' 'move_1' 'move_2' 'move_3'
 'move_4']
['bad' 'good' 'okay']
Accuracy: 0.39057239057239057
              precision    recall  f1-score   support

         bad       0.21      0.23      0.22       222
        good       0.12      0.11      0.12       153
        okay       0.55      0.54      0.54       516

    accuracy                           0.39       891
   macro avg       0.29      0.29      0.29       891
weighted avg       0.39      0.39      0.39       891

[[ 51  37 134]
 [ 38  17  98]
 [151  85 280]]


# Tree 3

In [157]:
##Instantiate the decision tree using the defaults.
MyDT_Classifier = DecisionTreeClassifier(splitter='random')
## Use fit to create the decision tree model
MyDT_Classifier = MyDT_Classifier.fit(X_train, y_train)

FeatureNames=X_train.columns.values ## get all the feature/variable names
print(FeatureNames)
ClassNames=MyDT_Classifier.classes_ ## Get the class names
print(ClassNames)

##Tree Plot 
# cut off the tree at the 4th level
plt.figure(figsize=(40,25))
MyPlot=tree.plot_tree(MyDT_Classifier, 
                   feature_names=FeatureNames,  
                   class_names=ClassNames,
                   filled=True,
                   max_depth=3)

plt.savefig("PokemonTree3.jpg")
plt.close()

# Use the Tree to make predictions
y_pred = MyDT_Classifier.predict(X_test)

# Calculate the accuracy of the model

accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

report = classification_report(y_test, y_pred)
print(report)

# Calculate the confusion matrix

conf_matrix = confusion_matrix(y_test, y_pred)
print(conf_matrix)

['pokemon' 'item' 'ability' 'tera type' 'move_1' 'move_2' 'move_3'
 'move_4']
['bad' 'good' 'okay']
Accuracy: 0.44107744107744107
              precision    recall  f1-score   support

         bad       0.29      0.32      0.31       222
        good       0.15      0.12      0.13       153
        okay       0.59      0.59      0.59       516

    accuracy                           0.44       891
   macro avg       0.34      0.34      0.34       891
weighted avg       0.44      0.44      0.44       891

[[ 72  34 116]
 [ 39  19  95]
 [136  78 302]]
