In [None]:
# Dependencies

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn import tree
import os

In [None]:
# Load the Pokemon stats table (path is relative to the working directory)
# and preview the first rows
pokedex = pd.read_csv('Pokemon.csv')
pokedex.head()

In [None]:
# This is Daniel's section of code
# Classification of Legendary Status
# Target vector is the "Legendary" column; value_counts() shows the class balance
y = pokedex["Legendary"]
y.value_counts()


In [None]:
# Feature matrix: remove the target together with the identifier, name,
# type and generation columns in a single call
X = pokedex.drop(
    columns=["Legendary", "#", "Name", "Type 1", "Type 2", "Generation"]
)
X.head()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out a test set; no test_size is passed, so scikit-learn's default
# proportion applies. random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [None]:
from sklearn.preprocessing import StandardScaler

# Create a StandardScaler, then fit it using the training features only
X_scaler = StandardScaler()
X_scaler = X_scaler.fit(X_train)

In [None]:
# Transform the training and testing features using the fitted X_scaler
# (only the features are scaled; the target is left as-is)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# Loop through different k values to see which has the highest accuracy
# Note: We only use odd numbers because we don't want any tied votes
# between the two classes
k_values = range(1, 20, 2)
train_scores = []
test_scores = []
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train_scaled, y_train)
    train_score = knn.score(X_train_scaled, y_train)
    test_score = knn.score(X_test_scaled, y_test)
    train_scores.append(train_score)
    test_scores.append(test_score)
    print(f"k: {k}, Train/Test Score: {train_score:.3f}/{test_score:.3f}")

# Both curves share one axis, so label each line and show a legend; the
# y-axis covers training as well as testing accuracy
plt.plot(k_values, train_scores, marker='o', label="Training accuracy")
plt.plot(k_values, test_scores, marker="x", label="Testing accuracy")
plt.xlabel("k neighbors")
plt.ylabel("Accuracy score")
plt.legend()
plt.show()

In [None]:
# Note that k: 15 provides the best accuracy, where the classifier starts to stabilize
# Refit with that k and report accuracy on the held-out split
knn = KNeighborsClassifier(n_neighbors=15).fit(X_train_scaled, y_train)
print(f'k=15 Test Acc: {knn.score(X_test_scaled, y_test):.3f}')

In [None]:
# Predict Legendary status for every row of X, scaled with the scaler that
# was fitted on the training split (X includes the training rows as well)
predicted_class = knn.predict(X_scaler.transform(X))
# Uncomment to inspect the raw predictions: print(predicted_class)

In [None]:
# Classification of Primary Type
# Target vector is the "Type 1" column; value_counts() shows how many
# Pokemon fall into each primary type
y2 = pokedex["Type 1"]
y2.value_counts()

In [None]:
# Re-split with the primary type as the target, then standardise both
# portions using statistics learned from the training portion only
X_train, X_test, y_train, y_test = train_test_split(X, y2, random_state=1)

X_scaler2 = StandardScaler()
X_scaler2 = X_scaler2.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler2.transform(part) for part in (X_train, X_test)
)

In [None]:
# Loop through different k values to see which has the highest accuracy
# Note: odd k only rules out tied votes for a two-class target; with many
# primary types a tie between classes is still possible
k_values = range(1, 20, 2)
train_scores = []
test_scores = []
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train_scaled, y_train)
    train_score = knn.score(X_train_scaled, y_train)
    test_score = knn.score(X_test_scaled, y_test)
    train_scores.append(train_score)
    test_scores.append(test_score)
    print(f"k: {k}, Train/Test Score: {train_score:.3f}/{test_score:.3f}")

# Both curves share one axis, so label each line and show a legend; the
# y-axis covers training as well as testing accuracy
plt.plot(k_values, train_scores, marker='o', label="Training accuracy")
plt.plot(k_values, test_scores, marker="x", label="Testing accuracy")
plt.xlabel("k neighbors")
plt.ylabel("Accuracy score")
plt.legend()
plt.show()

In [None]:
# Note that k: 17 provides the best accuracy, where the classifier starts to stabilize
# Refit with that k and report accuracy on the held-out split
knn = KNeighborsClassifier(n_neighbors=17).fit(X_train_scaled, y_train)
print(f'k=17 Test Acc: {knn.score(X_test_scaled, y_test):.3f}')

In [None]:
# Predict a primary type for every Pokemon. X contains the rows the
# classifier was fitted on as well as the held-out rows, so agreement with
# "Type 1" in this table is optimistic compared with the test score.
predicted_class = knn.predict(X_scaler2.transform(X))

# Take an explicit copy so the new column is written to an independent
# frame rather than to a slice of pokedex (avoids SettingWithCopyWarning)
predictions = pokedex[['Name', "Type 1"]].copy()
predictions['Predicted Type'] = predicted_class
predictions.head()

In [None]:
# Flag each row with 1 when the predicted type matches the true type and 0
# otherwise, using one vectorised comparison instead of a row-by-row loop.
# `accuracy` stays a plain list of ints, as before.
accuracy = (
    (predictions['Type 1'] == predictions['Predicted Type']).astype(int).tolist()
)
predictions['Accurate'] = accuracy
predictions.head()

In [None]:
# Per-type hit rate: the mean of the 0/1 "Accurate" flag within each true
# primary type. numeric_only=True skips the string columns (Name,
# Predicted Type), which pandas 2.x refuses to average.
grouping = predictions.groupby('Type 1')
grouping.mean(numeric_only=True)

In [None]:
# This is Tanners section of code

from numpy.random import seed
seed(1)
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets


y = pokedex['Legendary']

X = pokedex.drop("Legendary", axis=1)
X = X.drop("#", axis=1)
X = X.drop("Name", axis=1)
X = X.drop("Type 1", axis=1)
X = X.drop("Type 2", axis=1)
X.head()

In [None]:
 # Use train_test_split to create training and testing data
from sklearn.model_selection import train_test_split

# No test_size is passed, so scikit-learn's default proportion applies;
# random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [None]:
from sklearn.preprocessing import StandardScaler

# Create a StandardScaler, then fit it using the training features only
X_scaler = StandardScaler()
X_scaler = X_scaler.fit(X_train)

In [None]:
 # Transform the training and testing data using the X_scaler
# (the scaler was fitted on X_train, so the test split is scaled with
# training statistics)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
from tensorflow.keras.utils import to_categorical

In [None]:
# One-hot encoding
# to_categorical expands each label vector into a matrix with one column per class
# NOTE(review): assumes "Legendary" is boolean/0-1 so both matrices get two columns — confirm
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)

In [None]:
# First, create a plain feed-forward network: one input per feature column,
# a hidden layer of 6 ReLU units, and one softmax output per class
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Derive the layer sizes from the data instead of hard-coding them, so the
# model stays in step with the feature matrix and the one-hot targets
n_features = X_train_scaled.shape[1]
n_classes = y_train_categorical.shape[1]

model = Sequential()
model.add(Dense(units=6, activation='relu', input_dim=n_features))
model.add(Dense(units=n_classes, activation='softmax'))

In [None]:
# Print the layer-by-layer summary of the network defined above
model.summary()

In [None]:
# Compile the model
# Targets are one-hot encoded, hence categorical cross-entropy; accuracy is
# tracked so it appears in the training history
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
 # Fit the model to the training data
# The held-out test split is passed as validation data, so the per-epoch
# val_* metrics are measured on the same rows that evaluate() uses afterwards
history = model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle = True,
    verbose = 2,
    validation_data=(X_test_scaled,y_test_categorical)
)

In [None]:
# Score the trained network on the held-out split; evaluate() returns the
# loss followed by the compiled metric (accuracy)
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Per-epoch metrics recorded by model.fit
history_dict = history.history
loss_values, val_loss_values = history_dict['loss'], history_dict['val_loss']
accuracy, val_accuracy = history_dict['accuracy'], history_dict['val_accuracy']

# Number the epochs from 1 for the x-axis and lay out two side-by-side panels
epochs = range(1, 1 + len(loss_values))
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 6))

# Left panel: accuracy vs epochs (dots = training, line = validation)
ax[0].set_title('Training & Validation Accuracy', fontsize=16)
ax[0].set_xlabel('Epochs', fontsize=16)
ax[0].set_ylabel('Accuracy', fontsize=16)
ax[0].plot(epochs, accuracy, 'bo', label='Training accuracy')
ax[0].plot(epochs, val_accuracy, 'b', label='Validation accuracy')
ax[0].legend()

# Right panel: loss vs epochs (dots = training, line = validation)
ax[1].set_title('Training & Validation Loss', fontsize=16)
ax[1].set_xlabel('Epochs', fontsize=16)
ax[1].set_ylabel('Loss', fontsize=16)
ax[1].plot(epochs, loss_values, 'bo', label='Training loss')
ax[1].plot(epochs, val_loss_values, 'b', label='Validation loss')
ax[1].legend()

In [None]:
# Classification of Primary Type
# Target vector is the "Type 1" column; value_counts() shows how many
# Pokemon fall into each primary type
y2 = pokedex["Type 1"]
y2.value_counts()

In [None]:
# Stratified split on the primary type so each type is represented in both
# portions, then standardise using statistics from the training portion only
X_train, X_test, y_train, y_test = train_test_split(
    X, y2, stratify=y2, random_state=1
)

X_scaler2 = StandardScaler()
X_scaler2 = X_scaler2.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler2.transform(part) for part in (X_train, X_test)
)

In [None]:
# Integer-encode the type names, then one-hot encode them
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()

# Learn the name -> integer mapping from the training labels only and reuse
# it for the test labels. Refitting on the test labels could assign a
# different integer to the same type if the two splits do not contain
# exactly the same set of classes.
y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)

# Fix the width so both matrices have one column per class known to the encoder
num_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(y_train, num_classes=num_classes)
y_test_categorical = to_categorical(y_test, num_classes=num_classes)



In [None]:
# Same architecture as the Legendary model, sized from the data: one input
# per feature column and one softmax output per one-hot class column
n_features = X_train_scaled.shape[1]
n_classes = y_train_categorical.shape[1]

model = Sequential()
model.add(Dense(units=6, activation='relu', input_dim=n_features))
model.add(Dense(units=n_classes, activation='softmax'))

In [None]:
# Print the layer-by-layer summary of the primary-type network
model.summary()

In [None]:
# Compile the model
# The target is multi-class with one-hot labels, so categorical
# cross-entropy is the loss; accuracy is tracked for the training history

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
 # Fit the model to the training data
# The held-out test split is passed as validation data, so the per-epoch
# val_* metrics are measured on the same rows that evaluate() uses afterwards
history = model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle = True,
    verbose = 2,
    validation_data=(X_test_scaled,y_test_categorical)
)

In [None]:
# Score the primary-type network on the held-out split; evaluate() returns
# the loss followed by the compiled metric (accuracy)
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# This is Michael's section of code

In [None]:
# This is Mason's section of code

# Set our prediction targets and name them
y = pokedex["Legendary"]
target_names = ["negative", "positive"]

# Classification of Legendary Status
# Features: everything except the target, the row id, the name and both
# type columns, removed in a single call
X = pokedex.drop(columns=["Legendary", "#", "Name", "Type 1", "Type 2"])

# Keep the column labels so feature importances can be paired with names
feature_names = X.columns

X.head()

In [None]:
# Import the train/test splitter and split the data for the random forest
# (unscaled features are used; random_state pins the shuffle)

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:

# Import the random forest classifier, fit it, and score it on the test split

from sklearn.ensemble import RandomForestClassifier

# Fix random_state so the forest's internal randomness, and hence the score
# and the feature importances, are the same on every run
rf = RandomForestClassifier(n_estimators=300, random_state=42)

rf = rf.fit(X_train, y_train)

rf.score(X_test, y_test)

In [None]:
# Sort by importance for predicting Legendary Status
# Pairs each importance with its column name and sorts the
# (importance, name) tuples from highest to lowest

sorted(zip(rf.feature_importances_, feature_names), reverse = True)

In [None]:
# Classification of Primary Type
# Target vector is the "Type 1" column; value_counts() shows how many
# Pokemon fall into each primary type

y2 = pokedex["Type 1"]

y2.value_counts()

In [None]:
# Re-split with the primary type as the target
# NOTE(review): no stratify= is passed here, so rare types may be unevenly
# represented across the two splits — confirm this is intended
X_train, X_test, y_train, y_test = train_test_split(X, y2, random_state=1)


In [None]:
# Random forest for the primary type. random_state is fixed so the score is
# the same on every run.
rf = RandomForestClassifier(n_estimators=300, random_state=1)

rf = rf.fit(X_train, y_train)

rf.score(X_test, y_test)