## Legendary Pokémon Classification

Given data about various Pokémon, let's try to predict whether a given Pokémon is legendary or not.

We will use a TensorFlow ANN to make our predictions.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf

from sklearn.decomposition import PCA

In [None]:
# Load the Pokémon dataset from the Kaggle input directory.
data = pd.read_csv(filepath_or_buffer='/kaggle/input/pokemon/Pokemon.csv')

In [None]:
# Display the freshly loaded DataFrame for a first look.
data

In [None]:
# Keep an untouched snapshot of the loaded table; `data` itself is
# modified by the cleaning and encoding steps that follow.
data_raw = data.copy(deep=True)

# Cleaning

In [None]:
# Print a summary of the DataFrame (columns, non-null counts, dtypes).
data.info()

In [None]:
# Count missing values per column.
data.isna().sum()

In [None]:
# Remove the '#', 'Name' and 'Type 2' columns before modelling.
data = data.drop(columns=['#', 'Name', 'Type 2'])

In [None]:
# Convert the 'Legendary' target column to integers.
# The builtin `int` is used because the `np.int` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
data['Legendary'] = data['Legendary'].astype(int)

In [None]:
# Display the DataFrame after cleaning.
data

# EDA

In [None]:
# List the distinct primary types. `unique` must be called; without the
# parentheses the cell only shows the bound method object.
data['Type 1'].unique()

In [None]:
# Every column except the categorical 'Type 1' is treated as numeric.
numeric_columns = data.columns.drop('Type 1')

In [None]:
# Annotated heatmap of the pairwise correlations between the numeric
# columns, with the colour scale pinned to the full [-1, 1] range.
plt.figure(figsize=(18, 15))
correlation_matrix = data[numeric_columns].corr()
sns.heatmap(
    data=correlation_matrix,
    vmin=-1.0,
    vmax=1.0,
    annot=True,
)
plt.show()

In [None]:
# Overlay the kernel density estimate of each base stat on one figure.
plt.figure(figsize=(20, 10))
for column in ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']:
    # `fill=` replaces the deprecated `shade=` keyword; `label=` lets the
    # legend identify which curve belongs to which stat.
    sns.kdeplot(data[column], fill=True, label=column)
# Without an explicit label the axis would carry the name of the last
# column plotted, which is misleading for a shared axis.
plt.xlabel("Stat value")
plt.legend()
plt.show()

# Preprocessing

In [None]:
# Inspect the column dtypes to see which columns still need encoding.
data.dtypes

# Encoding

In [None]:
def onehot_encode(df, column, prefix):
    """Return a new DataFrame with `column` replaced by one-hot columns.

    The indicator columns are named ``<prefix>_<value>`` and are appended
    after the existing columns; the source `column` is then removed.
    The input DataFrame is left unmodified.
    """
    indicator_columns = pd.get_dummies(df[column], prefix=prefix)
    # `concat` builds a new frame, so the caller's `df` is never touched.
    combined = pd.concat([df, indicator_columns], axis=1)
    return combined.drop(columns=column)

In [None]:
# One-hot encode the primary type; the new columns are prefixed with 't'.
data = onehot_encode(data, column='Type 1', prefix='t')

In [None]:
# Display the DataFrame after one-hot encoding.
data

# Splitting and Scaling

In [None]:
# Separate the feature matrix from the 'Legendary' target column.
X = data.drop(columns='Legendary')
y = data['Legendary']

In [None]:
# Split first, then fit the scaler on the training rows only, so that no
# statistics from the held-out test rows leak into preprocessing.
# `X` itself is left unscaled; only `X_train` / `X_test` are standardized.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=42)

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
# Reuse the training mean and variance for the test set.
X_test = scaler.transform(X_test)

# Training

In [None]:
# Check the feature matrix dimensions (rows, features).
X.shape

In [None]:
# Feed-forward binary classifier: two hidden ReLU layers of 64 units and
# a single sigmoid output unit.
# The input width is taken from the training matrix rather than being
# hard-coded, so the model keeps working if the feature set changes.
inputs = tf.keras.Input(shape=(X_train.shape[1],))
x = tf.keras.layers.Dense(64, activation='relu')(inputs)
x = tf.keras.layers.Dense(64, activation='relu')(x)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)


# Track accuracy and ROC AUC alongside the binary cross-entropy loss.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.AUC(name='auc')
    ]
)


batch_size = 32
epochs = 20


# `validation_split` reserves part of the training data for validation;
# the resulting 'val_*' series are plotted in the Results section.
# ReduceLROnPlateau is used with its default settings.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[tf.keras.callbacks.ReduceLROnPlateau()],
    verbose=0
)

# Results

In [None]:
# Training vs. validation loss per epoch.
# With a list passed as `y`, Plotly Express runs in wide-form mode, where
# the axes are named 'index' and 'value'; those are the keys `labels`
# must use for the axis titles to be applied.
fig_loss = px.line(
    history.history,
    y=['loss', 'val_loss'],
    labels={'index': "Epoch", 'value': "Loss"},
    title="Loss Over Time"
)

fig_loss.show()

In [None]:
# Zero-based position of the epoch with the lowest validation loss.
np.argmin(history.history['val_loss'])

In [None]:
# Training vs. validation AUC per epoch.
# With a list passed as `y`, Plotly Express runs in wide-form mode, where
# the axes are named 'index' and 'value'; those are the keys `labels`
# must use for the axis titles to be applied.
fig_auc = px.line(
    history.history,
    y=['auc', 'val_auc'],
    labels={'index': "Epoch", 'value': "AUC"},
    title="AUC Over Time"
)

fig_auc.show()

In [None]:
# Evaluate the trained model on the held-out test set; returns the loss
# followed by the metrics configured in `model.compile`.
model.evaluate(X_test, y_test)

# Post-training Analysis

In [None]:
# Threshold the sigmoid outputs at 0.5 and flatten them to a 1-D array of
# 0/1 labels. The builtin `int` is used because the `np.int` alias was
# removed in NumPy 1.24.
predicted_labels = (model.predict(X_test) >= 0.5).astype(int).ravel()

# NOTE: despite its name, `predictions` is a boolean mask that is True
# where the predicted label differs from the true label. The name is kept
# because later cells refer to it.
predictions = predicted_labels != y_test
predictions

In [None]:
# Index labels of the test rows selected by the `predictions` mask.
mislabeled_indices = y_test.loc[predictions].index

In [None]:
# Look up the rows selected by `mislabeled_indices` in the untouched raw
# table, so every original column is visible.
data_raw.loc[mislabeled_indices]

In [None]:
# Re-check the feature matrix dimensions before the PCA step.
X.shape

In [None]:
# Project the table onto its first two principal components for plotting.
# NOTE(review): PCA is fitted on `data`, which is unscaled and still
# contains the 'Legendary' label column -- confirm this is intended.
pca = PCA(n_components=2)
# Carry over `data`'s index so that the label-based `.loc` lookups in the
# plotting cell resolve to the same rows even if the index is not 0..n-1.
data_reduced = pd.DataFrame(
    pca.fit_transform(data),
    columns=["PC1", "PC2"],
    index=data.index,
)

In [None]:
# Display the two-component PCA projection.
data_reduced

In [None]:
# Index labels of all rows whose 'Legendary' value is 1.
legendary_indices = data.loc[data['Legendary'] == 1].index

# Labels that are both legendary and in `mislabeled_indices`.
mislabeled_legendary_indices = np.intersect1d(legendary_indices, mislabeled_indices)

In [None]:
plt.figure(figsize=(20, 10))

# Layers are drawn in order, so each later (more specific) group paints
# over the broader group underneath it.
scatter_layers = [
    (data_reduced.index, 'lightgray', 'Non-Legendary'),
    (legendary_indices, 'dimgray', 'Legendary'),
    (mislabeled_indices, 'orchid', 'Non-Legendary Misclassified'),
    (mislabeled_legendary_indices, 'mediumspringgreen', 'Legendary Misclassified'),
]
for row_labels, color, legend_label in scatter_layers:
    plt.scatter(
        data_reduced.loc[row_labels, 'PC1'],
        data_reduced.loc[row_labels, 'PC2'],
        c=color,
        label=legend_label,
    )

plt.title("PCA Scatter Plot")
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.legend()
plt.show()