In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found in it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import seaborn as sn

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix

from xgboost import XGBClassifier

from itertools import chain 

# First, let's look at the data

In [None]:
# Directory that holds the competition CSV files.
path = '/kaggle/input/pokemon-challenge/'
# Per-pokemon table (id '#', name, types, stats, 'Legendary' flag).
pokemon_stats = pd.read_csv(os.path.join(path,'pokemon.csv'))
# Battle records; later cells use its First_pokemon, Second_pokemon and Winner columns.
battles = pd.read_csv(os.path.join(path,'combats.csv'))

In [None]:
# Preview the battle records.
battles

In [None]:
# Preview the raw per-pokemon table.
pokemon_stats

In [None]:
# Collect the distinct values of the 'Type 1' column; the length displayed below is
# the number of unique pokemon types (the notebook reports 18).
pokemon_types = pokemon_stats['Type 1'].unique().tolist()
len(pokemon_types)

**We have 18 different types of pokemon**

**Name is not needed to predict the battles**

**We can convert 'Type 1' and 'Type 2' categorical to numeric value**

In [None]:
# The pokemon name is not used as a feature, so drop it.
pokemon_stats = pokemon_stats.drop(columns=['Name'])

# One-hot encode the remaining categorical columns; this produces the
# 'Type 1_<type>' and 'Type 2_<type>' indicator columns used by the next cell.
pokemon_stats =  pd.get_dummies(pokemon_stats)

pokemon_stats

In [None]:
# Collapse each 'Type 1_<t>' / 'Type 2_<t>' indicator pair into a single '<t>' column.
for cols in pokemon_types:
    primary_col = f'Type 1_{cols}'
    secondary_col = f'Type 2_{cols}'
    # A pokemon has the type if either of its two type slots carries it.
    pokemon_stats[f'{cols}'] = pokemon_stats[primary_col] | pokemon_stats[secondary_col]
    # The slot-specific indicators are no longer needed once merged.
    pokemon_stats = pokemon_stats.drop(columns=[primary_col, secondary_col])

pokemon_stats.columns

**Converting 'Legendary' column to numeric**

In [None]:
# Cast the 'Legendary' column to int so it can be used as a numeric feature.
pokemon_stats["Legendary"] = pokemon_stats["Legendary"].astype(int)
pokemon_stats

**Checking for null values**

In [None]:
# Count missing values per column.
pokemon_stats.isnull().sum()

In [None]:
# Persist the encoded stats table; the prediction section reads this file back in.
pokemon_stats.to_csv('/kaggle/working/pokemon_stats_edited.csv',index=False)

**Checking correlation among variables**

In [None]:
# Pairwise correlation between all columns of the encoded stats table.
corrMatrix = pokemon_stats.corr()

# Draw the matrix as an annotated heatmap; the large figure keeps the cell labels readable.
plt.figure(figsize=(15,15))
sn.heatmap(corrMatrix, annot=True)
plt.show()

**combats.csv lists two pokemon ids and the winner id for each battle**

**We need to replace the pokemon id with pokemon stats**

**The input to the model will be the stats of both pokemon**

In [None]:
# Attach the stats of the first pokemon to every battle row by matching
# 'First_pokemon' against the stats table's id column '#'.
combat_df  = pd.merge(battles,pokemon_stats,left_on='First_pokemon',right_on='#')
combat_df

In [None]:
# Attach the stats of the second pokemon. The stat columns now exist twice, so pandas
# suffixes them: '_x' for the first pokemon's columns and '_y' for the second's
# (hence the '#_x' / '#_y' columns dropped further down).
combat_df = pd.merge(combat_df,pokemon_stats,left_on='Second_pokemon',right_on='#')
combat_df

**Convert Winner column to binary for training**

In [None]:
# Build the binary training label 'Winner_x':
#   1 -> the first pokemon won, 0 -> the second pokemon won.
# Rows whose 'Winner' matches neither id keep NaN, which makes them easy to spot.
# np.nan is used instead of the np.NaN alias, which was removed in NumPy 2.0.

combat_df['Winner_x'] = np.nan
combat_df.loc[(combat_df['First_pokemon']==combat_df['Winner']),'Winner_x'] = 1
combat_df.loc[(combat_df['Second_pokemon']==combat_df['Winner']),'Winner_x'] = 0
combat_df

In [None]:
# Remove the id columns and the raw winner id: only stats remain as features.
combat_df = combat_df.drop(columns=['First_pokemon','Second_pokemon','#_x','#_y','Winner'])
# Labels as a NumPy array (1 = first pokemon won, 0 = second pokemon won).
y = combat_df['Winner_x'].values
# Feature frame without the label column.
train_df = combat_df.drop(columns=['Winner_x'])
train_df

In [None]:
# Re-attach the label so the exported train.csv contains both features and target.
train_df['Winner_x'] = y
train_df.to_csv('train.csv',index=False)
train_df

In [None]:
# Separate the label again and standardise every feature column.
y = train_df['Winner_x'].values

train_df = train_df.drop(columns=['Winner_x'])
scaler = StandardScaler()
# NOTE(review): the scaler is fitted on all rows before the train/test split in the
# next cell, so held-out rows influence the scaling statistics — consider fitting on
# the training split only.
scaled_df = pd.DataFrame(scaler.fit_transform(train_df))
scaled_df

In [None]:
x=scaled_df.values  # Features

# Hold out 30% of the battles for evaluation (70% training / 30% test).
# A fixed random_state makes the split, and every metric computed from it,
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(x , y, test_size=0.3, random_state=42)

# Show one training example (feature vector, label) as a sanity check.
X_train[0], y_train[0]

**We now have inputs and labels ready**

# Building a Neural network model

In [None]:
# Training hyperparameters.
EPOCHS = 20            # number of passes over the training loader
BATCH_SIZE = 16        # batch size used by both data loaders
LEARNING_RATE = 0.001  # learning rate passed to the Adam optimizer

In [None]:
## train data
class trainData(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    ``X_data`` and ``y_data`` are stored as given and indexed in parallel,
    so they are expected to have the same length.
    """

    def __init__(self, X_data, y_data):
        self.X_data, self.y_data = X_data, y_data

    def __len__(self):
        # The dataset size is taken from the feature container.
        return len(self.X_data)

    def __getitem__(self, index):
        features = self.X_data[index]
        label = self.y_data[index]
        return features, label


# Wrap the training split as float tensors so the DataLoader can batch it.
train_data = trainData(torch.FloatTensor(X_train), 
                       torch.FloatTensor(y_train))
## test data
class testData(Dataset):
    """Map-style dataset over unlabeled feature rows (used for inference)."""

    def __init__(self, X_data):
        self.X_data = X_data

    def __len__(self):
        return len(self.X_data)

    def __getitem__(self, index):
        row = self.X_data[index]
        return row
    

# Wrap the held-out features as a float tensor; labels stay in y_test for scoring.
test_data = testData(torch.FloatTensor(X_test))

In [None]:
# Training batches are reshuffled; the test loader keeps the default (unshuffled) order
# so predictions line up with y_test.
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE)

In [None]:
class binaryClassification(nn.Module):
    """Fully connected binary classifier that outputs a win probability.

    Architecture: n_features -> 100 -> 500 -> 100 -> 50 -> 1, with ReLU after
    every hidden layer, dropout (p=0.1) before the output layer and a sigmoid
    on the output, so the result can be fed directly to ``nn.BCELoss``.

    Args:
        n_features: Width of the input vector. Defaults to 52, the number of
            input features this notebook feeds to the model.
    """

    def __init__(self, n_features=52):
        super(binaryClassification, self).__init__()
        # The input width is configurable so the network survives feature-set changes.
        self.layer_1 = nn.Linear(n_features, 100)
        self.layer_2 = nn.Linear(100, 500)
        self.layer_3 = nn.Linear(500, 100)
        self.layer_4 = nn.Linear(100, 50)
        self.layer_out = nn.Linear(50, 1)

        self.out_act = nn.Sigmoid()

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.1)

    def forward(self, inputs):
        """Return a (batch, 1) tensor of probabilities for a (batch, n_features) input."""
        x = self.relu(self.layer_1(inputs))
        x = self.relu(self.layer_2(x))
        x = self.relu(self.layer_3(x))
        x = self.relu(self.layer_4(x))
        # Dropout is applied once, just before the output layer.
        x = self.dropout(x)
        x = self.layer_out(x)
        x = self.out_act(x)

        return x

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# Instantiate the network and move its parameters to the selected device.
model = binaryClassification()
model.to(device)
print(model)
# BCELoss expects probabilities, which matches the sigmoid applied in the model's forward pass.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [None]:
def binary_acc(y_pred, y_test):
    """Return the accuracy of a batch as a whole-number percentage.

    Args:
        y_pred: Tensor of predicted probabilities; rounded to 0/1 before comparing.
        y_test: Tensor of ground-truth labels with the same shape as ``y_pred``.

    Returns:
        A scalar tensor holding the percentage of matching entries, rounded to
        the nearest integer. The denominator is ``y_test.shape[0]``.
    """
    hits = torch.round(y_pred) == y_test
    fraction_correct = hits.sum().float() / y_test.shape[0]
    return torch.round(fraction_correct * 100)

In [None]:
# Train for EPOCHS passes over the training loader and report the mean loss/accuracy per epoch.
model.train()  # training mode: dropout is active
for e in range(0, EPOCHS):
    # Running sums over the batches of this epoch.
    epoch_loss = 0
    epoch_acc = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        y_pred = model(X_batch)

        # Labels are unsqueezed to a column so they match the model's (batch, 1) output.
        loss = criterion(y_pred, y_batch.unsqueeze(1))
        acc = binary_acc(y_pred, y_batch.unsqueeze(1))
        
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        # binary_acc returns a rounded percentage per batch, so the epoch figure
        # below is the mean of those rounded per-batch values.
        epoch_acc += acc.item()

    print(f'Epoch {e+0:03}: | Loss: {epoch_loss/len(train_loader):.5f} | Acc: {epoch_acc/len(train_loader):.3f}')

# Saves the entire module object (not only its state_dict) to the working directory.
torch.save(model,'/kaggle/working/model.pt')

In [None]:
from itertools import chain 
# Run the trained network over the held-out split and collect hard 0/1 predictions.
y_pred_list = []
model.eval()  # inference mode: dropout is disabled
with torch.no_grad():
    for X_batch in test_loader:
        X_batch = X_batch.to(device)
        y_test_pred = model(X_batch)
        # Round the sigmoid probability to the nearest class label.
        y_pred_tag = torch.round(y_test_pred)
        y_pred_list.append(y_pred_tag.cpu().numpy())

# Flatten the per-batch (batch, 1) arrays into a single 1-D array.
# Concatenating keeps a final batch of size 1 intact, whereas squeeze().tolist()
# would collapse it to a bare float that cannot be iterated when flattening.
y_pred_list = np.concatenate(y_pred_list).ravel() if y_pred_list else np.array([])

In [None]:
# Element-wise match between the held-out labels and the predicted labels.
acc = (y_test == y_pred_list)
# Fraction of matching entries (a value between 0 and 1).
test_acc = acc.sum()/len(acc)
print("Test Acc : ",test_acc)

# Predicting battles in tests.csv

In [None]:
# Reload the network written by the training cell.
# - The absolute path matches the one passed to torch.save, so the load does not
#   depend on the current working directory.
# - map_location keeps the load working when the saving and loading devices differ.
# - weights_only=False is needed because the file holds the whole pickled module, not
#   just a state_dict. Unpickling can execute arbitrary code, so only load checkpoints
#   you created yourself.
model = torch.load('/kaggle/working/model.pt', map_location=device, weights_only=False)

In [None]:
# Reload the encoded stats table from the same absolute path it was exported to,
# so this cell does not depend on the current working directory.
pokemon_stats = pd.read_csv('/kaggle/working/pokemon_stats_edited.csv')
# Battles whose winner has to be predicted.
tests = pd.read_csv(os.path.join(path,'tests.csv'))
tests

In [None]:
# Attach the stats of both pokemon to every test battle.
# how='left' keeps exactly one output row per row of `tests`, in the same order:
# the predictions are written back to `tests` positionally, so rows must be neither
# dropped nor reordered. validate='many_to_one' fails fast if an id is duplicated
# in the stats table (which would multiply rows).
test_df  = pd.merge(tests,pokemon_stats,how='left',left_on='First_pokemon',right_on='#',validate='many_to_one')
test_df = pd.merge(test_df,pokemon_stats,how='left',left_on='Second_pokemon',right_on='#',validate='many_to_one')
assert len(test_df) == len(tests), "merged test frame must have one row per test battle"
test_df

In [None]:
# Remove the id columns so only the stat features remain, as was done for the training frame.
test_df = test_df.drop(columns=['First_pokemon','Second_pokemon','#_x','#_y'])
test_df

In [None]:
# Reuse the StandardScaler that was fitted on the training features instead of fitting
# a new one here: the network was trained on inputs scaled with those statistics, so
# the test inputs must be transformed with the same mean/variance.
test_df = pd.DataFrame(scaler.transform(test_df))
test_df

In [None]:
# Convert the scaled frame to a float tensor and batch it (default, unshuffled order)
# so the predictions keep the row order of the test frame.
test_df = test_df.values
test_data = testData(torch.FloatTensor(test_df))
scaled_test_tensor = DataLoader(dataset=test_data, batch_size=BATCH_SIZE)
scaled_test_tensor

In [None]:
# Predict a 0/1 flag for every battle in tests.csv.
y_pred_list = []
model.eval()  # inference mode: dropout is disabled
with torch.no_grad():
    for X_batch in scaled_test_tensor:
        X_batch = X_batch.to(device)
        y_test_pred = model(X_batch)
        # Round the sigmoid probability to the nearest class label.
        y_pred_tag = torch.round(y_test_pred)
        y_pred_list.append(y_pred_tag.cpu().numpy())

# Flatten the per-batch (batch, 1) arrays into one flat Python list.
# Concatenating keeps a final batch of size 1 intact, whereas squeeze().tolist()
# would collapse it to a bare float that chain.from_iterable cannot iterate.
y_pred_list = np.concatenate(y_pred_list).ravel().tolist() if y_pred_list else []

**Adding prediction column to tests.csv**

In [None]:
# Store the 0/1 prediction flag next to each test battle (assigned positionally).
tests['Predicted']=y_pred_list
tests

**Here, 1 means the first pokemon is the winner**

**And 0 means the second pokemon is the winner**

In [None]:
# Translate the 0/1 flag into the id of the predicted winner.
# The mask is derived from the raw predictions rather than from the 'Predicted' column,
# so the column never holds a mix of flags and ids and re-running this cell gives the
# same result (an already-written id of 1 can no longer be mistaken for the flag 1).
first_wins = np.asarray(y_pred_list) == 1
tests['Predicted'] = np.where(first_wins, tests['First_pokemon'], tests['Second_pokemon']).astype(int)
tests

In [None]:
# Export the test battles together with the predicted winner id.
tests.to_csv('Predicted_tests.csv',index=False)

**Verifying a battle by hand to see whether the model has learnt properly**

In [None]:
# Pick a single predicted battle (the row at position 9998) to inspect by hand.
tests[9998:9999]

In [None]:
# Reload the raw (un-encoded) stats so names and types are readable, then keep only the
# two pokemon with ids 643 and 259.
# NOTE(review): this overwrites the encoded `pokemon_stats` frame used by earlier cells.
pokemon_stats = pd.read_csv(os.path.join(path,'pokemon.csv'))
pokemon_stats = pokemon_stats.loc[((pokemon_stats['#']==643) | (pokemon_stats['#']==259))]
pokemon_stats

**We see that battle is about Elekid vs Swanna**


**Elekid is electric type pokemon, Swanna is water and flying type**

**Elekid should win this battle even though its stats are lower, because electric is strong against both water and flying types in pokemon battles**

**Hence, the model is doing well on this example**