In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import StackingClassifier,RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [2]:
pokemon = pd.read_csv("pokemon.csv")
battle = pd.read_csv("combats.csv")

In [3]:
pokemon.head()

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,5,Charmander,Fire,,39,52,43,60,50,65,1,False


Merging the data

In [4]:
data = battle.merge(pokemon, left_on='First_pokemon', right_on= '#')
data = data.merge(pokemon , left_on='Second_pokemon', right_on='#', suffixes=('_1','_2'))

In [5]:
data.head()

Unnamed: 0,First_pokemon,Second_pokemon,Winner,#_1,Name_1,Type 1_1,Type 2_1,HP_1,Attack_1,Defense_1,...,Type 1_2,Type 2_2,HP_2,Attack_2,Defense_2,Sp. Atk_2,Sp. Def_2,Speed_2,Generation_2,Legendary_2
0,266,298,298,266,Larvitar,Rock,Ground,50,64,50,...,Grass,Dark,70,70,40,60,40,60,3,False
1,702,701,701,702,Virizion,Grass,Fighting,91,90,72,...,Rock,Fighting,91,129,90,72,90,108,5,True
2,191,668,668,191,Togetic,Fairy,Flying,55,40,85,...,Psychic,,75,75,75,125,95,40,5,False
3,237,683,683,237,Slugma,Fire,,40,40,40,...,Dragon,,77,120,90,60,90,48,5,False
4,151,231,151,151,Omastar,Rock,Water,70,60,125,...,Bug,Rock,20,10,230,10,230,5,2,False


Type Effectiveness

In [6]:
#calculating stat Difference
data['Attack_diff'] = data['Attack_1'] - data['Attack_2']
data['Defence_diff'] = data['Defense_1'] - data['Defense_2']
data['Sp. Atk_diff'] = data['Sp. Atk_1'] - data['Sp. Atk_2']
data['Sp. Def_diff'] = data['Sp. Def_1'] - data['Sp. Def_2']
data['Speed_diff'] = data['Speed_1'] - data['Speed_2']
data['HP_diff'] = data['HP_1'] - data['HP_2']

#Legendary Difference
data['Legendary_1'] = data['Legendary_1'].astype(int)
data['Legendary_2'] = data['Legendary_2'].astype(int)

data['Legendary_diff'] = data['Legendary_1'] - data['Legendary_2']

In [7]:
#creating type effectiveness
type_effectiveness = {
    # Normal
    ('Normal','Rock'):0.5,('Normal','Ghost'):0.0,('Normal','Steel'):0.5,
    # Fire
    ('Fire','Fire'):0.5,('Fire','Water'):0.5,('Fire','Grass'):2.0,('Fire','Ice'):2.0,
    ('Fire','Bug'):2.0,('Fire','Rock'):0.5,('Fire','Dragon'):0.5,('Fire','Steel'):2.0,
    # Water
    ('Water','Fire'):2.0,('Water','Water'):0.5,('Water','Grass'):0.5,
    ('Water','Ground'):2.0,('Water','Rock'):2.0,('Water','Dragon'):0.5,
    # Electric
    ('Electric','Water'):2.0,('Electric','Electric'):0.5,('Electric','Grass'):0.5,
    ('Electric','Ground'):0.0,('Electric','Flying'):2.0,('Electric','Dragon'):0.5,
    # Grass
    ('Grass','Fire'):0.5,('Grass','Water'):2.0,('Grass','Grass'):0.5,
    ('Grass','Poison'):0.5,('Grass','Ground'):2.0,('Grass','Flying'):0.5,
    ('Grass','Bug'):0.5,('Grass','Rock'):2.0,('Grass','Dragon'):0.5,('Grass','Steel'):0.5,
    # Ice
    ('Ice','Fire'):0.5,('Ice','Water'):0.5,('Ice','Grass'):2.0,
    ('Ice','Ground'):2.0,('Ice','Flying'):2.0,('Ice','Dragon'):2.0,('Ice','Steel'):0.5,
    # Fighting
    ('Fighting','Normal'):2.0,('Fighting','Ice'):2.0,('Fighting','Rock'):2.0,('Fighting','Dark'):2.0,
    ('Fighting','Steel'):2.0,('Fighting','Poison'):0.5,('Fighting','Flying'):0.5,('Fighting','Psychic'):0.5,
    ('Fighting','Bug'):0.5,('Fighting','Ghost'):0.0,('Fighting','Fairy'):0.5,
    # Poison
    ('Poison','Grass'):2.0,('Poison','Poison'):0.5,('Poison','Ground'):0.5,
    ('Poison','Rock'):0.5,('Poison','Ghost'):0.5,('Poison','Steel'):0.0,
    ('Poison','Fairy'):2.0,
    # Ground
    ('Ground','Fire'):2.0,('Ground','Electric'):2.0,('Ground','Grass'):0.5,('Ground','Poison'):2.0,
    ('Ground','Flying'):0.0,('Ground','Bug'):0.5,('Ground','Rock'):2.0,('Ground','Steel'):2.0,
    # Flying
    ('Flying', 'Electric'):0.5,('Flying', 'Grass'): 2.0,('Flying', 'Fighting'): 2.0,
    ('Flying', 'Bug'): 2.0,('Flying', 'Rock'): 0.5,('Flying', 'Steel'): 0.5,
    # Psychic
    ('Psychic', 'Fighting'): 2.0,('Psychic', 'Poison'): 2.0,('Psychic', 'Psychic'): 0.5,
    ('Psychic', 'Steel'): 0.5,('Psychic', 'Dark'): 0.0,
    # Bug
    ('Bug', 'Fire'): 0.5,('Bug', 'Grass'): 2.0,('Bug', 'Fighting'): 0.5,
    ('Bug', 'Poison'): 0.5,('Bug', 'Flying'): 0.5,('Bug', 'Psychic'): 2.0,
    ('Bug', 'Ghost'): 0.5,('Bug', 'Dark'): 2.0,('Bug', 'Steel'): 0.5,('Bug', 'Fairy'): 0.5,
    # Rock
    ('Rock', 'Fire'): 2.0,('Rock', 'Ice'): 2.0,('Rock', 'Fighting'): 0.5,('Rock', 'Ground'): 0.5,
    ('Rock', 'Flying'): 2.0,('Rock', 'Bug'): 2.0,('Rock', 'Steel'): 0.5,
    # Ghost
    ('Ghost', 'Normal'): 0.0,('Ghost', 'Psychic'): 2.0,('Ghost', 'Ghost'): 2.0,('Ghost', 'Dark'): 0.5,
    # Dragon
    ('Dragon', 'Dragon'): 2.0,('Dragon', 'Steel'): 0.5,('Dragon', 'Fairy'): 0.0,
    # Dark
    ('Dark', 'Fighting'): 0.5,('Dark', 'Psychic'): 2.0,('Dark', 'Ghost'): 2.0,
    ('Dark', 'Dark'): 0.5,('Dark', 'Fairy'): 0.5,
    # Steel
    ('Steel', 'Fire'): 0.5,('Steel', 'Water'): 0.5,('Steel', 'Electric'): 0.5,('Steel', 'Ice'): 2.0,
    ('Steel', 'Rock'): 2.0,('Steel', 'Steel'): 0.5,('Steel', 'Fairy'): 2.0,
    # Fairy
    ('Fairy', 'Fire'): 0.5,('Fairy', 'Fighting'): 2.0,('Fairy', 'Poison'): 0.5,
    ('Fairy', 'Dragon'): 2.0,('Fairy', 'Dark'): 2.0,('Fairy', 'Steel'): 0.5,
}


In [8]:
def get_type_effectiveness(attacking_type, deffending_type):
    return type_effectiveness.get((attacking_type,deffending_type), 1.0)

data['type_effectiveness_1_vs_2'] = data.apply(
    lambda row: get_type_effectiveness(row['Type 1_1'], row['Type 1_2']),axis=1
)

data['type_effectiveness_2_vs_1'] =  data.apply(
    lambda row: get_type_effectiveness(row['Type 1_2'], row['Type 1_1']),axis=1
)

In [9]:
data['type_effectiveness_diff'] = data['type_effectiveness_1_vs_2'] - data['type_effectiveness_2_vs_1']
data['target'] = (data['Winner'] == data['Second_pokemon']).astype(int)

In [10]:
print(data.columns)

Index(['First_pokemon', 'Second_pokemon', 'Winner', '#_1', 'Name_1',
       'Type 1_1', 'Type 2_1', 'HP_1', 'Attack_1', 'Defense_1', 'Sp. Atk_1',
       'Sp. Def_1', 'Speed_1', 'Generation_1', 'Legendary_1', '#_2', 'Name_2',
       'Type 1_2', 'Type 2_2', 'HP_2', 'Attack_2', 'Defense_2', 'Sp. Atk_2',
       'Sp. Def_2', 'Speed_2', 'Generation_2', 'Legendary_2', 'Attack_diff',
       'Defence_diff', 'Sp. Atk_diff', 'Sp. Def_diff', 'Speed_diff', 'HP_diff',
       'Legendary_diff', 'type_effectiveness_1_vs_2',
       'type_effectiveness_2_vs_1', 'type_effectiveness_diff', 'target'],
      dtype='object')


Splitting the Data

In [11]:
feature = [
   'Attack_diff','Defence_diff', 'Sp. Atk_diff', 'Sp. Def_diff', 'Speed_diff',
    'HP_diff','Legendary_diff','type_effectiveness_diff'
]

x = data[feature]
y = data['target']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5, random_state=42)

Standardizing the Data

In [12]:
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.fit_transform(x_test)

Defining the stacked model

In [13]:
base1 = [
    ('rf',RandomForestClassifier(n_estimators=100, random_state=42)),
    ('xgb',XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)),
    ('lr',LogisticRegression(max_iter=1000, random_state=42)),
    ('svm',SVC(probability=True, random_state=42))
]

meta_model1 = MLPClassifier(hidden_layer_sizes=(50, 20), max_iter=1000, random_state=42)

stacked1 = StackingClassifier(
    estimators=base1,
    final_estimator=meta_model1,
    passthrough=True,  # set True to include original features in meta-model
    cv=10,               # cross-validation folds for base model predictions
    n_jobs=-1           # use all cores
)

base2 = [
    ('svm', SVC(probability=True, C=1, kernel='rbf', random_state=42)),
    ('xgb', XGBClassifier(n_estimators=150, max_depth=5, use_label_encoder=False, eval_metric='logloss', random_state=42)),
    ('rf', RandomForestClassifier(n_estimators=300, max_depth=None, random_state=42)),
    ('knn', KNeighborsClassifier(n_neighbors=5))  # adding KNN for model diversity
]

meta_model2 = LogisticRegression(max_iter=1000, random_state=42)

stacked2 = StackingClassifier(
    estimators=base2,
    final_estimator=meta_model2,
    passthrough=True,  # set True to include original features in meta-model
    cv=10,               # cross-validation folds for base model predictions
    n_jobs=-1           # use all cores
)


final_ensemble = VotingClassifier(
    estimators=[
        ('stacked 1', stacked1),
        ('stacked 2', stacked2)
    ],
    voting='soft',
    n_jobs=-1
)

In [14]:
model = final_ensemble.fit(x_train_scaled,y_train)

In [15]:
model

In [16]:
y_pred = model.predict(x_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print("Stacking Ensemble Accuracy:", accuracy)

Stacking Ensemble Accuracy: 0.95716


In [18]:
import pickle

# Save the model
with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)

# Load the model
with open('model.pkl', 'rb') as f:
    model = pickle.load(f)


In [19]:
import joblib

# Save scaler
joblib.dump(scaler, "scaler.pkl")


['scaler.pkl']