# Naive Bayes

Purpose of this notebook is to examin how accurate Naive Bayes classifier is on this data set.

In [1]:
import csv
import operator

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

## Simple preprocessing

In [20]:
decks = pd.read_csv("./data/decks.csv", index_col=0)
games = pd.read_csv("./data/training_games.csv", sep=';', index_col=0)

In [11]:
# Random permutation of games:
games = games.sample(frac=1)

In [13]:
decks['prob'] = [0] * 400
decks['prob'] = decks['prob'].astype('float')

for index, _ in decks.iterrows():
    temp = games.loc[games['Talia1'] == index]
    times_played = temp.shape[0]
    temp = temp.loc[temp['Wynik'] == 'PLAYER_0 WON']
    times_won = temp.shape[0]
    
    temp = games.loc[games['Talia2'] == index]
    times_played = times_played + temp.shape[0]
    temp = temp.loc[temp['Wynik'] == 'PLAYER_1 WON']
    times_won = times_won + temp.shape[0]
    
    decks['prob'][index] = times_won / times_played

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [16]:
bots = ['A1', 'A2', 'B1', 'B2']

for bot in bots:
    decks['prob_' + bot] = [0] * 400
    decks['prob_' + bot] = decks['prob_' + bot].astype('float')
    
    for index, _ in decks.iterrows():
        temp = games.loc[(games['Talia1'] == index) & (games['Gracz1'] == bot)]
        times_played = temp.shape[0]
        temp = temp.loc[temp['Wynik'] == 'PLAYER_0 WON']
        times_won = temp.shape[0]
        
        temp = games.loc[(games['Talia2'] == index) & (games['Gracz1'] == bot)]
        times_played = times_played + temp.shape[0]
        temp = temp.loc[temp['Wynik'] == 'PLAYER_1 WON']
        times_won = times_won + temp.shape[0]
        
        decks['prob_' + bot][index] = times_won/times_played

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [7]:
# Save to be able to access data many times without running preprocessing.
decks.to_csv('./data/adjusted_decks.csv')

In [21]:
# Get example of games
games_100_rows = games.head(100)
games_100_rows.to_csv('./data/games_100_rows.csv')

## Getting and preparing data

In [2]:
decks = pd.read_csv('./data/adjusted_decks.csv', index_col=0)
print(f'There are {decks.shape[0]} decks. Each one is represented as {decks.shape[1]} dimensional vector.\n')
print('This is how it looks like:')
decks.head()

There are 400 decks. Each one is represented as 344 dimensional vector.

This is how it looks like:


Unnamed: 0_level_0,Abyssal Enforcer,Acherus Veteran,Acidic Swamp Ooze,Acolyte of Agony,Acolyte of Pain,Al'Akir the Windlord,Alleycat,Aluneth,Ancestral Healing,Animal Companion,...,Priest,Rogue,Shaman,Warlock,Warrior,prob,prob_A1,prob_A2,prob_B1,prob_B2
deckName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
deck252103,0,0,0,0,2,0,0,0,0,0,...,0,0,0,0,1,0.408011,0.402576,0.429119,0.410319,0.391304
deck105300,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0.430965,0.469605,0.430657,0.374684,0.412121
deck822100,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0.620577,0.629283,0.609155,0.602469,0.646154
deck829295,0,0,0,0,2,0,0,0,0,0,...,0,0,0,1,0,0.54432,0.567732,0.572414,0.482051,0.548387
deck627915,0,0,0,0,0,1,0,0,0,0,...,0,0,1,0,0,0.456177,0.456311,0.481356,0.435616,0.45614


In [3]:
# games = pd.read_csv('./data/training_games.csv', index_col=0)
games = pd.read_csv('./data/games_100_rows.csv', index_col=0)
games.head()

Unnamed: 0_level_0,Gracz1,Talia1,Gracz2,Talia2,Wynik
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
100001.0,A1,deck113225,A1,deck731599,PLAYER_0 WON
100002.0,A1,deck694943,A1,deck929572,PLAYER_1 WON
100003.0,A1,deck182567,A1,deck525929,PLAYER_0 WON
100004.0,A1,deck219364,A1,deck757429,PLAYER_1 WON
100005.0,A1,deck826229,A1,deck337123,PLAYER_1 WON


In [4]:
# Save for later
decks['prob'].to_csv('./data/prob.csv')

  


In [5]:
prob = pd.read_csv("./data/prob.csv", index_col=0)

In [6]:
zero_data = np.zeros(shape=(0, 2 * decks.shape[1]))
column_names = 2 * list(decks.columns.values)
X_train = pd.concat([decks.loc[games.loc[i]["Talia1"]].append(decks.loc[games.loc[i]["Talia2"]])
                    for i in range(100001, 100001 + games.shape[0])], axis=1, ignore_index=True)

In [7]:
X_train = X_train.transpose()

In [8]:
dummies_ai1 = pd.get_dummies(games['Gracz1']).reset_index().drop(columns=['Id'])
dummies_ai2 = pd.get_dummies(games['Gracz2']).reset_index().drop(columns=['Id'])
X_train = pd.concat([X_train, dummies_ai1, dummies_ai2], axis=1)

In [9]:
# Save for later
with open('X_train.csv', 'w') as csvfile:
    X_train.head(1000).to_csv(csvfile)

In [10]:
y = games['Wynik']

In [11]:
y_dummies = pd.get_dummies(games['Wynik']).reset_index().drop(columns=['Id'])

In [12]:
with open('./data/y_dummies_train.csv', 'w') as csvfile:
    y_dummies.head(1000).to_csv(csvfile)

In [13]:
y = y.replace(['PLAYER_0 WON', 'PLAYER_1 WON'], [0, 1])
y = y.reset_index().drop(columns=['Id'])

In [14]:
with open('./data/y_train.csv', 'w') as csvfile:
    y.head(1000).to_csv(csvfile)

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X_train, y, test_size=0.1)

## Naive Bayes model

In [17]:
mnb = MultinomialNB()
mnb.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [18]:
mnb.score(X_test, y_test)

0.5

## Results:

In [59]:
test_decks = pd.read_json('../testDecks.json', lines=True)
test_decks['deckName'] = test_decks['deckName'].apply(operator.itemgetter(0))
test_decks['hero'] = test_decks['hero'].apply(operator.itemgetter(0))
column_names_single = list(decks.columns.values)
zero_data_single = np.zeros(shape=(0, decks.shape[1]))
#result = pd.DataFrame(zero_data_single, columns=column_names_single)
data = [[]]
hero_dict = {'Druid': 330, 'Hunter': 331, 'Mage': 332, 'Paladin': 333, 'Priest': 334, 'Rogue': 335, 'Shaman': 336, 'Warlock': 337, 'Warrior': 338}

for index, row in test_decks.iterrows():
    new_row = []
    
    for card in list(decks.columns):
        if card in row['cards']:
            new_row.append(row['cards'][card][0])
        else:
            new_row.append(0)
            
    new_row[hero_dict[row['hero']]] = 1
            
    data.append(new_row)

data.remove([])
result = pd.DataFrame(data, columns=column_names_single, index=test_decks.loc[:]['deckName'])
print(result.head())

            Abyssal Enforcer  Acherus Veteran  Acidic Swamp Ooze  \
deckName                                                           
deck244804                 0                0                  0   
deck124802                 0                0                  0   
deck687350                 0                0                  0   
deck517728                 0                0                  0   
deck130762                 0                0                  0   

            Acolyte of Agony  Acolyte of Pain  Al'Akir the Windlord  Alleycat  \
deckName                                                                        
deck244804                 0                1                     0         0   
deck124802                 0                2                     0         0   
deck687350                 0                1                     0         0   
deck517728                 0                2                     0         0   
deck130762                 0         

In [27]:
bots = ['A1', 'A2', 'B1', 'B2']

with open('results.csv', 'w') as csvfile:
    row_writer = csv.writer(csvfile, delimiter=';', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    
    for bot in bots:
        for j in range(200):
            sum_scores = 0
            
            for i in range(400):
                to_count = pd.concat([result.iloc[j], decks.iloc[i]])
                sum_scores = sum_scores + mnb.predict_proba([to_count])[0][0]
                
            mean = sum_scores / 6
            
            row_writer.writerow([bot, result.index[j], str(round(mean, 1))])