# <font color='brown'>Setup </font>

In [1]:
%%time
import os
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn import datasets
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import r2_score, explained_variance_score, confusion_matrix, accuracy_score, classification_report, log_loss, brier_score_loss
from sklearn.tree import DecisionTreeClassifier
from matplotlib import pyplot as plt
import lightgbm as lgb

Wall time: 993 ms


In [2]:
path = 'D:\\Documents\\GitHub\\Classification_datascience\\webscrapping\\matches\\rounds\\'
df = pd.read_csv('{}matches_csv.csv'.format(path))

In [3]:
df = df[["AtkScore", "DefScore", "ATK_Bank", "DEF_Bank", "FinalWinner"]]

In [4]:
df.head()

Unnamed: 0,AtkScore,DefScore,ATK_Bank,DEF_Bank,FinalWinner
0,0,1,300,100,1
1,0,2,8800,2800,1
2,0,3,900,14500,1
3,0,4,9200,11000,1
4,1,4,3400,19300,1


In [5]:
X = df.drop(['FinalWinner'], axis='columns')
Y = df.FinalWinner
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.8, test_size=0.2, random_state=15)

In [6]:
#d_train=lgb.Dataset(X_train, label=Y_train)

In [88]:

class MatchReplay:
    def __init__(self, match_id: int, input_df: pd.DataFrame, **kwargs):
        self.df: pd.DataFrame = input_df
        self.match_id: int = match_id
        self.query: pd.DataFrame = input_df.query('MatchID == {}'.format(match_id))
        self.half = False
        if 'half' in kwargs and kwargs['half']:
            self.half = True

    def get_round_table(self) -> dict:
        g = self.query[["RoundNumber", "RoundID"]]
        g.drop_duplicates()
        return dict(zip(g.RoundNumber, g.RoundID))

    def get_atk_scores(self, **kwargs) -> List[int]:
        dfm = list(self.get_round_winners().values())
        score_dict = {'atk': 0, 'def': 0}
        atk_scores = []

        slice_index = 12

        if self.half:
            slice_index = 6

        for i in dfm[:slice_index]:
            if i == 1:
                score_dict['atk'] += 1
            atk_scores.append(score_dict['atk'])
        for j in dfm[slice_index:slice_index*2]:
            if j == 0:
                score_dict['atk'] += 1
            atk_scores.append(score_dict['atk'])

        return atk_scores

    def get_def_scores(self, **kwargs) -> List[int]:
        dfm = list(self.get_round_winners().values())
        score_dict = {'atk': 0, 'def': 0}
        def_scores = []

        slice_index = 12

        if self.half:
            slice_index = 6

        for i in dfm[:slice_index]:
            if i == 0:
                score_dict['def'] += 1
            def_scores.append(score_dict['def'])
        for j in dfm[slice_index:slice_index*2]:
            if j == 1:
                score_dict['def'] += 1
            def_scores.append(score_dict['def'])

        return def_scores

    def get_round_winners(self) -> dict:
        g = self.query[["RoundNumber", "FinalWinner"]]
        g.drop_duplicates()
        return dict(zip(g.RoundNumber, g.FinalWinner))

    def get_match_winner(self) -> int:
        winner = 0
        atks = self.get_atk_scores()
        defs = self.get_def_scores()

        if self.half:
            half_dict = {'atk': atks[-1], 'def': defs[-1]}
            if half_dict['atk'] and half_dict['def'] == 6:
                winner = 2
            max_score = max(half_dict, key=half_dict.get)
            if max_score == 'atk':
                winner = 1
            elif max_score == 'def':
                winner = 0
        elif atks[-1] == 12 and defs[-1] == 12:
            winner = 2
        elif atks[-1] == 13:
            winner = 1
        elif defs[-1] == 13:
            winner = 0

        return winner

    def generate_match_dataframe(self) -> pd.DataFrame:
        r_number = pd.Series(self.get_round_table().keys())
        r_atk = pd.Series(self.get_atk_scores())
        r_def = pd.Series(self.get_def_scores())
        r_winner = pd.Series([self.get_match_winner()] * len(r_number))
        r_ids = pd.Series([self.match_id] * len(r_number))
        r_atk_bank = pd.Series(self.get_atk_bank())
        r_def_bank = pd.Series(self.get_def_bank())

        frame = {'MatchID': r_ids, 'RoundNumber': r_number, 'AtkScore': r_atk, 'DefScore': r_def,
                 'ATK_Bank': r_atk_bank, 'DEF_Bank': r_def_bank,
                 'FinalWinner': r_winner}

        d_frame = pd.DataFrame(frame)
        d_frame.dropna()

        return d_frame

    def get_all_matches(self) -> set:
        return set(self.df.MatchID)

    def get_atk_bank(self) -> List[int]:
        return [
            max(self.query.query('RoundNumber == {}'.format(r)).ATK_bank)
            for r in self.get_round_table().keys()
        ]

    def get_def_bank(self) -> List[int]:
        return [
            max(self.query.query('RoundNumber == {}'.format(r)).DEF_bank)
            for r in self.get_round_table().keys()
        ]

    def get_big_dataframe(self):
        df_list = []
        match_indexes = list(self.get_all_matches())

        for i in match_indexes:
            self.match_id = i
            print(i)
            self.query: pd.DataFrame = self.df.query('MatchID == {}'.format(i))
            df_list.append(self.generate_match_dataframe())

        merged = pd.concat(df_list)
        merged.dropna(inplace=True)
        merged["AtkScore"] = merged["AtkScore"].astype(int)
        merged["DefScore"] = merged["DefScore"].astype(int)

        return merged

    def export_big_dataframe(self):
        big_df = self.get_big_dataframe()
        big_df.to_csv(r'matches\rounds\matches_csv.csv', index=False)
        print('SUCCESS!')

In [91]:
match = 26508
path2 = 'D:\\Documents\\GitHub\\Classification_datascience\\webscrapping\\matches\\rounds\\combined_csv.csv'
data = pd.read_csv('{}'.format(path2))

mr = MatchReplay(match, data, half=True)

In [92]:
mr.get_def_scores()

[0, 0, 0, 0, 0, 0, 1, 1, 2, 3, 4, 4]

# <font color='brown'>Model </font>

In [7]:
svm_model_linear = SVC(kernel = 'linear', C = 1).fit(X_train, Y_train)

NameError: name 'SVC' is not defined

In [None]:
svm_predictions = svm_model_linear.predict(X_test)
accuracy = svm_model_linear.score(X_test, Y_test)
cm = confusion_matrix(Y_test, svm_predictions)
cm

In [None]:
plt.figure(figsize = (8,6))
cm = (cm/cm.sum(axis=1).reshape(-1,1))

sns.heatmap(cm, cmap="YlGnBu", vmin=0., vmax=1., annot=True, annot_kws={'size':45})
plt.title("wa", fontsize = 5)
plt.ylabel('Predicted label')
plt.xlabel('True label')
plt.show()

# <font color='brown'>Metrics </font>

In [38]:
clf_pruned = DecisionTreeClassifier(criterion = "gini", random_state = 100,
                               max_depth=3, min_samples_leaf=5)
clf_pruned.fit(X_train, Y_train)
Y_pred = clf_pruned.predict(X_test)

In [40]:
from sklearn.metrics import classification_report
target_names = ['class 0', 'class 1', 'class 2']
print(classification_report(Y_test, Y_pred, target_names=target_names))

              precision    recall  f1-score   support

     class 0       0.64      0.87      0.74      2549
     class 1       0.71      0.66      0.68      2327
     class 2       0.00      0.00      0.00       733

    accuracy                           0.67      5609
   macro avg       0.45      0.51      0.47      5609
weighted avg       0.59      0.67      0.62      5609



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
plt.figure(figsize = (8,6))
cm = (cm/cm.sum(axis=1).reshape(-1,1))

sns.heatmap(cm, cmap="YlGnBu", vmin=0., vmax=1., annot=True, annot_kws={'size':45})
plt.title("wa", fontsize = 5)
plt.ylabel('Predicted label')
plt.xlabel('True label')
plt.show()

In [44]:
df.query('FinalWinner == {}'.format(2))

Unnamed: 0,AtkScore,DefScore,ATK_Bank,DEF_Bank,FinalWinner
78,0,1,100,0,2
79,0,2,8800,5200,2
80,0,3,1700,15300,2
81,0,4,9900,27900,2
82,1,4,3600,27700,2
...,...,...,...,...,...
27710,10,10,3900,39800,2
27711,10,11,2600,35100,2
27712,11,11,2900,17300,2
27713,12,11,8000,24500,2


In [3]:
# List of numbers from 1 to 10
l1 = list(range(1, 15))
l1

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]