In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Let pandas display up to 50 columns of a DataFrame.
pd.set_option('display.max_columns', 50)

# Input data files are available in the read-only "../input/" directory.
# Running this cell (click run or press Shift+Enter) prints the path of every file found under the input directory.

import os
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the ranked-game exports of all three tiers and stack them into one frame.
DATA_DIR = '/kaggle/input/league-of-legends-challenger-ranked-games2020'
raw = pd.concat(
    [
        pd.read_csv(f'{DATA_DIR}/{tier}_Ranked_Games.csv')
        # Same file order as before; it decides which copy of a duplicated row comes first.
        for tier in ('GrandMaster', 'Challenger', 'Master')
    ],
    # Each file is read with its own default row index, so the plain concatenation
    # would repeat row labels; renumber them so every row of `raw` has a unique label.
    ignore_index=True,
)
print(raw.shape)

In [None]:
from sklearn.base import (BaseEstimator, TransformerMixin)


class Deduplicator(TransformerMixin, BaseEstimator):
    """Pipeline step that removes duplicate rows from a DataFrame.

    Parameters
    ----------
    unique : column label or list of column labels
        Passed to ``DataFrame.drop_duplicates`` as the subset of columns that
        decides whether two rows are duplicates.
    """

    def __init__(self, unique):
        # Keep the value under the same name as the constructor argument:
        # scikit-learn's get_params() / clone() / repr read every __init__
        # parameter back with getattr(self, <parameter name>).
        self.unique = unique

    def fit(self, X, y=None):
        """Nothing is learned from ``X``; returns ``self``."""
        return self

    def transform(self, X):
        """Return ``X`` with rows that duplicate the ``unique`` column(s) dropped."""
        return X.drop_duplicates(subset=self.unique)
    
    
class Dropper(TransformerMixin, BaseEstimator):
    """Pipeline step that removes the given columns from a DataFrame.

    Parameters
    ----------
    columns : column label or list of column labels
        Columns to remove in ``transform``.
    """

    def __init__(self, columns):
        # Keep the value under the same name as the constructor argument:
        # scikit-learn's get_params() / clone() / repr read every __init__
        # parameter back with getattr(self, <parameter name>).
        self.columns = columns

    def fit(self, X, y=None):
        """Nothing is learned from ``X``; returns ``self``."""
        return self

    def transform(self, X):
        """Return ``X`` without the configured columns."""
        return X.drop(columns=self.columns)
    

class FieldTransformer(TransformerMixin, BaseEstimator):
    """
    Collapse every paired ``blue<Stat>`` / ``red<Stat>`` column into a single
    ``<Stat>`` column set to the respective value of (blue - red).

    Columns of ``X`` that do not come in such a pair are left out of the
    result. The ``Wins`` difference is treated as the label: ``transform``
    removes it from the returned features and keeps it in ``self.label``.
    """

    # Label Series captured by the most recent transform() call (None until then).
    # It is overwritten on every call, so read it right after transforming.
    label = None

    def fit(self, X, y=None):
        """Nothing is learned from ``X``; returns ``self``."""
        return self

    def transform(self, X):
        """Return the (blue - red) feature frame for ``X`` and store its label.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame with string column names, including ``blueWins`` and ``redWins``.

        Returns
        -------
        pandas.DataFrame
            One column per blue/red pair, named after the pair without the
            ``blue`` prefix, excluding ``Wins``.

        Raises
        ------
        KeyError
            If ``X`` has no ``blueWins`` / ``redWins`` pair to build the label from.
        """
        inp = pd.DataFrame()

        # The original author notes that gameDuration has no predictive power;
        # like every other column without a blue/red pair it is simply not copied.

        # Pair the columns using X's own header rather than the notebook-global
        # `raw`, so this step does not depend on a variable defined in another cell.
        for b in X.columns:
            if not b.startswith('blue'):
                continue
            r = b.replace('blue', 'red')
            if r in X.columns:
                # Present for both blue and red.
                inp[b.replace('blue', '')] = X[b] - X[r]

        if 'Wins' not in inp.columns:
            raise KeyError("X must contain both 'blueWins' and 'redWins' columns")

        # Take out the label column.
        self.label = inp['Wins']
        return inp.drop('Wins', axis=1)


from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


# Preprocessing chain applied to both splits. label() reads the FieldTransformer
# step back out of this pipeline, so keep that step's name and position unchanged.
pipeline = Pipeline(
    steps=[
        # Keep a single row per gameId.
        ('dedup', Deduplicator('gameId')),
        # Turn blue/red column pairs into (blue - red) features and capture the label.
        ('fields', FieldTransformer()),
        # Optional, currently disabled:
        # ('drop', Dropper(['WardPlaced', 'Wardkills'])),
        ('scaler', StandardScaler()),
    ]
)


def label(pipeline):
    """Return the labels captured by the pipeline's 'fields' step as a NumPy array.

    The 'fields' step stores the label of whatever frame it transformed last,
    so call this right after the ``fit_transform`` / ``transform`` whose labels
    you need.

    Parameters
    ----------
    pipeline : sklearn.pipeline.Pipeline
        Pipeline containing a step named ``'fields'`` that exposes a ``label`` Series.

    Returns
    -------
    numpy.ndarray
        The values of the stored label Series.
    """
    # Look the step up by name instead of by position, so the lookup keeps
    # working if steps are inserted into or reordered within the pipeline.
    return pipeline.named_steps['fields'].label.values

In [None]:
# Split data.
from sklearn.model_selection import train_test_split

# Deduplicate on gameId *before* splitting: the pipeline only deduplicates within
# each split, so a game present more than once in `raw` could otherwise end up
# in both the train and the test set.
# random_state pins the shuffle so re-running the notebook reproduces the same split.
train, test = train_test_split(
    raw.drop_duplicates('gameId'),
    test_size=0.3,
    random_state=42,
)
print(train.shape, test.shape)

In [None]:
# from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import LogisticRegression

# Fit the preprocessing pipeline on the training split and transform it.
# The labels must be read right afterwards: the FieldTransformer step replaces
# its stored label on every transform call.
Xtrain = pipeline.fit_transform(train)
ytrain = label(pipeline)

# fit() returns the estimator itself, so construction and fitting are chained.
reg = LogisticRegression().fit(Xtrain, ytrain)


from sklearn.model_selection import cross_val_score
# Cross-validated scores of a fresh model on the training data (shown as the cell output).
cross_val_score(LogisticRegression(), Xtrain, ytrain)

In [None]:
def describe(reg, X, y):
    """Print how many rows of ``X`` the fitted model ``reg`` predicts correctly.

    Parameters
    ----------
    reg : object with a ``predict(X)`` method
    X : array-like with a ``shape`` attribute; one row per sample
    y : array with a ``shape`` attribute; expected label for each row of ``X``

    Prints ``<correct>/<total> good predictions: <percentage>`` and returns nothing.
    """
    # Features and labels must describe the same number of samples.
    assert X.shape[0] == y.shape[0]
    hits = (reg.predict(X) == y).sum()
    n_rows = len(y)
    print(f'{hits}/{n_rows} good predictions: {hits / n_rows:%}')


# Describe train set.
describe(reg, Xtrain, ytrain)

# Describe test set.
# transform (not fit_transform): the test split goes through the pipeline as it was
# fitted on the training split.
Xtest = pipeline.transform(test)
# Must directly follow the transform above: label() returns whatever the 'fields'
# step captured during the most recent transform, which is now the test labels.
ytest = label(pipeline)
describe(reg, Xtest, ytest)