In [9]:
import sys
# Put the directory two levels above the working directory at the front of the
# module search path. NOTE(review): presumably this is the repository root,
# needed so the `lib` package imported further down resolves — confirm.
# The path is relative, so it depends on where the kernel was started.
sys.path.insert(0, r'../..')

In [22]:
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import pandas_profiling
import numpy as np

In [103]:
from lib.scoring import task_score
from lib.utils import convert_time

In [14]:
# CSV file that is loaded into `data` below; the path is relative to the
# working directory of the kernel.
DATA_PATH = '../../data/train/events_data.csv'

In [92]:
# Each row pairs an event label with the probability used when a random label
# is sampled (the second column is passed as `p=` to np.random.choice).
# NOTE(review): the probabilities closely match the per-type scores printed by
# the last cell of the notebook, so they are presumably empirical class shares.
EVENTS_TYPE, EVENTS_COUNTS = (
    list(column)
    for column in zip(
        ('удар по воротам', 0.502),
        ('угловой', 0.223),
        ('замена', 0.137),
        ('желтая карточка', 0.09),
        ('гол', 0.048),
    )
)

# Exclusive upper bound for generated event times.
# NOTE(review): presumably seconds — confirm against lib.utils.convert_time.
TOTAL_TIME = 6000

In [93]:
# Load the events table. Later cells read its `file_name`, `event_type` and
# `event_time` columns and pass the whole frame to task_score as ground truth.
data = pd.read_csv(DATA_PATH) 

In [104]:
# Apply the project helper convert_time to every `event_time` value and
# overwrite the column in place.
# NOTE(review): presumably this turns the raw timestamps into numbers that are
# comparable with the integer times produced by the generators below — confirm
# against lib.utils. Re-running this cell without reloading `data` applies
# convert_time a second time to the already converted values.
data['event_time'] = data['event_time'].apply(convert_time)

In [98]:
# Distinct file names, in order of first appearance; every baseline generator
# below emits predictions for each of them.
FILES = pd.unique(data['file_name'])

In [101]:
def gen_prediction_every_n_rand(files, n, total_time=None, event_types=None,
                                event_probs=None, seed=None):
    """Baseline: one randomly typed event every ``n`` time units per file.

    For every file a regular grid of times is built that starts at ``n // 2``
    and advances by ``n`` while staying below ``total_time``. Each grid point
    gets an event type sampled independently from ``event_types`` with
    probabilities ``event_probs``.

    Args:
        files: Iterable of file names to generate predictions for.
        n: Positive integer grid step (same unit as ``total_time``).
        total_time: Exclusive upper bound for generated times. Defaults to
            the notebook-level ``TOTAL_TIME``.
        event_types: Candidate event labels. Defaults to ``EVENTS_TYPE``.
        event_probs: Sampling probability per label, aligned with
            ``event_types``. Defaults to ``EVENTS_COUNTS``.
        seed: Optional seed. ``None`` (default) draws from NumPy's global
            generator, exactly as before; any other value is used to build a
            private ``RandomState`` so the result is reproducible and the
            global generator is left untouched.

    Returns:
        pandas.DataFrame with the columns ``file_name``, ``event_type`` and
        ``event_time``. The columns are present even when no row is generated.

    Raises:
        ValueError: If ``n`` is not positive.
    """
    if n <= 0:
        raise ValueError(f'n must be a positive integer, got {n!r}')

    # Globals are resolved at call time so the plain two-argument call keeps
    # working while callers (and tests) can override every knob explicitly.
    total_time = TOTAL_TIME if total_time is None else total_time
    event_types = EVENTS_TYPE if event_types is None else event_types
    event_probs = EVENTS_COUNTS if event_probs is None else event_probs
    rng = np.random if seed is None else np.random.RandomState(seed)

    # One draw per row, in file-major order, so the sequence of random draws
    # is the same as in the original nested loops.
    y_pred = [
        {
            'file_name': file,
            'event_type': rng.choice(event_types, p=event_probs),
            'event_time': time,
        }
        for file in files
        for time in range(n // 2, total_time, n)
    ]
    # Explicit columns keep the schema stable when y_pred is empty.
    return pd.DataFrame(y_pred, columns=['file_name', 'event_type', 'event_time'])

In [112]:
# Sweep the grid step for the random-type baseline, printing the score for
# every step and remembering the highest one.
best = 0
for n in (30, 60, 120, 200, 360, 600):
    y_pred = gen_prediction_every_n_rand(FILES, n)
    score = task_score(data, y_pred)
    if score > best:
        best = score
    print(f'n={n} score={score}')
print('Best:', best)

n=30 score=0.12725490196078432
n=60 score=0.15802213001383125
n=120 score=0.1449120937666489
n=200 score=0.10092348284960422
n=360 score=0.0761300555114988
n=600 score=0.053144375553587246
Best: 0.15802213001383125


In [113]:
def gen_prediction_every_n_best(files, n, event_type='удар по воротам', total_time=None):
    """Baseline: the same event type every ``n`` time units in every file.

    For every file a regular grid of times is built that starts at ``n // 2``
    and advances by ``n`` while staying below ``total_time``; one prediction
    with label ``event_type`` is emitted per grid point.

    Args:
        files: Iterable of file names to generate predictions for.
        n: Positive integer grid step (same unit as ``total_time``).
        event_type: Label assigned to every prediction. Defaults to the label
            that was previously hard-coded.
        total_time: Exclusive upper bound for generated times. Defaults to
            the notebook-level ``TOTAL_TIME``.

    Returns:
        pandas.DataFrame with the columns ``file_name``, ``event_type`` and
        ``event_time``. The columns are present even when no row is generated.

    Raises:
        ValueError: If ``n`` is not positive.
    """
    if n <= 0:
        raise ValueError(f'n must be a positive integer, got {n!r}')

    # Resolved at call time so the two-argument call keeps its old meaning.
    total_time = TOTAL_TIME if total_time is None else total_time

    y_pred = [
        {'file_name': file, 'event_type': event_type, 'event_time': time}
        for file in files
        for time in range(n // 2, total_time, n)
    ]
    # Explicit columns keep the schema stable when y_pred is empty.
    return pd.DataFrame(y_pred, columns=['file_name', 'event_type', 'event_time'])

In [124]:
# Sweep the grid step for the constant-type baseline, printing the score for
# every step and remembering the highest one.
best = 0
for n in (30, 60, 120, 200, 360, 600):
    y_pred = gen_prediction_every_n_best(FILES, n)
    score = task_score(data, y_pred)
    if score > best:
        best = score
    print(f'n={n} score={score}')
print('Best:', best)

n=30 score=0.09027119286933434
n=60 score=0.1644645340751043
n=120 score=0.2336394948335247
n=200 score=0.16794961511546536
n=360 score=0.11595394736842106
n=600 score=0.0721370604147881
Best: 0.2336394948335247


In [119]:
def gen_prediction_every_n_two_best(files, n, event_types=('удар по воротам', 'угловой'),
                                    total_time=None):
    """Baseline: several fixed event types at every grid point of every file.

    For every file a regular grid of times is built that starts at ``n // 2``
    and advances by ``n`` while staying below ``total_time``. At each grid
    point one prediction per label in ``event_types`` is emitted, in the
    order the labels are given.

    Args:
        files: Iterable of file names to generate predictions for.
        n: Positive integer grid step (same unit as ``total_time``).
        event_types: Labels emitted at every grid point. Defaults to the two
            labels that were previously hard-coded.
        total_time: Exclusive upper bound for generated times. Defaults to
            the notebook-level ``TOTAL_TIME``.

    Returns:
        pandas.DataFrame with the columns ``file_name``, ``event_type`` and
        ``event_time``. The columns are present even when no row is generated.

    Raises:
        ValueError: If ``n`` is not positive.
    """
    if n <= 0:
        raise ValueError(f'n must be a positive integer, got {n!r}')

    # Resolved at call time so the two-argument call keeps its old meaning.
    total_time = TOTAL_TIME if total_time is None else total_time

    # The label loop is innermost so rows for one time stamp stay adjacent,
    # matching the row order of the original implementation.
    y_pred = [
        {'file_name': file, 'event_type': event_type, 'event_time': time}
        for file in files
        for time in range(n // 2, total_time, n)
        for event_type in event_types
    ]
    # Explicit columns keep the schema stable when y_pred is empty.
    return pd.DataFrame(y_pred, columns=['file_name', 'event_type', 'event_time'])

In [121]:
# Sweep the grid step for the two-type baseline, printing the score for every
# step and remembering the highest one.
best = 0
for n in (30, 60, 120, 200, 360, 600):
    y_pred = gen_prediction_every_n_two_best(FILES, n)
    score = task_score(data, y_pred)
    if score > best:
        best = score
    print(f'n={n} score={score}')
print('Best:', best)

n=30 score=0.0697698002231011
n=60 score=0.13504442250740376
n=120 score=0.21560798548094373
n=200 score=0.17222767419038273
n=360 score=0.12923864363403711
n=600 score=0.08834729626808835
Best: 0.21560798548094373


In [116]:
def gen_prediction_rand_n_best(files, n, event_type='удар по воротам', total_time=None,
                               seed=None):
    """Baseline: ``n`` predictions of one event type at random times per file.

    Every time is drawn independently and uniformly from the integers
    ``0 .. total_time - 1``.

    Args:
        files: Iterable of file names to generate predictions for.
        n: Number of predictions per file; values ``<= 0`` produce no rows.
        event_type: Label assigned to every prediction. Defaults to the label
            that was previously hard-coded.
        total_time: Exclusive upper bound for generated times. Defaults to
            the notebook-level ``TOTAL_TIME``.
        seed: Optional seed. ``None`` (default) draws from NumPy's global
            generator, exactly as before; any other value is used to build a
            private ``RandomState`` so the result is reproducible and the
            global generator is left untouched.

    Returns:
        pandas.DataFrame with the columns ``file_name``, ``event_type`` and
        ``event_time``. The columns are present even when no row is generated.
    """
    # Resolved at call time so the two-argument call keeps its old meaning.
    total_time = TOTAL_TIME if total_time is None else total_time
    rng = np.random if seed is None else np.random.RandomState(seed)

    # One scalar draw per row, in file-major order, so the sequence of random
    # draws is the same as in the original nested loops.
    y_pred = [
        {
            'file_name': file,
            'event_type': event_type,
            'event_time': rng.randint(total_time),
        }
        for file in files
        for _ in range(n)
    ]
    # Explicit columns keep the schema stable when y_pred is empty.
    return pd.DataFrame(y_pred, columns=['file_name', 'event_type', 'event_time'])

In [118]:
# Sweep the number of random-time predictions per file, printing the score for
# every setting and remembering the highest one.
best = 0
for n in (10, 30, 40, 60, 100):
    y_pred = gen_prediction_rand_n_best(FILES, n)
    score = task_score(data, y_pred)
    if score > best:
        best = score
    print(f'n={n} score={score}')
print('Best:', best)

n=10 score=0.06350626118067978
n=30 score=0.12314939434724091
n=40 score=0.13495838287752676
n=60 score=0.14470531863919503
n=100 score=0.1344850948509485
Best: 0.14470531863919503


In [127]:
# Score the ground-truth rows of a single event type against the full ground
# truth, one event type at a time.
for event_type in EVENTS_TYPE:
    y_pred = data.loc[data['event_type'].eq(event_type)]
    print(event_type, task_score(data, y_pred))

удар по воротам 0.5015806111696522
угловой 0.22339304531085352
замена 0.136986301369863
желтая карточка 0.08956796628029505
гол 0.04847207586933614
