In [1]:
import sys
sys.path.insert(0, r'../..')

In [2]:
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import pandas_profiling
import numpy as np

In [3]:
from lib.scoring import task_score
from lib.utils import convert_time

In [4]:
# Relative path to the CSV with the training event annotations (loaded into `data`).
DATA_PATH = '../../data/train/events_data.csv'

In [5]:
# Event classes, as Russian labels: shot on goal, corner, substitution, yellow card, goal.
EVENTS_TYPE = ['удар по воротам', 'угловой', 'замена', 'желтая карточка', 'гол']
# One probability per class, aligned by position with EVENTS_TYPE; used as the `p=`
# argument of np.random.choice. Presumably the class frequencies in the training
# data — TODO confirm.
EVENTS_COUNTS = [0.502, 0.223, 0.137, 0.09, 0.048]
# Exclusive upper bound for generated event times. Presumably seconds — TODO confirm units.
TOTAL_TIME = 6000

In [6]:
# Load the event annotations; later cells read the file_name, event_type and event_time columns.
data = pd.read_csv(DATA_PATH) 

In [7]:
# Convert every event_time value with the project helper convert_time (from lib.utils).
# NOTE(review): the column is overwritten in place, so re-running this cell feeds
# already-converted values back into convert_time — confirm the helper tolerates
# that, or re-run the loading cell first.
data['event_time'] = data['event_time'].apply(convert_time)

In [8]:
# Distinct video file names found in the annotations; passed as `files` to the generators.
FILES = data['file_name'].unique()

In [9]:
# Display all file names as a single comma-separated string.
','.join(FILES)

'640612_5.mp4,640208_5.mp4,640202_5.mp4,640196_5.mp4,643734_5.mp4,639919_5.mp4,639900_5.mp4,640085_5.mp4,639939_5.mp4,639933_5.mp4,640606_5.mp4,641579_3.mp4,640680_5.mp4,640674_5.mp4,640600_3.mp4,640668_5.mp4,633020_5.mp4,633012_5.mp4,632243_5.mp4,632255_5.mp4,631762_5.mp4,631750_5.mp4,631646_5.mp4,631638_5.mp4'

In [10]:
def gen_prediction_every_n_all(files, n, total_time=None, event_types=None):
    """Predict every event type at a fixed period for every file.

    For each file, one row per event type is emitted at times
    ``n // 2, n // 2 + n, ...`` strictly below ``total_time``.

    Parameters
    ----------
    files : iterable of str
        Video file names to generate predictions for.
    n : int
        Period between consecutive predicted times; the first prediction is
        placed at ``n // 2``. ``n == 0`` makes ``range`` raise ``ValueError``.
    total_time : int, optional
        Exclusive upper bound for predicted times. Defaults to the
        notebook-level ``TOTAL_TIME``.
    event_types : sequence of str, optional
        Event labels to emit at every time step. Defaults to the
        notebook-level ``EVENTS_TYPE``.

    Returns
    -------
    pandas.DataFrame
        Columns ``file_name``, ``event_type``, ``event_time``; rows ordered by
        file, then time, then event type. The columns are present even when
        no rows are generated.
    """
    # Resolve the notebook-level defaults lazily so explicit arguments never
    # touch the globals.
    if total_time is None:
        total_time = TOTAL_TIME
    if event_types is None:
        event_types = EVENTS_TYPE

    records = [
        {'file_name': file_name, 'event_type': event, 'event_time': second}
        for file_name in files
        for second in range(n // 2, total_time, n)
        for event in event_types
    ]
    # Explicit columns keep the schema stable when `records` is empty.
    return pd.DataFrame(records, columns=['file_name', 'event_type', 'event_time'])

In [11]:
def gen_prediction_every_n_const(files, n, event, total_time=None):
    """Predict one fixed event type at a fixed period for every file.

    Parameters
    ----------
    files : iterable of str
        Video file names to generate predictions for.
    n : int
        Period between consecutive predicted times; the first prediction is
        placed at ``n // 2``. ``n == 0`` makes ``range`` raise ``ValueError``.
    event : str
        Event label written into every row.
    total_time : int, optional
        Exclusive upper bound for predicted times. Defaults to the
        notebook-level ``TOTAL_TIME``.

    Returns
    -------
    pandas.DataFrame
        Columns ``file_name``, ``event_type``, ``event_time``; rows ordered by
        file, then time. The columns are present even when no rows are
        generated.
    """
    if total_time is None:
        total_time = TOTAL_TIME

    records = [
        {'file_name': file_name, 'event_type': event, 'event_time': second}
        for file_name in files
        for second in range(n // 2, total_time, n)
    ]
    # Explicit columns keep the schema stable when `records` is empty.
    return pd.DataFrame(records, columns=['file_name', 'event_type', 'event_time'])

In [15]:
# Grid-search the prediction period independently for each event type and
# remember the best-scoring period per type.
total = 0
counts = {}
for event in EVENTS_TYPE:
    print(event)
    # Fall back to n=60 when no candidate period scores above zero.
    best, best_n = 0, 60
    for n in (30, 60, 120, 200, 360, 600):
        y_pred = gen_prediction_every_n_const(FILES, n, event)
        # NOTE(review): the factor 5 presumably undoes task_score's averaging
        # over five event types, giving a per-type score — confirm.
        score = task_score(data, y_pred) * 5
        if score > best:
            best, best_n = score, n
        print(f'n={n} score={score}')
    print('Best:', best, event)
    total += best
    counts[event] = best_n
# Mean of the per-type best scores, followed by the chosen period for each type.
print(total / 5)
print(counts)

удар по воротам
task_score: true_positives=476 false_positives=4324 false_negatives=0
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
n=30 score=0.09916666666666668
task_score: true_positives=473 false_positives=1927 false_negatives=3
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
n=60 score=0.19683728672492717
task_score: true_positives=407 false_positives=793 false_negatives=69
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
n=120 score=0.3207249802994484
task_score: true_positives=240 false_positives=480 false_negatives=236
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
n=200 score=0.2510460251046025
task_score: true_positives=141 false_positives=267 false_negatives=335
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
n=360 sc

In [10]:
def gen_prediction_every_n_rand(files, n, rng=None, total_time=None,
                                event_types=None, event_probs=None):
    """Predict a randomly drawn event type at a fixed period for every file.

    One row is emitted per file and per time step; its event type is sampled
    independently from ``event_types`` with probabilities ``event_probs``.

    Parameters
    ----------
    files : iterable of str
        Video file names to generate predictions for.
    n : int
        Period between consecutive predicted times; the first prediction is
        placed at ``n // 2``. ``n == 0`` makes ``range`` raise ``ValueError``.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. Pass a seeded generator for reproducible
        output. When omitted, the global ``np.random`` state is used, as
        before.
    total_time : int, optional
        Exclusive upper bound for predicted times. Defaults to the
        notebook-level ``TOTAL_TIME``.
    event_types : sequence of str, optional
        Labels to sample from. Defaults to the notebook-level ``EVENTS_TYPE``.
    event_probs : sequence of float, optional
        Sampling probability per label, aligned with ``event_types``.
        Defaults to the notebook-level ``EVENTS_COUNTS``.

    Returns
    -------
    pandas.DataFrame
        Columns ``file_name``, ``event_type``, ``event_time``; rows ordered by
        file, then time. The columns are present even when no rows are
        generated.
    """
    if total_time is None:
        total_time = TOTAL_TIME
    if event_types is None:
        event_types = EVENTS_TYPE
    if event_probs is None:
        event_probs = EVENTS_COUNTS

    # The np.random module offers the same `choice` call as a generator
    # object, so it doubles as the default sampler (legacy global state).
    sampler = np.random if rng is None else rng

    records = [
        {
            'file_name': file_name,
            'event_type': sampler.choice(event_types, p=event_probs),
            'event_time': second,
        }
        for file_name in files
        for second in range(n // 2, total_time, n)
    ]
    # Explicit columns keep the schema stable when `records` is empty.
    return pd.DataFrame(records, columns=['file_name', 'event_type', 'event_time'])

In [11]:
# Score the random-event baseline for several periods and report the best score.
best = 0
for n in (30, 60, 120, 200, 360, 600):
    y_pred = gen_prediction_every_n_rand(FILES, n)
    score = task_score(data, y_pred)
    if score > best:
        best = score
    print(f'n={n} score={score}')
print('Best:', best)

task_score: true_positives=450 false_positives=2017 false_negatives=26
task_score: true_positives=119 false_positives=868 false_negatives=93
task_score: true_positives=53 false_positives=614 false_negatives=77
task_score: true_positives=32 false_positives=421 false_negatives=53
task_score: true_positives=7 false_positives=219 false_negatives=39
n=30 score=0.09031667210928304
task_score: true_positives=322 false_positives=869 false_negatives=154
task_score: true_positives=63 false_positives=452 false_negatives=149
task_score: true_positives=26 false_positives=306 false_negatives=104
task_score: true_positives=15 false_positives=226 false_negatives=70
task_score: true_positives=3 false_positives=118 false_negatives=43
n=60 score=0.09208838884740801
task_score: true_positives=207 false_positives=384 false_negatives=269
task_score: true_positives=39 false_positives=228 false_negatives=173
task_score: true_positives=16 false_positives=167 false_negatives=114
task_score: true_positives=6 fal

In [12]:
def gen_prediction_every_n_best(files, n, total_time=None):
    """Predict 'удар по воротам' (shot on goal) at a fixed period for every file.

    Parameters
    ----------
    files : iterable of str
        Video file names to generate predictions for.
    n : int
        Period between consecutive predicted times; the first prediction is
        placed at ``n // 2``. ``n == 0`` makes ``range`` raise ``ValueError``.
    total_time : int, optional
        Exclusive upper bound for predicted times. Defaults to the
        notebook-level ``TOTAL_TIME``.

    Returns
    -------
    pandas.DataFrame
        Columns ``file_name``, ``event_type``, ``event_time``; rows ordered by
        file, then time. The columns are present even when no rows are
        generated.
    """
    if total_time is None:
        total_time = TOTAL_TIME

    records = [
        {'file_name': file_name, 'event_type': 'удар по воротам', 'event_time': second}
        for file_name in files
        for second in range(n // 2, total_time, n)
    ]
    # Explicit columns keep the schema stable when `records` is empty.
    return pd.DataFrame(records, columns=['file_name', 'event_type', 'event_time'])

In [13]:
# Score the single-class (shot on goal) baseline for several periods and report the best score.
best = 0
for n in (30, 60, 120, 200, 360, 600):
    y_pred = gen_prediction_every_n_best(FILES, n)
    score = task_score(data, y_pred)
    if score > best:
        best = score
    print(f'n={n} score={score}')
print('Best:', best)

task_score: true_positives=476 false_positives=4324 false_negatives=0
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
n=30 score=0.019833333333333335
task_score: true_positives=473 false_positives=1927 false_negatives=3
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
n=60 score=0.03936745734498544
task_score: true_positives=407 false_positives=793 false_negatives=69
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
n=120 score=0.06414499605988969
task_score: true_positives=240 false_positives=480 false_negatives=236
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
n=200 score=0.0502092050209205
task_score: true_positives=141 false_positives=267 false_negatives=335
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
n=360 score=0.03795423

In [14]:
def gen_prediction_every_n_two_best(files, n, total_time=None):
    """Predict the two fixed event types at a fixed period for every file.

    At every time step two rows are emitted, in this order:
    'удар по воротам' (shot on goal), then 'угловой' (corner).

    Parameters
    ----------
    files : iterable of str
        Video file names to generate predictions for.
    n : int
        Period between consecutive predicted times; the first prediction is
        placed at ``n // 2``. ``n == 0`` makes ``range`` raise ``ValueError``.
    total_time : int, optional
        Exclusive upper bound for predicted times. Defaults to the
        notebook-level ``TOTAL_TIME``.

    Returns
    -------
    pandas.DataFrame
        Columns ``file_name``, ``event_type``, ``event_time``; rows ordered by
        file, then time, then event type. The columns are present even when
        no rows are generated.
    """
    if total_time is None:
        total_time = TOTAL_TIME

    records = [
        {'file_name': file_name, 'event_type': event, 'event_time': second}
        for file_name in files
        for second in range(n // 2, total_time, n)
        for event in ('удар по воротам', 'угловой')
    ]
    # Explicit columns keep the schema stable when `records` is empty.
    return pd.DataFrame(records, columns=['file_name', 'event_type', 'event_time'])

In [15]:
# Score the two-class (shot on goal + corner) baseline for several periods and report the best score.
best = 0
for n in (30, 60, 120, 200, 360, 600):
    y_pred = gen_prediction_every_n_two_best(FILES, n)
    score = task_score(data, y_pred)
    if score > best:
        best = score
    print(f'n={n} score={score}')
print('Best:', best)

task_score: true_positives=476 false_positives=4324 false_negatives=0
task_score: true_positives=212 false_positives=4588 false_negatives=0
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
n=30 score=0.028666666666666667
task_score: true_positives=473 false_positives=1927 false_negatives=3
task_score: true_positives=211 false_positives=2189 false_negatives=1
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
n=60 score=0.056943467340820494
task_score: true_positives=407 false_positives=793 false_negatives=69
task_score: true_positives=187 false_positives=1013 false_negatives=25
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
n=120 score=0.09467560830478763
task_score: true_positives=240 false_positives=480 false_negatives=236
task_score: true_positives=111 false_positives=609 false_negatives=101
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
n=200 score=0.07724939990520796
task_scor

In [16]:
def gen_prediction_rand_n_best(files, n, rng=None, total_time=None):
    """Predict 'удар по воротам' (shot on goal) at ``n`` random times per file.

    Parameters
    ----------
    files : iterable of str
        Video file names to generate predictions for.
    n : int
        Number of predictions to draw for each file.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. Pass a seeded generator for reproducible
        output. When omitted, the global ``np.random`` state is used, as
        before.
    total_time : int, optional
        Exclusive upper bound for the uniformly drawn integer times (lower
        bound is 0). Defaults to the notebook-level ``TOTAL_TIME``.

    Returns
    -------
    pandas.DataFrame
        Columns ``file_name``, ``event_type``, ``event_time``; ``n`` rows per
        file, in file order. The columns are present even when no rows are
        generated.
    """
    if total_time is None:
        total_time = TOTAL_TIME

    # Pick the integer sampler: the legacy global function by default,
    # `integers` on a Generator, `randint` on a RandomState.
    if rng is None:
        draw_time = np.random.randint
    elif hasattr(rng, 'integers'):
        draw_time = rng.integers
    else:
        draw_time = rng.randint

    records = [
        {
            'file_name': file_name,
            'event_type': 'удар по воротам',
            'event_time': draw_time(total_time),
        }
        for file_name in files
        for _ in range(n)
    ]
    # Explicit columns keep the schema stable when `records` is empty.
    return pd.DataFrame(records, columns=['file_name', 'event_type', 'event_time'])

In [17]:
# Score the random-time (shot on goal) baseline for several per-file prediction counts
# and report the best score.
best = 0
for n in (10, 30, 40, 60, 100):
    y_pred = gen_prediction_rand_n_best(FILES, n)
    score = task_score(data, y_pred)
    if score > best:
        best = score
    print(f'n={n} score={score}')
print('Best:', best)

task_score: true_positives=65 false_positives=175 false_negatives=411
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
n=10 score=0.019969278033794162
task_score: true_positives=189 false_positives=531 false_negatives=287
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
n=30 score=0.03753723932472691
task_score: true_positives=234 false_positives=726 false_negatives=242
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
n=40 score=0.0389351081530782
task_score: true_positives=315 false_positives=1125 false_negatives=161
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
n=60 score=0.03935040599625234
task_score: true_positives=404 false_positives=1996 false_negatives=72
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
n=100 score=0.0326860

In [18]:
# Upper bound per class: submit the ground-truth rows of one event type as the
# prediction and print the resulting score next to the label.
for event_type in EVENTS_TYPE:
    y_pred = data.loc[data['event_type'].eq(event_type)]
    print(event_type, task_score(data, y_pred))

task_score: true_positives=476 false_positives=0 false_negatives=0
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
удар по воротам 0.2
task_score: empty y_pred
task_score: true_positives=212 false_positives=0 false_negatives=0
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
угловой 0.2
task_score: empty y_pred
task_score: empty y_pred
task_score: true_positives=130 false_positives=0 false_negatives=0
task_score: empty y_pred
task_score: empty y_pred
замена 0.2
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
task_score: true_positives=85 false_positives=0 false_negatives=0
task_score: empty y_pred
желтая карточка 0.2
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
task_score: empty y_pred
task_score: true_positives=46 false_positives=0 false_negatives=0
гол 0.2


In [19]:
def gen_prediction_every_n_best_plus_replace(files, n, total_time=None):
    """Predict periodic shots on goal plus one substitution per file.

    For each file, 'удар по воротам' (shot on goal) rows are emitted at times
    ``n // 2, n // 2 + n, ...`` strictly below ``total_time``, followed by a
    single 'замена' (substitution) row at ``45 * 60``.

    Parameters
    ----------
    files : iterable of str
        Video file names to generate predictions for.
    n : int
        Period between consecutive shot predictions; the first one is placed
        at ``n // 2``. ``n == 0`` makes ``range`` raise ``ValueError``.
    total_time : int, optional
        Exclusive upper bound for the periodic shot times. Defaults to the
        notebook-level ``TOTAL_TIME``. It does not affect the substitution
        row.

    Returns
    -------
    pandas.DataFrame
        Columns ``file_name``, ``event_type``, ``event_time``; per file, the
        shot rows in time order and then the substitution row. The columns
        are present even when ``files`` is empty.
    """
    if total_time is None:
        total_time = TOTAL_TIME

    # Fixed time of the single substitution guess (45 * 60 = 2700).
    substitution_time = 45 * 60

    records = []
    for file_name in files:
        records.extend(
            {'file_name': file_name, 'event_type': 'удар по воротам', 'event_time': second}
            for second in range(n // 2, total_time, n)
        )
        records.append(
            {'file_name': file_name, 'event_type': 'замена', 'event_time': substitution_time}
        )
    # Explicit columns keep the schema stable when `records` is empty.
    return pd.DataFrame(records, columns=['file_name', 'event_type', 'event_time'])

In [20]:
# Score the "shots every 120 + one substitution at 45 * 60" baseline.
y_pred = gen_prediction_every_n_best_plus_replace(FILES, 120)
score = task_score(data, y_pred)
# Bare last expression so the notebook displays the score.
score

task_score: true_positives=407 false_positives=793 false_negatives=69
task_score: empty y_pred
task_score: true_positives=9 false_positives=15 false_negatives=121
task_score: empty y_pred
task_score: empty y_pred


0.07655878916333796