In [1]:
import sys
sys.path.insert(0, r'../..')

In [2]:
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import imageio
import pickle
import os
from tqdm import tqdm_notebook as tqdm

In [3]:
from lib.video import *
from lib.cross_val import *
from lib.utils import convert_time

In [4]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import LeaveOneOut, train_test_split
from sklearn.linear_model import RidgeClassifier, LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score

In [5]:
MODEL_DIR = '../../output/features/image_32_16/'
Y_DATA = '../../data/train/events_data.csv'
TRAIN_FILES = np.array(['641579_3.mp4','643734_5.mp4','633012_5.mp4','631638_5.mp4','631646_5.mp4','631750_5.mp4'])
EVENTS_TYPE = ['удар по воротам', 'угловой', 'замена', 'желтая карточка', 'гол']

In [6]:
MIN_COMBO = 4

In [7]:
y_data = pd.read_csv(Y_DATA)
y_data['event_time'] = y_data['event_time'].apply(convert_time)

In [147]:
import keras
from keras.models import Sequential,Input,Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import LeakyReLU
from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau

In [167]:
batch_size = 64
epochs = 10
num_classes = 6
class MyNN:
    def __init__(self):
        fashion_model = Sequential()
        fashion_model.add(Conv2D(8, kernel_size=(2, 2),activation='linear',padding='same',input_shape=(16,32,3)))
        fashion_model.add(LeakyReLU(alpha=0.1))
        fashion_model.add(MaxPooling2D((2, 2),padding='same'))
        fashion_model.add(Dropout(0.1))
        fashion_model.add(Flatten())
        fashion_model.add(Dense(4, activation='linear'))
        fashion_model.add(LeakyReLU(alpha=0.1))
        fashion_model.add(Dense(num_classes, activation='softmax'))
        fashion_model.summary()
        fashion_model.compile(loss=keras.losses.categorical_crossentropy,
                              optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
        self.nn = fashion_model
    
    def fit(self, X, y):
        print(X.shape)
        one_hot_y = np.zeros((len(y), 6))
        for i in range(len(y)):
            if y[i] in EVENTS_TYPE:
                one_hot_y[EVENTS_TYPE.index(y[i]) + 1] = 1
            else:
                one_hot_y[0] = 1
        X_train, X_valid, y_train, y_valid = train_test_split(X, one_hot_y)
        fashion_train = self.nn.fit(X_train, y_train, 
                                    batch_size=batch_size,epochs=epochs,verbose=1,
                                    validation_data=(X_valid, y_valid))
    
    def predict(self, X):
        y_pred = self.nn.predict(X)
        res = []
        for y in y_pred:
            a = np.argmax(y)
            if a == 0:
                res.append('NONE')
            else:
                res.append(EVENTS_TYPE[a - 1])
        return res

In [168]:
def sampling(Xv, yv, k=1.5, max_size=17000):
    nX, ny = [], []
    bad_pairs = []
    for X, y in zip(Xv, yv):
        if y in EVENTS_TYPE:
            nX.append(X)
            ny.append(y)
        else:
            bad_pairs.append((X, y))
    bad_count = len(bad_pairs)
    for i in np.random.choice(bad_count, size=int(len(nX) * k)):
        X, y = bad_pairs[i]
        nX.append(X)
        ny.append(y)
    nX, ny = np.array(nX), np.array(ny)
    if max_size is not None and len(nX) > max_size:
        ind = np.random.choice(len(nX), size=max_size)
        nX, ny = nX[ind], ny[ind]
    return list(nX), list(ny)

In [169]:
def replace_none(y):
    return ['NONE' if v is None else v for v in y]

In [170]:
all_results = []

In [171]:
from collections import defaultdict

In [172]:
def load_Xy(files, is_sampling=True):
    X, y = [], []
    for file in files:
        path = MODEL_DIR + file + '.pickle'
        with open(path, 'rb') as f:
            Xv, yv, X_time = pickle.load(f)
        yv = replace_none(yv)
        if is_sampling:
            Xv, yv = sampling(Xv, yv)
        X += Xv
        y += yv
    X = np.array(X)
    y = np.array(y)
    return X, y

def load_X(files, k=1):
    X, X_file_names, X_time = [], [], []
    for file in files:
        path = MODEL_DIR + file + '.pickle'
        with open(path, 'rb') as f:
            Xv, yv, X_time = pickle.load(f)
        X += Xv[::k]
        X_time += X_time[::k]
        X_file_names += [file] * len(Xv[::k])
    X = np.array(X)
    return X, X_file_names, X_time

class MetaModel():
    def __init__(self, model):
        self.model = model
        self.pred_buf = {}
        self.fit_set = set()
    
    def fit(self, files):
        files = frozenset(files)
        if files in self.fit_set:
            return
        self.fit_set.add(files)
        X, y = load_Xy(files)
        print(y)
        print('Fit with', len(X), 'samples')
        self.model.fit(X, y)
        
    def score(self, files):
        X, y = load_Xy(files, False)
        print('Score with', len(X), 'samples')
        y_pred = self.model.predict(X)
        return f1_score(y, y_pred, average='micro')
        return pd.DataFrame({
            'true': y,
            'pred': y_pred
        })
        
        
    def predict(self, files):
        files = frozenset(files)
        X, X_file_names, X_time = load_X(files)
        print('Predict for', len(X), 'samples')
        if files in self.pred_buf:
            print('From pred_buf')
            y_pred = self.pred_buf[files]
        else:
            y_pred = self.model.predict(X)
            self.pred_buf[files] = y_pred
        result = []
        combo = 0
        combo_type = None
        last_event = defaultdict(lambda: -10000)
        for i in range(len(X)):
            if 0 <= X_time[i] <- 6200:
                continue
            if y_pred[i] in EVENTS_TYPE:
                if combo_type == y_pred[i]:
                    combo += 1
                else:
                    combo = 1
                combo_type = y_pred[i]
            else:
                combo = 0
            if combo == MIN_COMBO and last_event[combo_type] < X_time[i] - 59:
                result.append({
                    'file_name': X_file_names[i],
                    'event_type': combo_type,
                    'event_time': X_time[i]
                })
                last_event[combo_type] = X_time[i]
        result = pd.DataFrame(result)
        all_results.append(result)
        return result

In [173]:
def only_one_fold(cv):
    def fun(X):
        return [next(cv.split(X))]
    class s: pass
    obj = s
    s.split = fun
    return obj

In [174]:
NN = MetaModel(MyNN())
print(cross_val_score(NN, y_data, TRAIN_FILES, LeaveOneOut()).mean())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_49 (Conv2D)           (None, 16, 32, 8)         104       
_________________________________________________________________
leaky_re_lu_56 (LeakyReLU)   (None, 16, 32, 8)         0         
_________________________________________________________________
max_pooling2d_43 (MaxPooling (None, 8, 16, 8)          0         
_________________________________________________________________
dropout_58 (Dropout)         (None, 8, 16, 8)          0         
_________________________________________________________________
flatten_20 (Flatten)         (None, 1024)              0         
_________________________________________________________________
dense_39 (Dense)             (None, 4)                 4100      
_________________________________________________________________
leaky_re_lu_57 (LeakyReLU)   (None, 4)                 0         
__________

A Jupyter Widget

['удар по воротам' 'удар по воротам' 'удар по воротам' ..., 'NONE' 'NONE'
 'NONE']
Fit with 72628 samples
(72628, 16, 32, 3)
Train on 54471 samples, validate on 18157 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Predict for 7047 samples
task_score: true_positives=0 false_positives=1 false_negatives=36
cross_val_score: score=0.0 test_files=['641579_3.mp4']
['удар по воротам' 'удар по воротам' 'удар по воротам' ..., 'NONE' 'NONE'
 'NONE']
Fit with 73160 samples
(73160, 16, 32, 3)
Train on 54870 samples, validate on 18290 samples
Epoch 1/10

KeyboardInterrupt: 

In [None]:
ridge = MetaModel(RidgeClassifier(alpha=10, normalize=True))

In [35]:
ridge.fit(TRAIN_FILES[:2])
ans = ridge.score(TRAIN_FILES[-2:])
ans

['удар по воротам' 'удар по воротам' 'удар по воротам' ..., 'NONE' 'NONE'
 'NONE']
Fit with 22986 samples
Score with 26446 samples


0.60561143462149281

In [36]:
MIN_COMBO = 4
print(cross_val_score(ridge, y_data, TRAIN_FILES, LeaveOneOut()).mean())

A Jupyter Widget

['удар по воротам' 'удар по воротам' 'удар по воротам' ..., 'NONE' 'NONE'
 'NONE']
Fit with 58104 samples
Predict for 7047 samples
task_score: true_positives=12 false_positives=22 false_negatives=24
cross_val_score: score=0.20689655172413793 test_files=['641579_3.mp4']
['удар по воротам' 'удар по воротам' 'удар по воротам' ..., 'NONE' 'NONE'
 'NONE']
Fit with 58530 samples
Predict for 8714 samples
task_score: true_positives=20 false_positives=45 false_negatives=21
cross_val_score: score=0.23255813953488372 test_files=['643734_5.mp4']
['удар по воротам' 'удар по воротам' 'удар по воротам' ..., 'NONE' 'NONE'
 'NONE']
Fit with 58104 samples
Predict for 8618 samples
task_score: true_positives=26 false_positives=63 false_negatives=27
cross_val_score: score=0.22413793103448276 test_files=['633012_5.mp4']
['удар по воротам' 'удар по воротам' 'удар по воротам' ..., 'NONE' 'NONE'
 'NONE']
Fit with 58104 samples
Predict for 11891 samples
task_score: true_positives=8 false_positives=60 false_nega

In [19]:
all_results[-1]

Unnamed: 0,event_time,event_type,file_name
0,-297,удар по воротам,631750_5.mp4
1,13,удар по воротам,631750_5.mp4
2,77,удар по воротам,631750_5.mp4
3,141,удар по воротам,631750_5.mp4
4,201,удар по воротам,631750_5.mp4
5,261,удар по воротам,631750_5.mp4
6,325,удар по воротам,631750_5.mp4
7,419,удар по воротам,631750_5.mp4
8,481,удар по воротам,631750_5.mp4
9,550,удар по воротам,631750_5.mp4
