In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier, GradientBoostingClassifier
# To add interactions in linear regressions models
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.metrics import mean_squared_error

from vowpalwabbit import pyvw
import random

In [2]:
# Name of the column used as prediction target / bandit cost throughout the notebook.
predDimension = "kbs"
# Video name lists, loaded as arrays of strings from text files in the working directory.
# NOTE(review): v_names_test is loaded here but not used by the cells shown below.
v_names_train = np.loadtxt("train_names.csv", dtype= str)
v_names_test = np.loadtxt("test_names.csv", dtype=str)

# Columns describing the input video (used as the bandit context).
video_features = [
    "WIDTH",
    "HEIGHT",
    "SPATIAL_COMPLEXITY",
    "TEMPORAL_COMPLEXITY",
    "COLOR_COMPLEXITY",
    "ORIG_SIZE",
    "ORIG_KBS",
]

# Columns describing the encoder configuration.
config_features = [
    "cabac", "ref", "deblock", "analyse",
    "me", "subme", "mixed_ref", "me_range",
    "trellis", "8x8dct", "fast_pskip", "chroma_qp_offset",
    "bframes", "b_pyramid", "b_adapt", "direct",
    "weightb", "open_gop", "weightp", "scenecut",
    "rc_lookahead", "mbtree", "qpmax", "aq-mode",
]

# Subset of the configuration columns that is integer-encoded with pd.factorize below.
config_features_categorical = [
    "analyse", "me", "direct", "deblock", "b_pyramid",
    "b_adapt", "weightb", "open_gop", "scenecut", "rc_lookahead",
]

In [3]:
# Load the full measurement table: configuration options, measured values and
# video properties in one frame (see the df.head() output below).
# NOTE(review): the displayed frame contains an "Unnamed: 0" column, so the CSV
# presumably carries a saved index column -- confirm whether it is needed.
df = pd.read_csv("all_features.csv")

In [4]:
# Integer-encode the categorical configuration options in place: every distinct
# value of a column is mapped to a code in order of first appearance.
for c in config_features_categorical:
    codes = pd.factorize(df[c])[0]
    df[c] = codes

In [5]:
# Show the first rows to check the loaded columns and the factorized options.
df.head()

Unnamed: 0,configurationID,cabac,ref,deblock,analyse,me,subme,mixed_ref,me_range,trellis,...,etime,FILENAME,WIDTH,HEIGHT,SPATIAL_COMPLEXITY,TEMPORAL_COMPLEXITY,COLOR_COMPLEXITY,ORIG_SIZE,ORIG_DURATION,ORIG_KBS
0,1,0,1,0,0,0,0,0,16,0,...,2.14,Animation_1080P-01b3,1920,1080,0.098,0.004,0.005,1866272605,20.02,745763.278
1,101,1,2,1,1,1,6,1,16,1,...,3.4,Animation_1080P-01b3,1920,1080,0.098,0.004,0.005,1866272605,20.02,745763.278
2,102,1,2,1,1,1,6,1,16,1,...,2.71,Animation_1080P-01b3,1920,1080,0.098,0.004,0.005,1866272605,20.02,745763.278
3,103,1,2,0,2,2,6,1,16,1,...,2.78,Animation_1080P-01b3,1920,1080,0.098,0.004,0.005,1866272605,20.02,745763.278
4,104,1,16,1,1,1,6,1,24,1,...,2.74,Animation_1080P-01b3,1920,1080,0.098,0.004,0.005,1866272605,20.02,745763.278


In [6]:
# We separate the list of videos into a training (i.e. offline) set and a
# held-out set (i.e. online). The split is seeded so that a fresh
# "Restart & Run All" reproduces the same partition -- and therefore the same
# train_df / val_df and all numbers derived from them.
SPLIT_SEED = 42
train_ind, test_ind = train_test_split(
    list(range(len(v_names_train))),
    test_size=0.25,
    random_state=SPLIT_SEED,
)
# Video identifiers are the entries of v_names_train without their last four
# characters (presumably a 4-character file extension -- TODO confirm).
# training set indexes
train_index = [v_names_train[k][:-4] for k in train_ind]
# test set indexes
test_index = [v_names_train[k][:-4] for k in test_ind]
print(len(train_index), len(test_index))

# reset_index() gives both frames a fresh 0..n-1 index. Later cells mix
# label-based idxmin() with positional access (iloc, y_train[idx]), which is
# only consistent with such an index.
train_df = df[df.FILENAME.isin(train_index)].reset_index()
val_df = df[df.FILENAME.isin(test_index)].reset_index()

# Feature matrices and column-vector targets (shape (n, 1), as StandardScaler expects).
X_train = train_df[video_features + config_features]
y_train = np.array(train_df[predDimension]).reshape(-1, 1)
X_val = val_df[video_features + config_features]
y_val = np.array(val_df[predDimension]).reshape(-1, 1)

787 263


In [7]:
# Standardise the target using statistics estimated on the training targets only,
# then apply the same transformation to the validation targets.
# Note: y_train / y_val are rebound here, so re-running this cell scales the
# already-scaled values again.
obj_scaler = StandardScaler().fit(y_train)
y_train = obj_scaler.transform(y_train)
y_val = obj_scaler.transform(y_val)

In [8]:
# Regression
# One VW text example per training video, taken from the row with the smallest
# `predDimension` value for that video: "<scaled target> | name:value name:value ...".
train_examples = []
for video_name in train_df.FILENAME.unique():
    video_rows = train_df[train_df.FILENAME == video_name]
    idx = video_rows[predDimension].idxmin()
    row = train_df.iloc[idx]
    feature_part = "".join(
        f"{vf}:{row[vf]} " for vf in video_features + config_features
    )
    train_examples.append(f"{y_train[idx][0]} | " + feature_part)

In [9]:
# Fit a VW model (default options) on all examples except the last five,
# which the next cell uses for a quick look at the predictions.
model = pyvw.vw(quiet=True)

fit_examples = train_examples[:-5]
for example in fit_examples:
    model.learn(example)

In [10]:
# Predict the five held-back examples: drop the label (everything before "|")
# and print the prediction next to the original labelled example.
for ex in train_examples[-5:]:
    feature_part = ex.split("|")[1]
    pred = model.predict("| " + feature_part)
    print(pred, ex)

0.18659715354442596 -0.17442252837025632 | WIDTH:1280 HEIGHT:720 SPATIAL_COMPLEXITY:2.22 TEMPORAL_COMPLEXITY:0.47 COLOR_COMPLEXITY:0.186 ORIG_SIZE:663577707 ORIG_DURATION:20.0 ORIG_KBS:265431.082 cabac:1 ref:8 deblock:1 analyse:1 me:1 subme:4 mixed_ref:0 me_range:16 trellis:0 8x8dct:1 fast_pskip:1 chroma_qp_offset:0 bframes:0 b_pyramid:0 b_adapt:0 direct:0 weightb:0 open_gop:0 weightp:0 scenecut:0 rc_lookahead:0 mbtree:0 qpmax:69 aq-mode:1 
-0.29903724789619446 -0.26421018041303945 | WIDTH:1280 HEIGHT:720 SPATIAL_COMPLEXITY:2.39 TEMPORAL_COMPLEXITY:0.2289999999999999 COLOR_COMPLEXITY:0.109 ORIG_SIZE:829472010 ORIG_DURATION:20.02 ORIG_KBS:331457.346 cabac:1 ref:16 deblock:1 analyse:4 me:3 subme:11 mixed_ref:1 me_range:24 trellis:2 8x8dct:1 fast_pskip:0 chroma_qp_offset:-2 bframes:16 b_pyramid:1 b_adapt:2 direct:1 weightb:1 open_gop:1 weightp:2 scenecut:1 rc_lookahead:1 mbtree:0 qpmax:69 aq-mode:1 
-0.57582026720047 -0.3999576149405782 | WIDTH:1280 HEIGHT:720 SPATIAL_COMPLEXITY:0.693 TEM

In [11]:
# Bandit from collected samples
# One contextual-bandit example per training video, built from the row with the
# smallest `predDimension` value: label "configurationID:cost:probability" with
# cost = -1/value and a fixed probability of 0.005, followed by the features.
train_examples = []
for video_name in train_df.FILENAME.unique():
    video_rows = train_df[train_df.FILENAME == video_name]
    idx = video_rows[predDimension].idxmin()
    row = train_df.iloc[idx]
    label = f"{row['configurationID']}:-{1/row[predDimension]:.5f}:0.005 | "
    feature_part = "".join(
        f"{vf}:{row[vf]} " for vf in video_features + config_features
    )
    train_examples.append(label + feature_part)

In [12]:
# Inspect the first generated bandit example to check the label / feature format.
print(train_examples[0])

70:-0.01634:0.005 | WIDTH:1920 HEIGHT:1080 SPATIAL_COMPLEXITY:0.098 TEMPORAL_COMPLEXITY:0.004 COLOR_COMPLEXITY:0.005 cabac:1 ref:1 deblock:0 analyse:3 me:3 subme:2 mixed_ref:0 me_range:16 trellis:2 8x8dct:1 fast_pskip:1 chroma_qp_offset:0 bframes:3 b_pyramid:1 b_adapt:1 direct:2 weightb:1 open_gop:1 weightp:1 scenecut:1 rc_lookahead:2 mbtree:1 qpmax:69 aq-mode:1 


In [13]:
# Contextual-bandit model created with "--cb 201", trained on all collected
# examples except the last five (used for inspection in the next cell).
model = pyvw.vw("--cb 201", quiet=True)

fit_examples = train_examples[:-5]
for example in fit_examples:
    model.learn(example)

In [14]:
# Predict an action for each of the five held-back examples (label stripped)
# and print it next to the original labelled example.
for ex in train_examples[-5:]:
    feature_part = ex.split("|")[1]
    pred = model.predict("| " + feature_part)
    print(pred, ex)

177 91:-0.00015:0.005 | WIDTH:1280 HEIGHT:720 SPATIAL_COMPLEXITY:2.22 TEMPORAL_COMPLEXITY:0.47 COLOR_COMPLEXITY:0.186 cabac:1 ref:8 deblock:1 analyse:1 me:1 subme:4 mixed_ref:0 me_range:16 trellis:0 8x8dct:1 fast_pskip:1 chroma_qp_offset:0 bframes:0 b_pyramid:0 b_adapt:0 direct:0 weightb:0 open_gop:0 weightp:0 scenecut:0 rc_lookahead:0 mbtree:0 qpmax:69 aq-mode:1 
68 68:-0.00034:0.005 | WIDTH:1280 HEIGHT:720 SPATIAL_COMPLEXITY:0.693 TEMPORAL_COMPLEXITY:0.655 COLOR_COMPLEXITY:0.706 cabac:1 ref:1 deblock:1 analyse:1 me:1 subme:2 mixed_ref:0 me_range:16 trellis:0 8x8dct:1 fast_pskip:1 chroma_qp_offset:0 bframes:3 b_pyramid:1 b_adapt:1 direct:2 weightb:1 open_gop:1 weightp:1 scenecut:1 rc_lookahead:6 mbtree:1 qpmax:69 aq-mode:0 
195 195:-0.00045:0.005 | WIDTH:732 HEIGHT:720 SPATIAL_COMPLEXITY:4.128 TEMPORAL_COMPLEXITY:0.285 COLOR_COMPLEXITY:0.248 cabac:1 ref:16 deblock:1 analyse:4 me:3 subme:11 mixed_ref:1 me_range:24 trellis:2 8x8dct:1 fast_pskip:0 chroma_qp_offset:-2 bframes:16 b_pyramid

In [19]:
def performance(df, filename, predDimension, chosen_action):
    """Score a chosen configuration for one video on a 0..1 scale.

    The score is ``1 - (cur - min) / (max - min)``, where ``min`` / ``max`` are
    the smallest / largest ``predDimension`` values among all rows of the video
    and ``cur`` is the value of the chosen configuration. 1.0 therefore means
    the chosen configuration has the smallest value, 0.0 the largest.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns ``FILENAME``, ``configurationID`` and
        ``predDimension``. Any index is accepted.
    filename : value compared against ``df.FILENAME``
        Video to evaluate.
    predDimension : str
        Name of the measured column.
    chosen_action : value compared against ``df.configurationID``
        Configuration whose performance is scored.

    Returns
    -------
    float
        Normalised score; 1.0 when all configurations of the video share the
        same value (every choice is then equally good).

    Raises
    ------
    ValueError
        If ``df`` has no rows for ``filename``.
    IndexError
        If the video has no row for ``chosen_action``.
    """
    video_rows = df[df.FILENAME == filename]
    if video_rows.empty:
        raise ValueError(f"no rows found for video {filename!r}")

    # min()/max() work on values directly; the previous idxmin()+iloc mix was
    # only correct when the frame had a plain 0..n-1 index.
    values = video_rows[predDimension]
    min_perf = values.min()
    max_perf = values.max()

    chosen = video_rows[video_rows.configurationID == chosen_action][predDimension]
    if chosen.empty:
        raise IndexError(
            f"configuration {chosen_action!r} not available for video {filename!r}"
        )
    cur_perf = chosen.values[0]

    if max_perf == min_perf:
        # Degenerate range: avoid 0/0 and treat every choice as optimal.
        return 1.0
    return 1.0 - (cur_perf - min_perf) / (max_perf - min_perf)

In [29]:
# Bandit with exploration
# Model created with "--cb_explore 201" and "--cover 10"; predict() output is
# treated as a pmf over actions by the code below.
# NOTE(review): "-q UA" refers to namespaces starting with "U" and "A"; the
# examples built in this notebook do not declare such namespaces -- confirm
# whether this option has any effect here.
# NOTE(review): "--quiet" is given in the argument string and again as quiet=True.
model = pyvw.vw("--cb_explore 201 -q UA --quiet --cover 10", quiet=True)


## TODO: Evaluation: achieved cost vs. best cost (on validation set or during training).

def sample_custom_pmf(pmf):
    """Draw one index from a (possibly unnormalised) probability mass function.

    The weights are rescaled to sum to one, a uniform number is drawn with
    ``random.random()`` and the first index whose cumulative probability
    exceeds the draw is returned.

    Parameters
    ----------
    pmf : sequence of float
        Non-negative weights, one per index.

    Returns
    -------
    (int, float)
        The sampled index and its normalised probability.

    Raises
    ------
    ValueError
        If ``pmf`` is empty or its total weight is not positive.
    """
    total = sum(pmf)
    if not total > 0:
        raise ValueError("pmf must contain at least one positive weight")
    scale = 1 / total
    pmf = [x * scale for x in pmf]
    draw = random.random()
    sum_prob = 0.0
    for index, prob in enumerate(pmf):
        sum_prob += prob
        if sum_prob > draw:
            return index, prob
    # Floating-point rounding can leave the cumulative sum at or just below the
    # draw, in which case the loop above selects nothing. Fall back to the last
    # index that carries probability mass instead of implicitly returning None.
    for index in range(len(pmf) - 1, -1, -1):
        if pmf[index] > 0:
            return index, pmf[index]

def _video_context(frame, video_name):
    """Return the VW context string ("| name:value ...") for one video.

    The values are read from the first row of ``frame`` whose FILENAME equals
    ``video_name``. The space after "|" matters: without it VW would read the
    first token ("WIDTH:1920") as a namespace name with a scaling factor
    instead of as a feature.
    """
    video_rows = frame[frame.FILENAME == video_name]
    features = "".join(
        f"{vf}:{video_rows[vf].values[0]} " for vf in video_features
    )
    return "| " + features


def val_performance(model):
    """Evaluate ``model`` against a random baseline on the validation videos.

    For every video in ``val_df`` an action is sampled from the model's
    predicted pmf and scored with ``performance``; a uniformly random
    configuration id in 1..201 is scored the same way.

    Returns
    -------
    (float, float)
        Mean score of the bandit and mean score of the random baseline.
    """
    perf_bandit = []
    perf_random = []

    for video_name in val_df.FILENAME.unique():
        example = _video_context(val_df, video_name)

        pmf = model.predict(example)
        # TODO Replace pmf by softmax sampling or similar
        chosen_action_index, prob = sample_custom_pmf(pmf)
        # pmf positions are 0-based, configuration ids / VW actions are 1-based.
        act = chosen_action_index + 1

        perf_bandit.append(performance(val_df, video_name, predDimension, act))
        perf_random.append(performance(val_df, video_name, predDimension, random.randint(1, 201)))

    return sum(perf_bandit)/len(perf_bandit), sum(perf_random)/len(perf_random)


# Online training: 50 passes over the training videos. For each video the model
# proposes a pmf, one action is sampled, its cost is looked up in train_df and
# the (action, cost, probability) triple is fed back to the model.
for _ in range(50):
    for i, video_name in enumerate(train_df.FILENAME.unique()):
        context = _video_context(train_df, video_name)

        pmf = model.predict(context)
        chosen_action_index, prob = sample_custom_pmf(pmf)
        act = chosen_action_index + 1

        # Lower predDimension is better, so the cost is -1/value.
        cost = -1/train_df[(train_df.FILENAME == video_name) & (train_df.configurationID == act)][predDimension].values[0]
        # The label must name the 1-based action the cost was observed for, and
        # the probability needs enough digits: with 201 actions, two decimals
        # would round small probabilities to 0.00.
        example = f"{act}:{cost:.7f}:{prob:.10f} {context}"
        vw_format = model.parse(example)
        model.learn(vw_format)
        model.finish_example(vw_format)

        # Periodic progress report: (bandit score, random-baseline score).
        if i % 100 == 0:
            print(val_performance(model))
    

(0.735810181506565, 0.716681109203073)
(0.7077149672900137, 0.7216440634035958)
(0.7401073543258709, 0.7197357786084139)
(0.6991950624623177, 0.7445848630665668)
(0.7190966510614513, 0.7225726501602109)
(0.7240409063287633, 0.7109355448266435)
(0.7186347662821575, 0.7285743694554315)
(0.742603343434335, 0.7192013306032032)
(0.7146286860640524, 0.7022196853186183)
(0.719084207285007, 0.7498347463478041)
(0.7142828792531665, 0.7133280406761066)
(0.7174170251703506, 0.7029967665430763)
(0.7487183780185835, 0.7360470974350346)
(0.6859113806702306, 0.7455315655317661)
(0.7167550547671074, 0.7463371429924344)
(0.7160615241750087, 0.7326363982472224)
(0.7391142298400829, 0.7412655857286806)
(0.7239932854027372, 0.7077065227192465)
(0.6970097219503161, 0.7153322960486804)
(0.7044979915989832, 0.679411323416896)
(0.704386859211261, 0.7066470675152603)
(0.7041506561879515, 0.7339513399730798)
(0.7192926386255168, 0.6948090933448787)
(0.7422666241678045, 0.7221715942846866)
(0.714237151347274, 0.

KeyboardInterrupt: 

In [82]:
# Spot check: measured value of configuration 1 for one specific video.
# NOTE(review): the trailing [0] is a label lookup, and the video is only in
# train_df if the split above placed it in the training set.
train_df[(train_df.FILENAME == "Animation_1080P-01b3") & (train_df.configurationID == 1)][predDimension][0]

161.07

In [47]:
# Cascading bandits
# STATUS: Currently not working, not all combinations are always available
#
# One contextual bandit per configuration option. The options are decided one
# after another; every decision is appended to the context seen by the bandits
# that follow. The cost of the resulting configuration is fed back to all of them.

def _learn_cb(cb_model, action_index, cost, prob, context):
    """Feed one labelled contextual-bandit example to ``cb_model``.

    ``action_index`` is the 0-based position in the predicted pmf; VW action
    ids are 1-based, so the label uses ``action_index + 1``. The probability
    is written with enough digits that it is not rounded to zero.
    """
    labelled = f"{action_index + 1}:{cost:.7f}:{prob:.10f} {context}"
    vw_format = cb_model.parse(labelled)
    cb_model.learn(vw_format)
    cb_model.finish_example(vw_format)


# Cost reported to a bandit whose choice leads to a combination with no data.
INFEASIBLE_COST = 100

bandits = {}
bandit_order = []

for cfg in config_features:
    n_options = train_df[cfg].nunique()
    # Each entry: (bandit over the option's values, list mapping pmf index -> value).
    bandits[cfg] = (pyvw.vw(f"--cb_explore {n_options} -q UA --quiet --epsilon 0.2", quiet=True),
                    list(train_df[cfg].unique()))
    bandit_order.append((cfg, n_options))

# Smallest options first / non-ideal order, but a start
bandit_order = [c for c, _ in sorted(bandit_order, key=lambda x: x[1])]

perf_monitor = []

for _ in range(50):
    for i, video_name in enumerate(train_df.FILENAME.unique()):
        video_rows = train_df[train_df.FILENAME == video_name]
        base_example = "| "
        for vf in video_features:
            base_example += f"{vf}:{video_rows[vf].values[0]} "

        example_trace = []
        selector = (train_df.FILENAME == video_name)

        for cfg in bandit_order:
            # Local name chosen so the global `model` of earlier cells is not overwritten.
            cfg_model, values = bandits[cfg]
            example = str(base_example)
            pmf = cfg_model.predict(example)
            act_index, prob = sample_custom_pmf(pmf)

            example_trace.append((cfg, values[act_index], act_index, prob, example))
            selector = selector & (train_df[cfg] == values[act_index])

            if train_df[selector][predDimension].size == 0:
                # Can't run this configuration: penalise only the bandit whose
                # choice made the partial configuration infeasible, then stop.
                _learn_cb(cfg_model, act_index, INFEASIBLE_COST, prob, example)
                break

            base_example += f"{cfg}:{values[act_index]} "

        if train_df[selector][predDimension].size == 0:
            # Can't run this configuration
            continue
        # Lower predDimension is better, so the cost is -1/value.
        cost = -1/train_df[selector][predDimension].values[0]

        perf_monitor.append(cost)

        # Credit every bandit in the chain with the cost of the final configuration.
        for cfg, _, act_index, prob, example in example_trace:
            cfg_model, _ = bandits[cfg]
            _learn_cb(cfg_model, act_index, cost, prob, example)

        # Report the mean cost of every 50 successfully evaluated configurations.
        if len(perf_monitor) == 50:
            print(sum(perf_monitor)/len(perf_monitor))
            perf_monitor = []