In [None]:
import time
import random
import glob

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import load_model


In [None]:
def plot_history(history):
  """Plot the per-epoch training curves recorded in a Keras ``History``.

  One figure is drawn per metric, in this order: mean absolute percentage
  error, mean absolute error, mean squared error. A metric that is absent
  from ``history.history`` is skipped instead of raising ``KeyError``. If a
  matching ``val_<metric>`` series was recorded (i.e. validation data was
  passed to ``fit``), it is drawn on the same axes as 'Val Error'.

  Args:
    history: object exposing ``.history`` (dict of metric name -> list of
      per-epoch values) and ``.epoch`` (list of epoch indices), as returned
      by ``model.fit``.
  """
  hist = pd.DataFrame(history.history)
  hist['epoch'] = history.epoch

  # (history key, y-axis label) for each figure, in display order.
  panels = [
      ('mean_absolute_percentage_error', 'Mean Abs Percentage Error'),
      ('mean_absolute_error', 'Mean Abs Error'),
      ('mean_squared_error', 'Mean Square Error'),
  ]

  for metric, ylabel in panels:
    if metric not in hist:
      # The model was compiled without this metric; nothing to draw.
      continue

    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.plot(hist['epoch'], hist[metric], label='Train Error')

    val_metric = f'val_{metric}'
    if val_metric in hist:
      plt.plot(hist['epoch'], hist[val_metric], label='Val Error')

    plt.legend()

  plt.show()


In [None]:
# Input feature columns read from each CSV row.
X_COLS = [
    'weight',
    'intra_error',
    'frame_avg_wavelet_energy',
    'coded_error',
    'sr_coded_error',
    'tr_coded_error',
    'pcnt_inter',
    'pcnt_motion',
    'pcnt_second_ref',
    'pcnt_third_ref',
    'pcnt_neutral',
    'inactive_zone_rows',
    'inactive_zone_cols',
    'MVr',
    'mvr_abs',
    'MVc',
    'mvc_abs',
    'MVrv',
    'MVcv',
    'mv_in_out_count',
    'new_mv_count',
    'raw_error_stdev',
    'cq_value',
]

# Every base feature also gets a 'nxt_'-prefixed twin (the shifted copy that
# load_data() appends). The right-hand side is built in full before the
# in-place extend, so only the base names above are iterated.
X_COLS += [f'nxt_{name}' for name in X_COLS]

# Target columns the network predicts. Subsets such as ['vmaf'] or
# ['ms_ssim', 'psnr', 'ssim'] were tried as alternatives.
Y_COLS = ['ms_ssim', 'psnr', 'ssim', 'vmaf']

display(f'{len(X_COLS)=}, {len(Y_COLS)=}')

In [None]:
def load_data():
    """Load every CSV under ``dataset/csv`` into one DataFrame.

    Each file is read on its own and widened with a ``nxt_``-prefixed copy of
    all its columns holding the values of the *following* row of the same
    file. The shift is applied per file, so a row is never paired with a row
    from a different CSV. Rows containing any NaN are dropped afterwards,
    which removes at least the last row of each file (it has no successor).

    Returns:
        pandas.DataFrame: all files stacked in sorted-path order. The index
        is the running row number assigned before ``dropna``, so it has gaps
        where rows were removed.

    Raises:
        ValueError: if no file matches ``dataset/csv/*.csv``.
    """
    csvs = sorted(glob.glob("dataset/csv/*.csv"))
    print(csvs)
    if not csvs:
        # pd.concat would raise a ValueError here too, but with a message
        # that does not point at the missing input files.
        raise ValueError("load_data: no CSV files match 'dataset/csv/*.csv'")

    frames = []
    for path in csvs:
        frame = pd.read_csv(path)
        # shift(-1) moves row i+1 up into row i, i.e. the *next* row's values.
        # (shift(1) would bring down the previous row instead.)
        following = frame.shift(-1).add_prefix('nxt_')
        frames.append(pd.concat([frame, following], axis=1))

    return pd.concat(frames, axis=0, ignore_index=True).dropna()

In [None]:
# Read all CSVs under dataset/csv into a single frame (see load_data) and
# render it so the loaded columns and row count can be inspected.
df = load_data()
display(df)

In [None]:
def build_model(hidden_units=(16, 64, 16), learning_rate=0.005):
    """Build and compile the dense regression network.

    The network takes ``len(X_COLS)`` inputs, passes them through one ReLU
    ``Dense`` layer per entry of ``hidden_units``, and ends in a linear
    ``Dense`` layer with ``len(Y_COLS)`` outputs. Calling it with no
    arguments reproduces the original 16-64-16 architecture and Nadam
    learning rate.

    Args:
        hidden_units: non-empty sequence of hidden-layer widths.
        learning_rate: learning rate passed to the Nadam optimizer.

    Returns:
        A compiled ``keras.Sequential`` model using mean-absolute-error loss
        and reporting MAPE, MAE and MSE as metrics.

    Raises:
        ValueError: if ``hidden_units`` is empty.
    """
    hidden_units = list(hidden_units)
    if not hidden_units:
        raise ValueError("build_model: hidden_units must contain at least one layer width")

    first_width, *other_widths = hidden_units

    # Only the first layer declares the input size.
    stack = [layers.Dense(first_width, activation=tf.nn.relu, input_shape=[len(X_COLS)])]
    stack += [layers.Dense(width, activation=tf.nn.relu) for width in other_widths]
    # Linear output: one unit per target column.
    stack.append(layers.Dense(len(Y_COLS)))

    model = keras.Sequential(stack)

    optimizer = keras.optimizers.Nadam(learning_rate=learning_rate)

    # loss: mean_squared_error or mean_absolute_error
    model.compile(
        loss='mean_absolute_error',
        optimizer=optimizer,
        metrics=['mean_absolute_percentage_error', 'mean_absolute_error', 'mean_squared_error'],
    )
    return model


In [None]:
# Split the loaded frame into model inputs and targets.
x_train = df[X_COLS]
# .copy() gives y_train its own data, so the in-place rescaling below is not
# chained assignment on a selection of df (no SettingWithCopyWarning, and df
# itself is guaranteed to stay untouched).
y_train = df[Y_COLS].copy()

# Rescale the two SSIM targets by 100; skipped when Y_COLS is narrowed to a
# subset that does not contain them.
# NOTE: the z-score below is scale-invariant, so this factor has no effect on
# the final y_train; it is kept to leave the original pipeline unchanged.
for ssim_col in ('ssim', 'ms_ssim'):
    if ssim_col in y_train.columns:
        y_train[ssim_col] *= 100

# Keep the normalisation statistics in named variables so they can be reused
# when the model is applied to new rows. A zero standard deviation (constant
# column) is replaced by 1 so the division yields 0 instead of NaN/inf.
x_mean, x_std = x_train.mean(), x_train.std().replace(0, 1)
y_mean, y_std = y_train.mean(), y_train.std().replace(0, 1)

# Standardise every column to zero mean and unit variance.
x_train = (x_train - x_mean) / x_std
y_train = (y_train - y_mean) / y_std

display(x_train)
display(y_train)

In [None]:
# Pair each standardised feature row with its target row.
dataset = tf.data.Dataset.from_tensor_slices((x_train.values, y_train.values))
# The shuffle buffer is as large as the dataset, and samples are grouped into
# batches of 2048.
train_dataset = (
    dataset
    .shuffle(buffer_size=len(x_train))
    .batch(batch_size=2048)
)

In [None]:
model = build_model()
# summary() prints the layer table itself and returns None, so wrapping it in
# display() only added a stray "None" to the cell output.
model.summary()

In [None]:
# model.fit(train_dataset, epochs=10)
# Train for 400 epochs on the shuffled, batched dataset. No validation data is
# passed to fit here, and the returned History object is kept for plotting.
history = model.fit(train_dataset, epochs=400)  # callbacks=[]

In [None]:
# Draw the per-epoch metric curves recorded by model.fit.
plot_history(history)

In [None]:
def testp(start, end):
    """Show features, true targets and model predictions for a slice of ``df``.

    The model was fit on z-scored features and z-scored targets, so the rows
    are standardised with the column statistics of ``df[X_COLS]`` before
    prediction. The network output is mapped back with the statistics of
    ``df[Y_COLS]``, which puts the predictions in the same units as the
    displayed targets. (The x100 rescaling applied to the SSIM targets before
    training cancels out under z-scoring, so the raw-column statistics are the
    right ones to invert with.)

    Args:
        start: first index label of the slice (inclusive).
        end: last index label of the slice (inclusive, ``.loc`` semantics).
    """
    tx = df.loc[start:end, X_COLS]
    ty = df.loc[start:end, Y_COLS]

    display(tx)
    display(ty)

    if tx.empty:
        # predict() cannot run on zero rows; report instead of raising.
        print(f'testp: no rows with index labels in [{start}, {end}]')
        return

    # Same standardisation as used for training; constant columns (std == 0)
    # are mapped to 0 rather than NaN.
    features = df[X_COLS]
    tx_norm = (tx - features.mean()) / features.std().replace(0, 1)

    # Pass a plain 2-D array: wrapping the frame in a list would make Keras
    # treat it as a list of separate model inputs.
    pred_norm = pd.DataFrame(
        model.predict(tx_norm.values),
        index=tx.index,
        columns=Y_COLS,
    )

    # Undo the target standardisation so predictions are comparable to `ty`.
    targets = df[Y_COLS]
    display(pred_norm * targets.std().replace(0, 1) + targets.mean())

testp(0, 5)
testp(189000, 189000 + 5)