In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import scipy
import plotly.express as px

from datasetCleaner import *
from YUVHandler import *
from pixelFeatures import *
from sklearn.feature_selection import mutual_info_classif, VarianceThreshold, SelectPercentile
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.model_selection import cross_val_predict, train_test_split, cross_val_score
from sklearn.metrics import confusion_matrix, f1_score
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE

In [4]:
# Build the merged dataset ../datasets/all/VTM_s0.csv.
#
# For every (video, QP) pair the per-file "features" and "target" CSVs are
# reduced to their 128x128 rows, joined on the key columns below, and labelled
# with target = 1 when the features-side `cost` exceeds the target-side
# `RDCost`, else 0.

# Sequences to process. The list is reversed in place right below, so files are
# read (and their rows appended) from the last entry to the first.
videos = ['Jockey_1080p', 'Beauty_1080p', 'RushFieldCuts', 'Netflix_TunnelFlag', 'Vidyo4', 'Dark', 'NetflixDinnerScene', 'KristenAndSara', 'Netflix_DrivingPOV']
# Sequences currently left out of the run:
#['Lips', 'SunBath', 'Netflix_Dancers', 'BuildingHall', 'ToddlerFountain', 'Touchdown_pass', 
videos.reverse()
qps = ['22', '27', '32','37']

# Columns that identify one row; used both as group-by keys and as merge keys.
KEY_COLUMNS = ['videoname','paramQP','frameWidth','frameHeight','CU_width','CU_height','topLeft_x','topLeft_y','bottomRight_x','bottomRight_y','POC','qtdepth','mtdepth','splitType']

# Integer split codes 1..5 are replaced by these names, in this order.
splits = ['QT_SPLIT', 'BI_HORZ', 'BI_VERT', 'TRI_HORZ', 'TRI_VERT']
SPLIT_NAMES = dict(enumerate(splits, start=1))


def _load_128x128_rows(csv_path, usecols=None, rename=None):
    """Read one per-video CSV and reduce it to one row per key combination.

    Steps, in order:
      1. keep rows with CU_width == 128 and CU_height == 128;
      2. keep rows with POC > 0 whose bottom-right corner lies strictly inside
         the frame (0 < bottomRight_x < frameWidth, 0 < bottomRight_y <
         frameHeight); the frame size is taken from the first remaining row;
      3. optionally rename columns;
      4. group by KEY_COLUMNS and take the minimum of every other column.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read.
    usecols : list of str, optional
        Forwarded to ``pd.read_csv``; ``None`` reads every column.
    rename : dict, optional
        Column rename mapping applied before grouping.

    Returns
    -------
    pandas.DataFrame
        One row per distinct KEY_COLUMNS combination, keys as ordinary columns.

    Raises
    ------
    ValueError
        If the file has no 128x128 rows, so the frame size cannot be read.
    """
    rows = pd.read_csv(csv_path, usecols=usecols).query('CU_width == 128 & CU_height == 128')
    if rows.empty:
        raise ValueError('no 128x128 rows in ' + csv_path)

    # `height` / `width` are picked up by the query string below through `@`.
    height = rows['frameHeight'].iloc[0]
    width = rows['frameWidth'].iloc[0]
    rows = rows.query('POC > 0 & bottomRight_x < @width & bottomRight_y < @height & bottomRight_x > 0 & bottomRight_y > 0')

    if rename:
        rows = rows.rename(columns=rename)

    # groupby(...).min() already collapses duplicate rows and ignores the old
    # index, so no separate drop_duplicates / reset_index pass is needed first.
    return rows.groupby(by=KEY_COLUMNS).min().reset_index()


# Collect the per-file frames and concatenate once at the end instead of
# growing `final_df` with pd.concat on every iteration.
frames = []
for video in videos:
    print('Video: ' + video)
    for qp in qps:
        df_features = _load_128x128_rows(
            f'../datasets/features/dataset_{video}_{qp}_features.csv',
            usecols=KEY_COLUMNS + ['cost'],
        )
        df_target = _load_128x128_rows(
            f'../datasets/target/dataset_{video}_{qp}_target.csv',
            rename={'PartSplit': 'splitType'},
        )

        # Both sides hold one row per key combination, so the inner join yields
        # unique rows on a fresh 0..n-1 index.
        df = pd.merge(df_features, df_target, on=KEY_COLUMNS)
        del df_features
        del df_target

        # NOTE(review): the value_counts() output saved further down in this
        # notebook lists only class 0 (60341 rows), i.e. `cost > RDCost` was
        # never true on that run. Confirm the comparison direction and that
        # `cost` and `RDCost` are on the same scale.
        df['target'] = (df['cost'] > df['RDCost']).astype('int64')
        df = df.drop(columns=['cost', 'RDCost'])

        df['splitType'] = df['splitType'].replace(SPLIT_NAMES)

        frames.append(df)

final_df = pd.concat(frames)

# NOTE(review): the row index (it restarts at 0 for every video/QP file) is
# still written as an unnamed first column, so the file layout is unchanged for
# existing readers. Pass index=False here once those readers no longer need it.
final_df.to_csv('../datasets/all/VTM_s0.csv')

Video: Netflix_DrivingPOV
Video: KristenAndSara
Video: NetflixDinnerScene
Video: Dark
Video: Vidyo4
Video: Netflix_TunnelFlag
Video: RushFieldCuts
Video: Beauty_1080p
Video: Jockey_1080p


In [5]:
# Reload the merged dataset from the CSV written by the dataset-building cell.
dataset_csv = '../datasets/all/VTM_s0.csv'
df = pd.read_csv(dataset_csv)

In [7]:
# Number of rows per value of the `target` label.
target_counts = df['target'].value_counts()
target_counts

0    60341
Name: target, dtype: int64

In [8]:
# Display one raw feature file from the VTM-9.0 tree for inspection.
# NOTE(review): in the recorded output the values do not appear to line up with
# their column names (e.g. `frameWidth` shows 22 for a file whose name carries
# QP 22) -- verify the header order of this CSV.
sample_features_csv = '../../VTM-9.0/features/dataset_BasketballPass_22_features.csv'
pd.read_csv(sample_features_csv)

Unnamed: 0,videoname,cost,frameWidth,frameHeight,paramQP,CU_width,CU_height,topLeft_x,topLeft_y,bottomRight_x,...,quarter3GradH,quarter3GradV,quarter3RatioGrad,quarter3Sum,quarter4Var,quarter4Mean,quarter4GradH,quarter4GradV,quarter4RatioGrad,quarter4Sum
0,BasketballPass,1.700000e+308,22,0,127,0,20030.000,465.369,44896,45696,...,0.459661,1762690.0,0,0,0,0,0,0,1.113580e-13,3.089580e-41
1,BasketballPass,7.742020e+08,22,0,63,0,4191.010,546.353,16288,36576,...,0.084967,542440.0,0,0,0,0,0,0,1.113580e-13,3.089580e-41
2,BasketballPass,6.278210e+07,22,0,31,0,832.001,558.684,6560,22272,...,0.006494,146016.0,0,0,0,0,0,0,1.113580e-13,3.089580e-41
3,BasketballPass,1.840090e+07,22,0,15,0,836.365,542.797,3840,5408,...,-1.000000,-1.0,0,0,0,0,0,0,1.113580e-13,3.089580e-41
4,BasketballPass,6.842220e+06,22,0,15,0,290.609,523.625,1344,4608,...,-1.000000,-1.0,0,0,0,0,0,0,1.113580e-13,3.089580e-41
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121547,BasketballPass,1.401500e+06,22,400,415,5,2298.370,403.062,640,56512,...,-1.000000,-1.0,0,0,0,0,0,0,1.113580e-13,3.089580e-41
121548,BasketballPass,2.700670e+06,22,400,415,5,1730.160,403.609,63232,60448,...,-1.000000,-1.0,0,0,0,0,0,0,1.113580e-13,3.089580e-41
121549,BasketballPass,4.126620e+05,22,400,403,5,1478.430,415.438,65152,65056,...,-1.000000,-1.0,0,0,0,0,0,0,1.113580e-13,3.089580e-41
121550,BasketballPass,7.791830e+05,22,404,411,5,1667.940,401.750,63200,62688,...,-1.000000,-1.0,0,0,0,0,0,0,1.113580e-13,3.089580e-41
