### [Bibliotecas]

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from YUVHandler import *
import pandas as pd
import seaborn as sns

from sklearn.feature_selection import mutual_info_classif, VarianceThreshold
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.model_selection import cross_val_predict, train_test_split
from sklearn.metrics import confusion_matrix, f1_score
from imblearn.under_sampling import RandomUnderSampler


### [Leitura de YUV]

In [None]:
# Input clip and the frame dimensions handed to the YUV reader.
video_file = '../../../(YUV) Videos/KristenAndSara_1280x720_60.yuv'
width = 1280
height = 720

# Load the Y, U and V arrays and preview the first frame.
# NOTE(review): YUVHandler comes from the star import of the local YUVHandler
# module; Y/U/V are assumed to be indexable by frame number - confirm.
yuv_handle = YUVHandler(video_file, width, height)
Y, U, V = yuv_handle.get_frame_arrays()
yuv_handle.plot_frame(Y[0], U[0], V[0])

# Edge-pad the first luma frame (bottom/right only) so both dimensions become
# multiples of the 128-sample block size used by the feature cells below.
# The previous fixed padding of (0,32)/(0,64) produced 752x1344, which is not
# aligned to the 128 grid, so the last row of blocks was silently truncated.
ctu_size = 128
padding_Y = np.pad(Y[0], ((0, (-height) % ctu_size), (0, (-width) % ctu_size)), 'edge')

### [VarPix] 
- Variância de amostras de luminância

In [None]:
# Variance of the luma samples of every 128x128 block of the padded frame,
# visited in raster order (rows of blocks, left to right).
# The slice end is exclusive, so the window must be `top:top + block`; the
# previous `h-1:h+126` window covered only 127x127 samples.
block = 128
varPix = np.array([
    np.var(padding_Y[top:top + block, left:left + block])
    for top in range(0, height, block)
    for left in range(0, width, block)
], dtype=float)

### [Mean]
- Média dos pixeis

In [None]:
# Mean luma value of every 128x128 block of the padded frame, in raster order.
# The slice end is exclusive, so the window must be `top:top + block`; the
# previous `h-1:h+126` window covered only 127x127 samples.
block = 128
mean = np.array([
    np.mean(padding_Y[top:top + block, left:left + block])
    for top in range(0, height, block)
    for left in range(0, width, block)
], dtype=float)

### [Grad] 
- Gradientes nas direções horizontal e vertical das amostras de luminância

In [None]:
# Summed luma gradients of every 128x128 block of the padded frame.
# np.gradient returns the derivative along axis 0 (rows) first and along
# axis 1 (columns) second, so `grad_x` accumulates the row-direction gradient.
# NOTE(review): that may be the opposite of the usual image x/y naming; the
# names are kept as they were.
# The window is `top:top + block` (exclusive end); the previous `h-1:h+126`
# window covered only 127x127 samples.
block = 128
grad_x_sums = []
grad_y_sums = []

for top in range(0, height, block):
    for left in range(0, width, block):
        Gx, Gy = np.gradient(padding_Y[top:top + block, left:left + block])
        grad_x_sums.append(Gx.sum())
        grad_y_sums.append(Gy.sum())

grad_x = np.array(grad_x_sums, dtype=float)
grad_y = np.array(grad_y_sums, dtype=float)

### [RatioGrad] 
- Razão entre gradientes

In [None]:
# Element-wise ratio between the two per-block gradient sums.
# Blocks whose grad_y sum is zero yield inf/nan (NumPy only warns).
ratioGrad = np.divide(grad_x, grad_y)

### [QuarterVarPix]
- VarPix nos sub-blocos da CU

In [None]:
# Luma variance of the four 64x64 sub-blocks of every 128x128 block.
# The result is one flat array: for each block (raster order) the values are
# stored as top-left, top-right, bottom-left, bottom-right.
# Sub-block windows are `start:start + half` (exclusive end); the previous
# `i-1:i+62` window covered only 63x63 samples.
block = 128
half = block // 2
quarterVarPix = np.array([
    np.var(padding_Y[sub_top:sub_top + half, sub_left:sub_left + half])
    for top in range(0, height, block)
    for left in range(0, width, block)
    for sub_top in (top, top + half)
    for sub_left in (left, left + half)
], dtype=float)

### [Dataset - ExtractionClass]

In [None]:
class Extract_Features:
    """Block-wise statistics (variance, mean, gradients) of one 2-D frame.

    The frame is split into a grid of blocks whose size is given as a string
    such as ``'128x128'``; the first number is used as the block height and
    the second as the block width. Every per-block table identifies a block by
    its inclusive corner coordinates ``topLeft_x, topLeft_y, bottomRight_x,
    bottomRight_y``.

    Parameters
    ----------
    height, width : int
        Extent of the frame area to cover with blocks.
    frame : 2-D array
        Samples of the frame (assumed to have shape ``(height, width)``).
    """

    # Coordinate columns shared by every per-block table.
    _COORD_COLUMNS = ['topLeft_x', 'topLeft_y', 'bottomRight_x', 'bottomRight_y']
    # Labels given to the sub-blocks of a block, in raster order.
    _QUARTER_POSITIONS = ['topLeft', 'topRight', 'bottomLeft', 'bottomRight']

    def __init__(self, height, width, frame):
        self.height = height
        self.width = width
        self.Y = frame
        self.block_size_height = 0
        self.block_size_width = 0
        self.frame_padding = 0

    def setBlock(self, block_size):
        """Select the block size and rebuild the edge-padded frame.

        The frame is padded at the bottom/right with replicated edge samples
        up to the next multiple of the block size, so that every block of the
        grid is complete. Uses the instance's own ``height``/``width`` (the
        previous version read module-level globals and padded by
        ``size % block``, which does not reach a multiple of the block size).
        """
        parts = block_size.split('x')
        self.block_size_height = int(parts[0])
        self.block_size_width = int(parts[1])
        pad_bottom = (-self.height) % self.block_size_height
        pad_right = (-self.width) % self.block_size_width
        self.frame_padding = np.pad(self.Y, ((0, pad_bottom), (0, pad_right)), 'edge')

    def setDataFrame(self, array, columns):
        """Build a DataFrame whose i-th column is ``array[:, i]`` named ``columns[i]``."""
        return pd.DataFrame(
            {name: array[:, idx] for idx, name in enumerate(columns)},
            columns=columns,
        )

    def _blockTable(self, block_size, feature_columns, compute):
        """Evaluate ``compute(block)`` on every block and tabulate the results.

        ``compute`` receives the 2-D samples of one block and returns a
        sequence with one value per name in ``feature_columns``.
        """
        self.setBlock(block_size)
        bh, bw = self.block_size_height, self.block_size_width
        rows = []
        for h in range(0, self.height, bh):
            for w in range(0, self.width, bw):
                # Slice ends are exclusive: h:h+bh covers rows h .. h+bh-1,
                # matching the inclusive bottomRight coordinates stored below.
                block = self.frame_padding[h:h + bh, w:w + bw]
                rows.append([w, h, w + bw - 1, h + bh - 1, *compute(block)])
        columns = self._COORD_COLUMNS + list(feature_columns)
        table = np.array(rows, dtype=float).reshape(-1, len(columns))
        return self.setDataFrame(table, columns)

    def getVariance(self, block_size):
        """Return a table with the sample variance (``VarPix``) of every block."""
        return self._blockTable(block_size, ['VarPix'], lambda block: (np.var(block),))

    def getGradients(self, block_size):
        """Return a table with ``Grad_x``, ``Grad_y`` and ``RatioGrad`` per block.

        ``Grad_x``/``Grad_y`` are the sums of the first/second array returned
        by ``np.gradient`` (derivative along axis 0, then along axis 1) and
        ``RatioGrad`` is ``Grad_x / Grad_y`` (inf/nan when ``Grad_y`` is zero).
        NOTE(review): axis 0 is the row direction, which may be the opposite
        of the usual image x/y naming; column names are kept for callers.
        """
        def summed_gradients(block):
            Gx, Gy = np.gradient(block)
            sum_x, sum_y = Gx.sum(), Gy.sum()
            return sum_x, sum_y, sum_x / sum_y

        return self._blockTable(block_size, ['Grad_x', 'Grad_y', 'RatioGrad'], summed_gradients)

    def getMean(self, block_size):
        """Return a table with the sample mean (``Mean``) of every block."""
        return self._blockTable(block_size, ['Mean'], lambda block: (np.mean(block),))

    def getQuarterPos(self, h, w, df_quarter, df_block):
        """Select the rows belonging to the block whose top-left corner is (w, h).

        Returns ``(quarter, feature)``: the rows of ``df_quarter`` lying fully
        inside the current-size block, and the row(s) of ``df_block`` matching
        that block exactly. Both are fresh frames with a reset index.
        """
        right = w + (self.block_size_width - 1)
        bottom = h + (self.block_size_height - 1)

        inside = (
            (df_quarter['topLeft_x'] >= w)
            & (df_quarter['topLeft_y'] >= h)
            & (df_quarter['bottomRight_x'] <= right)
            & (df_quarter['bottomRight_y'] <= bottom)
        )
        quarter = df_quarter[inside].reset_index(drop=True)

        exact = (
            (df_block['topLeft_x'] == w)
            & (df_block['topLeft_y'] == h)
            & (df_block['bottomRight_x'] == right)
            & (df_block['bottomRight_y'] == bottom)
        )
        feature = df_block[exact].reset_index(drop=True)

        return quarter, feature

    def getQuarterVariance(self, block_size, quarter_block_size, varPix, grads, mean):
        """Attach sub-block features to the block-level tables.

        ``varPix``, ``grads`` and ``mean`` are block-level tables (as returned
        by ``getVariance``/``getGradients``/``getMean`` for ``block_size``).
        For every block, the variance, gradient and mean of its sub-blocks of
        size ``quarter_block_size`` are added as ``Quarter<Feature>_<position>``
        columns; the three enriched tables are then merged on their common
        columns and returned. Blocks with fewer than four sub-blocks get
        missing values in the remaining positions.

        The sub-block tables are computed on this instance (the previous
        version went through a module-level ``dataset`` variable).
        """
        quarterVar = self.getVariance(quarter_block_size)
        quarterGrad = self.getGradients(quarter_block_size)
        quarterMean = self.getMean(quarter_block_size)

        # The calls above switched the active block size; restore it.
        self.setBlock(block_size)

        var_parts, grad_parts, mean_parts = [], [], []

        for h in range(0, self.height, self.block_size_height):
            for w in range(0, self.width, self.block_size_width):

                quarterVar_aux, varPix_aux = self.getQuarterPos(h, w, quarterVar, varPix)
                quarterGrad_aux, grad_aux = self.getQuarterPos(h, w, quarterGrad, grads)
                quarterMean_aux, mean_aux = self.getQuarterPos(h, w, quarterMean, mean)

                available = min(len(self._QUARTER_POSITIONS), quarterVar_aux.shape[0])
                for idx in range(available):
                    label = self._QUARTER_POSITIONS[idx]
                    varPix_aux['QuarterVarPix_' + label] = quarterVar_aux['VarPix'].iloc[idx]
                    grad_aux['QuarterGrad_x_' + label] = quarterGrad_aux['Grad_x'].iloc[idx]
                    grad_aux['QuarterGrad_y_' + label] = quarterGrad_aux['Grad_y'].iloc[idx]
                    grad_aux['QuarterRatioGrad_' + label] = quarterGrad_aux['RatioGrad'].iloc[idx]
                    mean_aux['QuarterMean_' + label] = quarterMean_aux['Mean'].iloc[idx]

                var_parts.append(varPix_aux)
                grad_parts.append(grad_aux)
                mean_parts.append(mean_aux)

        if not var_parts:
            # Nothing to cover (zero height or width).
            return pd.DataFrame()

        # Concatenate and merge once instead of on every iteration.
        v = pd.concat(var_parts, sort=False)
        g = pd.concat(grad_parts, sort=False)
        m = pd.concat(mean_parts, sort=False)
        return pd.merge(pd.merge(v, g), m)

    def getInconsistency(self, df, feature):
        """Compute horizontal/vertical inconsistency of a quarter feature.

        For every row of ``df``, with f1..f4 the ``Quarter<feature>_`` values
        at topLeft, topRight, bottomLeft and bottomRight::

            HI = |f1 - f2| + |f3 - f4|
            VI = |f1 - f3| + |f2 - f4|

        Rows with any missing quarter value yield NaN (the previous
        ``f3 != None or f4 != None`` test never guarded against NaN and would
        raise on a single ``None``).

        Returns ``(HI, VI)`` as object arrays of shape ``(len(df) + 1, 1)``
        whose first row is a ``None`` placeholder, which callers slice off
        with ``[1:]``.
        """
        def quarter_values(position):
            return pd.to_numeric(df['Quarter' + feature + '_' + position])

        f1 = quarter_values('topLeft')
        f2 = quarter_values('topRight')
        f3 = quarter_values('bottomLeft')
        f4 = quarter_values('bottomRight')

        horizontal = (f1 - f2).abs() + (f3 - f4).abs()
        vertical = (f1 - f3).abs() + (f2 - f4).abs()

        placeholder = np.array([[None]], dtype=object)
        HI = np.vstack((placeholder, horizontal.to_numpy(dtype=object).reshape(-1, 1)))
        VI = np.vstack((placeholder, vertical.to_numpy(dtype=object).reshape(-1, 1)))
        return HI, VI
        

### [Dataset Image]

In [None]:
# Build the image-feature table: one row per 128x128 block for frames 1..15.
# NOTE(review): frame 0 is skipped by range(1, 16) - confirm this is intended.
# Per-frame tables are collected in a list and concatenated once, instead of
# re-concatenating the growing frame on every iteration.
frame_tables = []

for i in range(1,16):

    # The name `dataset` is kept: Extract_Features.getQuarterVariance may
    # look it up as a global.
    dataset = Extract_Features(height, width, Y[i])
    varPix = dataset.getVariance('128x128')
    grads = dataset.getGradients('128x128')
    mean = dataset.getMean('128x128')
    v = dataset.getQuarterVariance('128x128', '64x64', varPix, grads, mean)

    df = pd.merge(pd.merge(varPix, grads), v)

    # Horizontal / vertical inconsistency of each quarter feature. The first
    # row returned by getInconsistency is a placeholder and is sliced off.
    for feature in ('VarPix', 'RatioGrad', 'Mean'):
        HI, VI = dataset.getInconsistency(df, feature)
        df = pd.concat([df, pd.DataFrame(HI[1:,], columns=['HI_' + feature])], axis = 1)
        df = pd.concat([df, pd.DataFrame(VI[1:,], columns=['VI_' + feature])], axis = 1)

    df['DiffInconsPix'] = df['HI_VarPix'] - df['VI_VarPix']
    df['DiffInconsRatioGrad'] = df['HI_RatioGrad'] - df['VI_RatioGrad']
    df['DiffInconsMean'] = df['HI_Mean'] - df['VI_Mean']
    # Frame index; used as a merge key against the encoder table later on.
    df['GOP'] = i

    frame_tables.append(df)

# Rows with any missing feature are discarded.
df_image = pd.concat(frame_tables, sort=False).dropna()
df_image.shape


### [Dataset VVC]

In [None]:
def organizeDataset(df, keep, drop):
    """Pair each coded-CU row with its split-decision rows and de-duplicate.

    The input is split in two groups:

    * rows with ``depth > -1`` - these keep every encoder column but lose
      ``splitColumn``, ``RDCostBefore`` and ``RDCostAfter``;
    * rows with ``depth == -1`` and ``RDCostBefore > -1`` - these keep the CU
      key columns plus whatever is not an encoder column (``splitColumn``,
      ``RDCostBefore``, ``RDCostAfter``, ...).

    Both groups are inner-merged on the CU key columns and duplicated
    combinations of (encoder columns, key columns, ``RDCostBefore``) are
    collapsed according to ``keep``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw encoder trace. It is NOT modified (the previous version dropped
        duplicates from the caller's frame in place).
    keep : {'first', 'last', False}
        Forwarded to ``DataFrame.drop_duplicates``.
    drop : bool
        When true, the encoder columns, ``RDCostBefore`` and ``RDCostAfter``
        are removed from the result.

    Returns
    -------
    pandas.DataFrame
    """
    encoder_cols = [
        'frameWidth', 'frameHeight', 'depth', 'qtdepth', 'mtdepth', 'qp',
        'predMode', 'skip', 'mmvdSkip', 'affine', 'affineType',
        'colorTransform', 'geoFlag', 'bdpcmMode', 'bdpcmModeChroma', 'imv',
        'rootCbf', 'mipFlag', 'modeType', 'modeTypeSeries', 'splitSeries',
        'cost', 'dist', 'fracBits', 'baseQP', 'prevQP', 'currQP', 'lumaCost',
        'opts', 'maxCostAllowed', 'tlMaxDepth', 'tMaxDepth', 'trMaxDepth',
        'lMaxDepth', 'previousMaxDepth', 'averageDepth', 'modeDepth',
        'highestDepth', 'interIMVRDCost', 'interRDCost', 'affineMergeRDCost',
        'cachedResultRDCost', 'mergeRDCost', 'mergeGeoRDCost', 'intraRDCost',
        'previousSkipFlag',
    ]
    key_cols = ['CU_width', 'CU_height', 'topLeft_x', 'topLeft_y',
                'bottomRight_x', 'bottomRight_y', 'GOP']

    # Work on a de-duplicated copy so the caller's frame stays untouched.
    unique_rows = df.drop_duplicates()

    is_split_row = (unique_rows['depth'] == -1) & (unique_rows['RDCostBefore'] > -1)
    errata = (
        unique_rows[is_split_row]
        .drop(columns=encoder_cols)
        .reset_index(drop=True)
    )
    certa = (
        unique_rows[unique_rows['depth'] > -1]
        .drop(columns=['splitColumn', 'RDCostBefore', 'RDCostAfter'])
        .reset_index(drop=True)
    )

    result = pd.merge(certa, errata, on=key_cols)

    best = result.drop_duplicates(
        subset=encoder_cols + key_cols + ['RDCostBefore'], keep=keep)
    if drop:
        best = best.drop(columns=encoder_cols + ['RDCostBefore', 'RDCostAfter'])
    return best

In [None]:
# Earlier loading approach, kept commented out for reference:
#vvc_df = pd.read_csv('dataset_128.csv', usecols = ['topLeft_x', 'topLeft_y', 'bottomRight_x', 'bottomRight_y', 'splitColumn', 'VideoName', 'ParamQP', 'GOP'])
#vvc_df = vvc_df[vvc_df['VideoName'] == 'KristenAndSara_']
#vvc_df = vvc_df[vvc_df['ParamQP'] == 22]
#vvc_df.reset_index(inplace = True, drop = True)
#vvc_df.drop_duplicates(inplace = True)
#vvc_df.shape

# Load the encoder trace of every QP, keep only the 128-wide CUs and tag each
# row with its QP. Tables are concatenated once at the end; the per-QP frame
# is rebuilt through chaining rather than edited in place on a filtered slice
# (which triggers pandas' SettingWithCopyWarning).
qps = [22, 27, 32, 37]
qp_tables = []

for qp in qps:

    df_qp = organizeDataset(pd.read_csv('../files/KristenAndSara_' + str(qp) + '_Lindino.csv'), 'last', True)
    df_qp = (
        df_qp[df_qp['CU_width'] == 128]
        .drop(columns=['CU_width', 'CU_height'])
        .assign(QP=qp)
    )
    qp_tables.append(df_qp)

vvc_df = pd.concat(qp_tables, sort = False)
vvc_df.shape

### [Merge]

In [None]:
# Join image features with the encoder table on block position and frame
# index, then expose the frame index under the name 'POC'.
block_keys = ['topLeft_x', 'topLeft_y', 'bottomRight_x', 'bottomRight_y', 'GOP']
df_all = pd.merge(df_image, vvc_df, on=block_keys).rename(columns={'GOP':'POC'})
df_all

### [Features Selection]

In [None]:
def MI(df, plot=True, title='Model 128x128 QP 22'):
    """Filter features and optionally plot their mutual information with the label.

    Every column of ``df`` except ``splitColumn`` is treated as a feature.
    Features whose variance is not above 0.01 are removed, then features whose
    values duplicate an earlier feature are removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the features and the ``splitColumn`` label.
    plot : bool, default True
        When true, draw a bar chart of the mutual information between each
        remaining feature and the label, sorted in descending order.
    title : str, default 'Model 128x128 QP 22'
        Title of the bar chart.

    Returns
    -------
    (X, Y) : (pandas.DataFrame, pandas.Series)
        The filtered features and the label. Both carry ``df``'s index
        (previously X was re-indexed from 0 while Y kept the original index,
        so the two could not be aligned by label).
    """
    x = df.drop(columns='splitColumn')

    # Drop quasi-constant features.
    constant_filter = VarianceThreshold(threshold=0.01)
    xFilter = constant_filter.fit_transform(x)
    columnsFilter = x.columns[constant_filter.get_support()]

    # Drop features that are exact duplicates of an earlier feature
    # (duplicated rows of the transposed matrix).
    keep_mask = ~pd.DataFrame(xFilter.T).duplicated().to_numpy()

    X = pd.DataFrame(xFilter[:, keep_mask], columns=columnsFilter[keep_mask], index=df.index)
    Y = df['splitColumn']

    if plot:
        # Seeded so the chart is reproducible between runs.
        mi = pd.Series(mutual_info_classif(X, Y, random_state=0), index=X.columns)
        mi = mi.sort_values(ascending=False)
        # plot.bar(figsize=...) opens its own figure, so labels must be set on
        # the returned axes; setting them through plt beforehand put them on a
        # separate, empty figure.
        ax = mi.plot.bar(figsize=(16, 5), color='red', grid=True)
        ax.set_ylabel('Mutual Information (MI)')
        ax.set_title(title)

    return X, Y

# Feature selection on the QP 22 subset.
# NOTE(review): this rebinds the global `Y`, which the YUV-loading cell also
# uses for the luma frames - re-run that cell before using the frames again.
X, Y = MI(df_all.loc[df_all['QP'] == 22])

### [Training]

In [None]:
def simpleTraining(X, Y, imblearn, ax, title):
    """Train a random forest on a 75/25 split and plot its confusion matrix.

    Parameters
    ----------
    X, Y : features and labels, as accepted by scikit-learn.
    imblearn : bool
        When true, the classes are balanced with ``RandomUnderSampler``
        before splitting.
    ax : matplotlib axes or None
        Axes that receive the heatmap; with ``None`` seaborn draws on the
        current axes.
    title : str
        Suffix appended to the plot title.

    Returns
    -------
    float
        F1 score of the predictions on the held-out 25%.
    """
    if imblearn:
        undersample = RandomUnderSampler(sampling_strategy='all', random_state = 0)
        X, Y = undersample.fit_resample(X, Y)

    X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=0, test_size = 0.25)
    clf = RF(random_state=0)
    clf.fit(X_train, y_train)
    predicts = clf.predict(X_test)

    # Row-normalised confusion matrix; the tick labels assume a binary label.
    heatmap_ax = sns.heatmap(confusion_matrix(y_test, predicts, normalize='true'),
                             annot = True,
                             xticklabels=['No Split', 'Split'],
                             yticklabels=['No Split','Split'],
                             ax = ax)
    # sns.heatmap returns the axes it drew on (the current axes when `ax` is
    # None), so the title can be set in one place without an `ax == None` test.
    heatmap_ax.set_title('Confusion Matrix - Model 128x128 ' + title)

    return f1_score(y_test, predicts)

In [72]:
# Load the "dentro" feature/target CSV pair for BasketballPass at QP 50.
# Only the columns listed below are read from the features file.
feature_columns = ['videoname','paramQP','frameWidth','frameHeight','CU_width','CU_height',
                   'topLeft_x','topLeft_y','bottomRight_x','bottomRight_y','POC',
                   'qtdepth','mtdepth', 'cost', 'splitType']
features = pd.read_csv('../files/dataset_BasketballPass_50_features_dentro.csv',
                       index_col = False, usecols = feature_columns)

target = pd.read_csv('../files/dataset_BasketballPass_50_target_dentro.csv')

In [60]:
# Join features and targets on the CU identity, keep the per-key minimum of
# every remaining column, and flag rows where `cost` exceeds `CU_RDCost`.
merge_keys = ['videoname','paramQP','frameWidth','frameHeight','CU_width','CU_height',
              'topLeft_x','topLeft_y','bottomRight_x','bottomRight_y','POC','qtdepth','mtdepth']
df = (
    pd.merge(features, target, on=merge_keys)
    .drop_duplicates()
    .groupby(by=merge_keys)
    .min()
    .reset_index()
)
# 1 when cost > CU_RDCost, else 0.
df['target'] = (df['cost'] > df['CU_RDCost']).astype('int64')
df

Unnamed: 0,videoname,paramQP,frameWidth,frameHeight,CU_width,CU_height,topLeft_x,topLeft_y,bottomRight_x,bottomRight_y,POC,qtdepth,mtdepth,cost,splitType,CU_RDCost,CS_RDCost,target
0,BasketballPass,50,416,240,4,4,32,232,35,235,0,3,3,2941600,3,1433429,1433429,1
1,BasketballPass,50,416,240,4,4,44,232,47,235,0,3,3,2941600,3,1433429,1433429,1
2,BasketballPass,50,416,240,4,4,256,104,259,107,0,3,3,1462555,3,827977,827977,1
3,BasketballPass,50,416,240,4,4,268,104,271,107,0,3,3,1462555,3,827977,827977,1
4,BasketballPass,50,416,240,4,4,284,64,287,67,0,3,3,1308162,4,843424,843424,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3180,BasketballPass,50,416,240,128,128,0,0,127,127,2,0,0,5555621,1,5555621,5555621,0
3181,BasketballPass,50,416,240,128,128,0,0,127,127,3,0,0,4659838,1,4659838,4659838,0
3182,BasketballPass,50,416,240,128,128,256,0,383,127,1,0,0,3120571,1,3120571,3120571,0
3183,BasketballPass,50,416,240,128,128,256,0,383,127,2,0,0,4868805,1,4868805,4868805,0


In [61]:
# Same pipeline as the previous cells, applied to the "fora" CSV pair:
# load, join on the CU identity, keep per-key minima, derive the 0/1 target.
feature_columns = ['videoname','paramQP','frameWidth','frameHeight','CU_width','CU_height',
                   'topLeft_x','topLeft_y','bottomRight_x','bottomRight_y','POC',
                   'qtdepth','mtdepth', 'cost', 'splitType']
features = pd.read_csv('../files/dataset_BasketballPass_50_features_fora.csv',
                       index_col = False, usecols = feature_columns)

target = pd.read_csv('../files/dataset_BasketballPass_50_target_fora.csv')

merge_keys = ['videoname','paramQP','frameWidth','frameHeight','CU_width','CU_height',
              'topLeft_x','topLeft_y','bottomRight_x','bottomRight_y','POC','qtdepth','mtdepth']
df_1 = (
    pd.merge(features, target, on=merge_keys)
    .drop_duplicates()
    .groupby(by=merge_keys)
    .min()
    .reset_index()
)
# 1 when cost > CU_RDCost, else 0.
df_1['target'] = (df_1['cost'] > df_1['CU_RDCost']).astype('int64')
df_1

Unnamed: 0,videoname,paramQP,frameWidth,frameHeight,CU_width,CU_height,topLeft_x,topLeft_y,bottomRight_x,bottomRight_y,POC,qtdepth,mtdepth,cost,splitType,CU_RDCost,CS_RDCost,target
0,BasketballPass,50,416,240,4,4,32,232,35,235,0,3,3,2941600,3,1433429,1433429,1
1,BasketballPass,50,416,240,4,4,44,232,47,235,0,3,3,2941600,3,1433429,1433429,1
2,BasketballPass,50,416,240,4,4,256,104,259,107,0,3,3,1462555,3,827977,827977,1
3,BasketballPass,50,416,240,4,4,268,104,271,107,0,3,3,1462555,3,827977,827977,1
4,BasketballPass,50,416,240,4,4,284,64,287,67,0,3,3,1308162,4,843424,843424,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5080,BasketballPass,50,416,240,128,128,128,0,255,127,2,0,0,2346980,1,2346980,2346980,0
5081,BasketballPass,50,416,240,128,128,128,0,255,127,3,0,0,2222026,1,2222026,2222026,0
5082,BasketballPass,50,416,240,128,128,256,0,383,127,1,0,0,3120571,1,3120571,3120571,0
5083,BasketballPass,50,416,240,128,128,256,0,383,127,2,0,0,4868805,1,4868805,4868805,0


In [69]:
# Same pipeline again, applied to the unsuffixed CSV pair:
# load, join on the CU identity, keep per-key minima, derive the 0/1 target.
feature_columns = ['videoname','paramQP','frameWidth','frameHeight','CU_width','CU_height',
                   'topLeft_x','topLeft_y','bottomRight_x','bottomRight_y','POC',
                   'qtdepth','mtdepth', 'cost', 'splitType']
features = pd.read_csv('../files/dataset_BasketballPass_50_features.csv',
                       index_col = False, usecols = feature_columns)

target = pd.read_csv('../files/dataset_BasketballPass_50_target.csv')

merge_keys = ['videoname','paramQP','frameWidth','frameHeight','CU_width','CU_height',
              'topLeft_x','topLeft_y','bottomRight_x','bottomRight_y','POC','qtdepth','mtdepth']
df_2 = (
    pd.merge(features, target, on=merge_keys)
    .drop_duplicates()
    .groupby(by=merge_keys)
    .min()
    .reset_index()
)
# 1 when cost > CU_RDCost, else 0.
df_2['target'] = (df_2['cost'] > df_2['CU_RDCost']).astype('int64')
df_2

Unnamed: 0,videoname,paramQP,frameWidth,frameHeight,CU_width,CU_height,topLeft_x,topLeft_y,bottomRight_x,bottomRight_y,POC,qtdepth,mtdepth,cost,splitType,CU_RDCost,CS_RDCost,target
0,BasketballPass,50,416,240,4,4,32,232,35,235,0,3,3,2941600,3,1433429,1433429,1
1,BasketballPass,50,416,240,4,4,44,232,47,235,0,3,3,2941600,3,1433429,1433429,1
2,BasketballPass,50,416,240,4,4,256,104,259,107,0,3,3,1462555,3,827977,827977,1
3,BasketballPass,50,416,240,4,4,268,104,271,107,0,3,3,1462555,3,827977,827977,1
4,BasketballPass,50,416,240,4,4,284,64,287,67,0,3,3,1308162,4,843424,843424,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3180,BasketballPass,50,416,240,128,128,0,0,127,127,2,0,0,5555621,1,5555621,5555621,0
3181,BasketballPass,50,416,240,128,128,0,0,127,127,3,0,0,4659838,1,4659838,4659838,0
3182,BasketballPass,50,416,240,128,128,256,0,383,127,1,0,0,3120571,1,3120571,3120571,0
3183,BasketballPass,50,416,240,128,128,256,0,383,127,2,0,0,4868805,1,4868805,4868805,0


In [71]:
# Number of rows per CU width in whichever `features` table was loaded last.
features['CU_width'].value_counts()

16     5749
8      5353
32     2036
4      1214
64      224
128      35
Name: CU_width, dtype: int64

In [76]:
# Load the KristenAndSara QP 22 features file, restricted to the listed columns.
feature_columns = ['videoname','paramQP','frameWidth','frameHeight','CU_width','CU_height',
                   'topLeft_x','topLeft_y','bottomRight_x','bottomRight_y','POC',
                   'qtdepth','mtdepth', 'cost', 'splitType']
features = pd.read_csv('../files/dataset_KristenAndSara_22_features.csv',
                       index_col = False, usecols = feature_columns)

In [77]:
# Number of rows per CU width in whichever `features` table was loaded last.
features['CU_width'].value_counts()

8      278291
16     171532
4      125495
32      51423
64      10377
128       564
0         129
Name: CU_width, dtype: int64