# Feature Loading

## For explanations of the features,
## refer to: https://github.com/ricoms/video_image_features

### A. The downloaded "z0N" files are first unzipped using the 7zip program in Windows.

### B. Scan the features into a pandas DataFrame

In [1]:
# Load the ColorHistograms of 3 images
import os
import pandas as pd
import re
import numpy as np

In [2]:
# define all the functions in this section 

def vname2ID(vnames):
    """Extract the id part of each video file name.

    vnames: iterable of file names, e.g. 'video123.txt'.
    Returns a list of strings: every name with its extension removed and
    its first five characters (the 'video' prefix) dropped.
    """
    ids = []
    for name in vnames:
        stem, _ext = os.path.splitext(name)
        ids.append(stem[5:])
    return ids

def read_C3D(fname):
    """Read a C3D feature vector from a text file.

    fname: path of a file holding whitespace-separated numbers.
    Returns a list of floats parsed from the last non-blank line of the
    file; an empty list when the file has no non-blank line.
    Raises ValueError if a token cannot be converted to float.
    """
    C3D = []  # default result, so an empty file does not raise UnboundLocalError
    with open(fname) as f:
        for line in f:
            tokens = line.split()  # default separator: any run of whitespace
            if not tokens:
                # a trailing blank line must not overwrite the parsed vector
                continue
            C3D = [float(tok) for tok in tokens]
    return C3D

def read_HMP(fname, n_bins=6075):
    """Read HMP (Histogram of Motion Patterns) features from a file.

    fname: path of a text file whose line is a whitespace-separated list
        of 'index:value' entries with 1-based bin indices.
    n_bins: length of the returned histogram (6075 by default).
    Returns a 1-D numpy array of length n_bins; bins not listed in the file
    are zero. The entries of the last non-blank line are used; a file
    without any non-blank line gives an all-zero histogram.
    Raises IndexError if a bin index is outside 1..n_bins, and ValueError
    if an index or value cannot be parsed.
    """
    last_pairs = []  # stays empty for an empty file instead of failing later
    with open(fname) as f:
        for line in f:
            pairs = line.split()
            if pairs:
                # ignore blank lines so a trailing newline cannot wipe the data
                last_pairs = pairs

    HMP = np.zeros(n_bins)
    for p in last_pairs:
        fields = p.split(':')  # split once, reuse for index and value
        idx = int(fields[0])
        value = float(fields[1])
        if idx < 1 or idx > n_bins:
            # index 0 would otherwise wrap around to the last bin (HMP[-1])
            raise IndexError('HMP bin index %d is outside 1..%d' % (idx, n_bins))
        HMP[idx - 1] = value  # file indices are 1-based
    return HMP

def read_ColorHistogram(fname):
    """Read an RGB colour histogram from a text file.

    Each line of the file fills one row of the result, in file order; a
    line is a whitespace-separated list of 'bin:value' entries.
    Returns a numpy array of shape (3, 256); bins not listed stay zero.
    """
    hist = np.zeros((3, 256))
    with open(fname) as f:
        for row, line in enumerate(f):
            # parse the whole line first, then copy it into the matrix row
            entries = {}
            for token in line.split():
                fields = token.split(':')
                entries[int(fields[0])] = float(fields[1])
            for bin_idx, value in entries.items():
                hist[row, bin_idx] = value
    return hist

def read_HOG(fname):
    """Placeholder for loading HOG (Histogram of Oriented Gradients) features.

    Not implemented: the argument is ignored and None is returned.
    ****
    Open question: since this is a histogram calculated on 32 x 32 windows,
    why is its size different for frames in the same video?
    """
    return None

def read_caps(fname):
    """Load the video captions into a DataFrame.

    fname: path of a text file with one record per line: the video file
        name, whitespace, then the caption.
    Returns a DataFrame with the columns 'video' and 'caption', one row per
    non-blank line, in file order. A line that holds only a video name gets
    an empty caption.
    """
    vn = []
    cap = []
    with open(fname) as f:
        for line in f:
            # split only once so a caption containing spaces stays whole
            fields = line.strip().split(None, 1)
            if not fields:
                continue  # blank line: nothing to record
            vn.append(fields[0])
            cap.append(fields[1] if len(fields) > 1 else '')
    df = pd.DataFrame()
    df['video'] = vn
    df['caption'] = cap
    return df

    

In [3]:
# Root directory of the extracted features (one sub-directory per feature).
Feat_path = '/home/wsun3/multimediaeval18/features/'

# Names of the image-level feature sets.
image_feat_names = [
    'ColorHistogram',
    'HOG',
    'InceptionV3',
    'LBP',
    'ORB',
]

# Names of the video-level feature sets.
video_feat_names = [
    'C3D',
    'HMP',
]

In [4]:
# Load video related features first
# it helps with the organization of the video names
# os.path.join keeps the path valid whether or not Feat_path ends with '/';
# sorted() makes the row order reproducible, because os.listdir returns
# the directory entries in arbitrary order.
vnames = sorted(os.listdir(os.path.join(Feat_path, 'C3D')))
vid = vname2ID(vnames) # video id


FileNotFoundError: [Errno 2] No such file or directory: '/home/wsun3/multimediaeval18/features/C3D'

In [None]:
# Build the feature table: one row per video id, one column per feature.
# Every file is read from <Feat_path><feature>/video<id>[-<suffix>].txt
Features = pd.DataFrame({
    'ID': vid,
    'C3D': [read_C3D(Feat_path + 'C3D' + '/video' + v + '.txt') for v in vid],
    'HMP': [read_HMP(Feat_path + 'HMP' + '/video' + v + '.txt') for v in vid],
    # three colour histograms per video, from the files suffixed -0, -56
    # and -112 (presumably frame numbers — TODO confirm)
    **{
        'ColorHistogram' + frame: [
            read_ColorHistogram(
                Feat_path + 'ColorHistogram' + '/video' + v + '-' + frame + '.txt'
            )
            for v in vid
        ]
        for frame in ('0', '56', '112')
    },
})


In [5]:
# read the dev-set ground-truth table into `labels`
label_path = '/home/wsun3/multimediaeval18/ground-truth/'
labels = pd.read_csv(
    filepath_or_buffer=label_path + 'ground-truth_dev-set.csv',
)

FileNotFoundError: File b'/home/wsun3/multimediaeval18/ground-truth/ground-truth_dev-set.csv' does not exist

In [None]:
# read the caption file and attach the captions to the label table
cap_path = (
    '/media/win/Users/ecelab-adm/Desktop/DataSet_me18me/'
    'me18me-devset/dev-set/dev-set_video-captions.txt'
)
df_cap = read_caps(cap_path)
# NOTE(review): this assignment is aligned on the row index, not on the
# 'video' column — confirm both tables list the videos in the same order
labels['caption'] = df_cap['caption']

In [6]:
# check the summary statistics (min, max, mean, median) of the ground-truth memorability
# NOTE(review): 'video' and 'caption' presumably hold text, for which mean/median
# are not meaningful — confirm these two columns belong in this summary
labels.agg(['min','max','mean','median'])[['video','short-term_memorability','long-term_memorability','caption']]

NameError: name 'labels' is not defined

In [7]:
# sort the labels in increasing order of short-term memorability
# (ascending=True: lowest scores first, highest scores last)
labels_sorted = labels.sort_values(by='short-term_memorability', ascending=True)

NameError: name 'labels' is not defined

In [8]:
# first five rows of the sorted table (lowest short-term memorability)
labels_sorted.head(n=5)

NameError: name 'labels_sorted' is not defined

In [9]:
# last five rows of the sorted table (highest short-term memorability)
labels_sorted.tail(n=5)

NameError: name 'labels_sorted' is not defined

In [10]:
# sort the labels in increasing order of long-term memorability
# (ascending=True: lowest scores first, highest scores last)
labels_sorted = labels.sort_values(by='long-term_memorability', ascending=True)

NameError: name 'labels' is not defined

In [11]:
# first five rows of the sorted table (lowest long-term memorability)
labels_sorted.head(n=5)

NameError: name 'labels_sorted' is not defined

In [12]:
# last five rows of the sorted table (highest long-term memorability)
labels_sorted.tail(n=5)

NameError: name 'labels_sorted' is not defined

In [13]:
# visualize the relationship between long-term and short-term memorability
import matplotlib.pyplot as plt

In [14]:
# scatter plot: short-term memorability on the x axis, long-term on the y axis
labels.plot.scatter(
    x='short-term_memorability',
    y='long-term_memorability',
)

NameError: name 'labels' is not defined

## There is not much of the expected linear relationship between long-term and short-term memorability