In [1]:
import pandas as pd
import os
import numpy as np
import cv2

from tqdm import tqdm_notebook

#from keras.applications.xception import Xception
from keras.applications.vgg16 import VGG16

Using TensorFlow backend.


In [2]:
class VideoToFrames:
    def __init__(self, video_path, no_frames=30):
        self.reader = cv2.VideoCapture(video_path)

        self.no_frames = no_frames
        self.total_frames = int(self.reader.get(cv2.CAP_PROP_FRAME_COUNT))
        if self.total_frames != 0:
            self.interval = int(self.total_frames/self.no_frames)

    def convert(self):
        frames = []
        frame_number = 1
        while len(frames) < self.no_frames:
            self.reader.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
            ret, frame = self.reader.read()

            try:
                if frame == None:
                    break
            except ValueError:
                pass

            frames.append(cv2.resize(frame, (48, 48)))
            frame_number += self.interval

        return frames

In [3]:
image_feature_extractor = VGG16(weights='imagenet', include_top=False, input_shape=(48, 48, 3))
image_feature_extractor.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 48, 48, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 48, 48, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 48, 48, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 24, 24, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 24, 24, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 24, 24, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 12, 12, 128)       0         
__________

In [4]:
video_dir = '../dataset/videoFolder/'
video_shape = (30, 512)

In [5]:
writer = open('video_features.csv', 'w')
writer.write('{}\t{}\n'.format('link', 'features'))

13

In [6]:
videolinks = list(pd.read_csv('../dataset/vine_labeled_cyberbullying_data.csv').videolink)
url_to_postid = pd.read_csv('../dataset/urls_to_postids.txt')

In [7]:
len(videolinks), len(url_to_postid)

(970, 970)

In [8]:
def link_to_name(link):
    return list(url_to_postid[url_to_postid.videolink == link].postid)[0]

In [9]:
def image_to_features(images):
    features = image_feature_extractor.predict(np.array(images))
    return np.array(features).reshape((30, 512))

In [10]:
for videolink in tqdm_notebook(videolinks):
    try:
        videoname = link_to_name(videolink)
        converter = VideoToFrames(video_path=os.path.join(video_dir, '{}.mp4'.format(videoname)), no_frames=video_shape[0])

        frames = converter.convert()
        features = image_to_features(frames)

        writer.write('{}\t{}\n'.format(videolink, features))
    except ValueError:
        pass

HBox(children=(IntProgress(value=0, max=970), HTML(value='')))




In [11]:
writer.close()

In [14]:
df = pd.read_csv('video_features.csv', sep='\t')

In [15]:
df.head()

Unnamed: 0,"link,featureshttps://vine.co/v/OgK06TtBUXz/embed/simple",[[ 0. 0. 0. ... 0. 0. 31.81314 ]
0,[ 0. 0. 0. ... 0. ...,
1,[ 0. 0. 0. ... 0. ...,
2,...,
3,[ 0. 0. 0. ... 0. ...,
4,[ 0. 0. 0. ... 0. ...,
