In [11]:
# imports
import keras 
import cv2 # opencv
import glob
import os
from pathlib import Path
import matplotlib.pyplot as plt
from skimage import color, feature
import skimage.data
import pandas as pd
from moviepy.editor import *
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score

# definitions of constants/variables
# glob pattern matching every source video that frames and audio are extracted from
video_file_glob_path = '../../videos/*.avi'
# base folder below which one sub-folder of extracted frames is created per video
image_base_path = '../../images/'
# folder that receives one .wav file per video
audio_base_path = '../../audio/'
# directory prefix used to locate the labelled frame images when the label file is parsed
video_path = '../../ground_truth/Muppets-02-01-01/'
# comma-separated ground-truth file: frame name followed by numeric character labels
label_file = '../../ground_truth/Muppets-02-01-01/Muppets-02-01-01.txt'

In [9]:
# capture the videos and extract frames
# Only frames 0..max_frame_id of every video are written to disk.
max_frame_id = 300

# NOTE: the loop variable is deliberately NOT named `video_path`. That name holds the
# ground-truth directory which the label-loading code uses to find the frame images;
# re-using it here would silently break that code when the notebook runs top to bottom.
for video_file in glob.glob(video_file_glob_path):
    cap = cv2.VideoCapture(video_file)
    frame_rate = cap.get(cv2.CAP_PROP_FPS)
    image_folder_path = image_base_path + os.path.basename(video_file).split('.')[0]
    Path(image_folder_path).mkdir(parents=True, exist_ok=True)

    try:
        while cap.isOpened():
            # index of the frame that the following read() call decodes
            frame_id = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
            ret, frame = cap.read()

            if not ret:
                # also reached at the regular end of the stream
                print('Failed to read frame %d of video %r.' % (frame_id, video_file))
                break

            filename = image_folder_path + '/frame%d.jpg' % frame_id
            cv2.imwrite(filename, frame)

            if frame_id >= max_frame_id:
                break
    finally:
        # always free the decoder / file handle, even if writing a frame fails
        cap.release()

In [7]:
# extract audio from avi video
Path(audio_base_path).mkdir(parents=True, exist_ok=True)

# NOTE: the loop variable is deliberately NOT named `video_path`; that name holds the
# ground-truth directory needed later when the label file is parsed.
for video_file in glob.glob(video_file_glob_path):
    video = VideoFileClip(video_file)
    try:
        if video.audio is None:
            # clip has no audio track; nothing to export
            print('No audio track found in video %r, skipping.' % video_file)
            continue

        filename = audio_base_path + os.path.basename(os.path.normpath(video_file)).split('.')[0] + '.wav'
        video.audio.write_audiofile(filename)
    finally:
        # release the reader processes / file handles held by the clip
        video.close()

chunk:   0%|                                                               | 3/34118 [00:00<24:55, 22.81it/s, now=None]

MoviePy - Writing audio in ../../audio/Muppets-02-01-01.wav


                                                                                                                       

MoviePy - Done.




In [13]:
# this section holds useful helper functions and transformers

# method for image resizing and extracting vector of raw values
def extract_value_vector(image, size=(128, 128)):
    """Resize an image and return its raw values as a flat vector.

    Args:
        image: image array as accepted by ``cv2.resize``.
        size: target size handed to ``cv2.resize``; defaults to ``(128, 128)``.

    Returns:
        The resized image flattened with ``.flatten()``.
    """
    resized = cv2.resize(image, size)
    flat_values = resized.flatten()
    return flat_values

def extract_colour_histogram(image, bins=(32, 32, 32)):
    """Build a flattened, normalised 3D HSV colour histogram for an image.

    Args:
        image: image array that is converted with ``cv2.COLOR_BGR2HSV``
            (so it is treated as BGR).
        bins: number of histogram bins per HSV channel; defaults to
            ``(32, 32, 32)``.

    Returns:
        The histogram over all three channels, normalised in place with
        ``cv2.normalize`` (default arguments) and flattened to one dimension.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # joint histogram over hue, saturation and value
    channels = [0, 1, 2]
    value_ranges = [0, 180, 0, 256, 0, 256]
    histogram = cv2.calcHist([hsv], channels, None, bins, value_ranges)

    # source and destination are the same array, so the histogram is rescaled in place
    cv2.normalize(histogram, histogram)

    feature_vector = histogram.flatten()
    return feature_vector

def print_data_distribution(df):
    """Print how many frames show each character and how many show none.

    The four character columns are independent 0/1 flags, so a single frame
    may be counted for several characters. "None" is therefore the number of
    rows in which no flag is set, not the total minus the sum of the
    per-character counts (which would subtract multi-character frames more
    than once).

    Args:
        df: DataFrame with the 0/1 columns ``kermit``, ``waldorf_and_statler``,
            ``pig`` and ``swedish_chef``.
    """
    character_columns = ['kermit', 'waldorf_and_statler', 'pig', 'swedish_chef']
    # cast explicitly: the columns may be stored with object dtype
    flags = df[character_columns].astype(int)

    per_character = flags.sum(axis=0)
    kermits = int(per_character['kermit'])
    waldorf_statlers = int(per_character['waldorf_and_statler'])
    pigs = int(per_character['pig'])
    swedish_chefs = int(per_character['swedish_chef'])

    # rows where none of the character flags is set
    has_character = (flags != 0).any(axis=1)
    nones = int((~has_character).sum())

    print('Dataset statistics:')
    print('Kermit:\t\t\t' + str(kermits))
    print('Waldorf/Statler:\t' + str(waldorf_statlers))
    print('Pigs:\t\t\t' + str(pigs))
    print('Swedish Chef:\t\t' + str(swedish_chefs))
    print('None:\t\t\t' + str(nones))
    print('Total:\t\t\t' + str(len(df)))

# NOTE(review): `data` is created by the label-loading cell that appears further down
# in this notebook, so that cell has to be executed before this call on a fresh kernel.
print_data_distribution(data)

Dataset statistics:
Kermit:			432
Waldorf/Statler:	150
Pigs:			119
Swedish Chef:		140
None:			2383
Total:			3224


In [12]:
# numeric label ids used in the ground-truth file -> character names
label_map = {0: 'kermit_the_frog',
             1: 'waldorf_and_statler',
             2: 'pig',
             3: 'swedish_chef',
             4: 'none'}

# label id meaning "no character in this frame"; it does not get its own column
none_label = 4
data_columns = ["name", "filename", "kermit", "waldorf_and_statler", "pig", "swedish_chef"]

# Collect one dict per labelled frame and build the DataFrame once at the end.
# Growing a DataFrame row by row is quadratic, and DataFrame.append no longer
# exists in pandas >= 2.0.
records = []

with open(label_file) as file:
    for line in file:
        splits = [part.strip() for part in line.split(",")]
        name = splits[0]
        if not name:
            # blank line (e.g. trailing newline at the end of the file)
            continue

        # every remaining field is a numeric label id; ignore empty fields and "none"
        frame_labels = {int(part) for part in splits[1:] if part} - {none_label}

        # `name` carries no file extension, so look the image up by prefix. A prefix
        # such as "frame1" also matches "frame10.jpg", hence prefer the file whose
        # stem (or full name) equals `name` and only then fall back to the first
        # match in sorted order.
        matches = sorted(glob.glob(video_path + name + "*"))
        if not matches:
            raise FileNotFoundError(
                'No image found for label entry %r below %r' % (name, video_path))
        exact_matches = [m for m in matches
                         if Path(m).stem == name or Path(m).name == name]
        filename = exact_matches[0] if exact_matches else matches[0]

        records.append({'name': name,
                        'filename': filename,
                        'kermit': 1 if 0 in frame_labels else 0,
                        'waldorf_and_statler': 1 if 1 in frame_labels else 0,
                        'pig': 1 if 2 in frame_labels else 0,
                        'swedish_chef': 1 if 3 in frame_labels else 0})

# passing `columns` keeps the schema intact even if the label file is empty
data = pd.DataFrame(records, columns=data_columns)

#data.to_csv("data.csv")
    

In [15]:
## Identifying Kermit

# work on a copy so the label table itself stays untouched
df1 = data.copy()

# init data structures for feature vectors, image pixel values and labels
hist_features = []
images_raw_pixels = []
labels = []

print('[INFO] start preprocessing images (color histogram)')

# iterate over all provided images and extract features, labels and image pixels
for idx, row in df1.iterrows():
    img = cv2.imread(row['filename'])
    if img is None:
        # cv2.imread does not raise for missing/unreadable files, it returns None;
        # fail here with the offending path instead of deep inside cvtColor
        raise FileNotFoundError('Could not read image %r' % row['filename'])

    # extract color distribution of the image
    hist_features.append(extract_colour_histogram(img))

    # extract raw pixels of the image
    images_raw_pixels.append(extract_value_vector(img))

    # 0 if not kermit, 1 if kermit in the image
    labels.append(int(row['kermit']))

    # show process
    if idx % 200 == 0:
        print('[INFO] images preprocessed %d/%d' % (idx, len(df1)))

# Split raw pixels and histogram features with ONE seeded call so that both
# classifiers are trained and evaluated on exactly the same frames. This makes
# their scores directly comparable and the results reproducible across runs.
random_seed = 42
(x_train_raw, x_test_raw,
 x_train_hist, x_test_hist,
 y_train, y_test) = train_test_split(images_raw_pixels, hist_features, labels,
                                     test_size=0.3, random_state=random_seed)

# both feature sets share the same labels; keep the original variable names
y_train_raw = y_train_hist = y_train
y_test_raw = y_test_hist = y_test

# variables for classifier configuration
k = 20

print('[INFO] Start training and evaluating classifier for Kermit the frog')

# train a KNN classifier on raw pixel data
print('[INFO] Train KNN model for raw pixels')
model_raw = KNeighborsClassifier(n_neighbors=k)
model_raw.fit(x_train_raw, y_train_raw)
print('[INFO] Predict test data for raw pixels')
y_pred_raw = model_raw.predict(x_test_raw)
acc_knn_raw = accuracy_score(y_test_raw, y_pred_raw)
f1_knn_raw = f1_score(y_test_raw, y_pred_raw, average='weighted')
print('[INFO] KNN hyperparameter: k=%d' % k)
print('[INFO] KNN raw pixel acc: ' + str(acc_knn_raw))
print('[INFO] KNN raw pixel f1-score: ' + str(f1_knn_raw))


# train a KNN classifier on histogram features
print('[INFO] Train KNN model for color histogram features')
model_hist = KNeighborsClassifier(n_neighbors=k)
model_hist.fit(x_train_hist, y_train_hist)
print('[INFO] Predict test data for color histogram')
y_pred_hist = model_hist.predict(x_test_hist)
acc_knn_hist = accuracy_score(y_test_hist, y_pred_hist)
f1_knn_hist = f1_score(y_test_hist, y_pred_hist, average='weighted')

print('[INFO] KNN hyperparameter: k=%d' % k)
print('[INFO] KNN color histogram acc: ' + str(acc_knn_hist))
print('[INFO] KNN color histogram f1-score: ' + str(f1_knn_hist))

[INFO] start preprocessing images (color histogram)
[INFO] images preprocessed %d/%d 0 3224
[INFO] images preprocessed %d/%d 200 3224
[INFO] images preprocessed %d/%d 400 3224
[INFO] images preprocessed %d/%d 600 3224
[INFO] images preprocessed %d/%d 800 3224
[INFO] images preprocessed %d/%d 1000 3224
[INFO] images preprocessed %d/%d 1200 3224
[INFO] images preprocessed %d/%d 1400 3224
[INFO] images preprocessed %d/%d 1600 3224
[INFO] images preprocessed %d/%d 1800 3224
[INFO] images preprocessed %d/%d 2000 3224
[INFO] images preprocessed %d/%d 2200 3224
[INFO] images preprocessed %d/%d 2400 3224
[INFO] images preprocessed %d/%d 2600 3224
[INFO] images preprocessed %d/%d 2800 3224
[INFO] images preprocessed %d/%d 3000 3224
[INFO] images preprocessed %d/%d 3200 3224
[INFO] Start training and evaluating classifier for Kermit the frog
[INFO] Train KNN model for raw pixels
[INFO] KNN hyperparameter: k=%d 20
[INFO] KNN raw pixel acc: 0.9648760330578512
[INFO] KNN raw pixel f1-score: 0.96477

In [11]:
## Identifying Waldorf/Statler