In [1]:
# imports
import cv2 # opencv
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, plot_confusion_matrix, plot_roc_curve
from dataset_generator import get_waldorf_statler_mfcc_features, create_pig_image_dataset, create_swedish_chef_image_dataset

import tensorflow as tf
from tensorflow import keras

# definitions of constants/variables (all locations are relative paths)
video_file_glob_path = '../../videos/*.avi'
audio_base_path = '../../audio/'
evaluation_base_path = '../../evaluation/'

# per-character ground-truth folders; each label file is 'labels.txt' inside its folder
pig_data_path = '../../ground_truth/pig/'
pig_label_file = f'{pig_data_path}labels.txt'
swedish_chef_data_path = '../../ground_truth/swedish_chef/'
swedish_chef_label_file = f'{swedish_chef_data_path}labels.txt'

# numeric class id -> character name (the position in the list is the class id)
label_map = dict(enumerate(['kermit_the_frog',
                            'waldorf_and_statler',
                            'pig',
                            'swedish_chef',
                            'none']))

In [2]:
def load_pig_image_dataset(label_file=None, data_path=None):
    """Load the pig ground-truth label file into a DataFrame.

    The label file is read as comma-separated text. Its first line is
    treated as a header and skipped; blank lines are ignored. On every
    other line the first two fields form the sample name, and all fields
    from the third one on are integer class labels.

    Parameters
    ----------
    label_file : str, optional
        Path of the label file. Defaults to the module-level ``pig_label_file``.
    data_path : str, optional
        Prefix prepended to the image file names. Defaults to the
        module-level ``pig_data_path``.

    Returns
    -------
    pandas.DataFrame
        One row per labelled sample with the columns ``name``
        (``<field0>_<field1>``), ``file_id`` (``int`` of the first field),
        ``filename`` (``<data_path><name>_<first label>.jpg``) and ``pig``
        (1 if class id 2 is among the labels, otherwise 0).

    Raises
    ------
    ValueError
        If a label field cannot be parsed as an integer.
    """
    if label_file is None:
        label_file = pig_label_file
    if data_path is None:
        data_path = pig_data_path

    pig_class_id = 2
    columns = ['name', 'file_id', 'filename', 'pig']

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and appending row by row is quadratic anyway.
    records = []
    with open(label_file) as file:
        for line_no, line in enumerate(file):
            if line_no == 0:
                continue  # header line
            if not line.strip():
                continue  # tolerate blank / trailing empty lines
            fields = [field.strip() for field in line.split(",")]
            name = fields[0] + '_' + fields[1]
            labels = {int(label) for label in fields[2:]}

            records.append({'name': name,
                            'file_id': int(fields[0]),
                            # the image file name carries the first label of the line
                            'filename': data_path + name + '_' + fields[2] + '.jpg',
                            'pig': 1 if pig_class_id in labels else 0})

    # Passing `columns` keeps the expected schema even when no rows were read.
    return pd.DataFrame(records, columns=columns)

def load_swedish_chef_image_dataset(label_file=None, data_path=None):
    """Load the swedish chef ground-truth label file into a DataFrame.

    The label file is read as comma-separated text. Its first line is
    treated as a header and skipped; blank lines are ignored. On every
    other line the first two fields form the sample name, and all fields
    from the third one on are integer class labels.

    Parameters
    ----------
    label_file : str, optional
        Path of the label file. Defaults to the module-level
        ``swedish_chef_label_file``.
    data_path : str, optional
        Prefix prepended to the image file names. Defaults to the
        module-level ``swedish_chef_data_path``.

    Returns
    -------
    pandas.DataFrame
        One row per labelled sample with the columns ``name``
        (``<field0>_<field1>``), ``file_id`` (``int`` of the first field),
        ``filename`` (``<data_path><name>_<first label>.jpg``) and
        ``swedish_chef`` (1 if class id 3 is among the labels, otherwise 0).

    Raises
    ------
    ValueError
        If a label field cannot be parsed as an integer.
    """
    if label_file is None:
        label_file = swedish_chef_label_file
    if data_path is None:
        data_path = swedish_chef_data_path

    swedish_chef_class_id = 3
    columns = ['name', 'file_id', 'filename', 'swedish_chef']

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and appending row by row is quadratic anyway.
    records = []
    with open(label_file) as file:
        for line_no, line in enumerate(file):
            if line_no == 0:
                continue  # header line
            if not line.strip():
                continue  # tolerate blank / trailing empty lines
            fields = [field.strip() for field in line.split(",")]
            name = fields[0] + '_' + fields[1]
            labels = {int(label) for label in fields[2:]}

            records.append({'name': name,
                            'file_id': int(fields[0]),
                            # the image file name carries the first label of the line
                            'filename': data_path + name + '_' + fields[2] + '.jpg',
                            'swedish_chef': 1 if swedish_chef_class_id in labels else 0})

    # Passing `columns` keeps the expected schema even when no rows were read.
    return pd.DataFrame(records, columns=columns)


def train_test_validation_split(df):
    """Split a dataset into train/test/validation parts, stratified per video.

    The rows of every distinct ``file_id`` are shuffled with a fixed seed
    (``random_state=42``) and cut at 60 % and 80 % of that video's row
    count, so each video contributes roughly 60 % / 20 % / 20 % of its
    rows to the train / test / validation part. The per-video parts are
    concatenated in ascending ``file_id`` order.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least a ``file_id`` column.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_df, test_df, val_df)``, each with a fresh 0..n-1 index.
    """
    train_end_frac = .6
    test_end_frac = .8

    train_parts, test_parts, val_parts = [], [], []

    # Group by whatever file ids are present instead of hard-coding 1, 2, 3,
    # so rows of additional videos are no longer silently dropped.
    for _, video_df in df.groupby('file_id', sort=True):
        shuffled = video_df.sample(frac=1, random_state=42)
        train_end = int(train_end_frac * len(shuffled))
        test_end = int(test_end_frac * len(shuffled))

        # Positional slicing yields the same chunks as np.split(shuffled, [a, b])
        # without relying on numpy splitting a DataFrame.
        train_parts.append(shuffled.iloc[:train_end])
        test_parts.append(shuffled.iloc[train_end:test_end])
        val_parts.append(shuffled.iloc[test_end:])

    def _combine(parts):
        # pd.concat raises on an empty list; return an empty frame that
        # keeps the input's columns instead.
        if not parts:
            return df.iloc[0:0].reset_index(drop=True)
        return pd.concat(parts).reset_index(drop=True)

    return _combine(train_parts), _combine(test_parts), _combine(val_parts)


In [3]:
# create the pig and swedish chef image datasets if they do not exist yet
# (the existence check is expected to happen inside the functions themselves)
create_pig_image_dataset()
create_swedish_chef_image_dataset()

# after dataset creation, load each label file into its own dataframe
df_pig = load_pig_image_dataset()
df_swedish_chef = load_swedish_chef_image_dataset()

Number of samples per character in ground truth:
kermit_the_frog: 2663
waldorf_and_statler: 301
pig: 1548
swedish_chef: 220
none: 5712
total_samples: 10444
[INFO] Start extracting images for target class: 2
[INFO] Start extracting randomly sampled images
Number of samples per character in ground truth:
kermit_the_frog: 2663
waldorf_and_statler: 301
pig: 1548
swedish_chef: 220
none: 5712
total_samples: 10444
[INFO] Start extracting images for target class: 3
[INFO] Start extracting randomly sampled images


In [4]:
dataset = tf.keras.preprocessing.image_dataset_from_directory(pig_data_path,
                                                              

Unnamed: 0,name,file_id,filename,swedish_chef
0,1_1660,1,../../ground_truth/swedish_chef/1_1660_3.jpg,1
1,1_1672,1,../../ground_truth/swedish_chef/1_1672_3.jpg,1
2,1_1696,1,../../ground_truth/swedish_chef/1_1696_3.jpg,1
3,1_28924,1,../../ground_truth/swedish_chef/1_28924_3.jpg,1
4,1_28936,1,../../ground_truth/swedish_chef/1_28936_3.jpg,1
...,...,...,...,...
442,3_36410,3,../../ground_truth/swedish_chef/3_36410_4.jpg,0
443,3_21038,3,../../ground_truth/swedish_chef/3_21038_4.jpg,0
444,3_38414,3,../../ground_truth/swedish_chef/3_38414_4.jpg,0
445,3_19658,3,../../ground_truth/swedish_chef/3_19658_4.jpg,0


Unnamed: 0,name,file_id,filename,pig
0,1_1036,1,../../ground_truth/kermit/1_1036_0.jpg,0
1,1_1048,1,../../ground_truth/kermit/1_1048_0.jpg,0
2,1_1060,1,../../ground_truth/kermit/1_1060_0.jpg,0
3,1_1072,1,../../ground_truth/kermit/1_1072_0.jpg,0
4,1_1084,1,../../ground_truth/kermit/1_1084_0.jpg,0
...,...,...,...,...
5328,3_34238,3,../../ground_truth/kermit/3_34238_4.jpg,0
5329,3_2234,3,../../ground_truth/kermit/3_2234_4.jpg,0
5330,3_34658,3,../../ground_truth/kermit/3_34658_4.jpg,0
5331,3_14090,3,../../ground_truth/kermit/3_14090_4.jpg,0
