In [3]:
# imports
import cv2 # opencv
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
from pathlib import Path
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, plot_confusion_matrix, plot_roc_curve
from dataset_generator import get_waldorf_statler_mfcc_features, create_pig_image_dataset, create_swedish_chef_image_dataset

import tensorflow as tf
from tensorflow import keras

# Definitions of constants/variables (all paths are relative paths)

# Raw inputs and evaluation output
video_file_glob_path = '../../videos/*.avi'
audio_base_path = '../../audio/'
evaluation_base_path = '../../evaluation/'

# Pig ground truth: image directory, its label file, and the Keras directory
pig_data_path = '../../ground_truth/pig/'
pig_label_file = pig_data_path + 'labels.txt'
pig_keras_path = '../../ground_truth/pig_keras/'

# Swedish Chef ground truth: image directory, its label file, and the Keras directory
swedish_chef_data_path = '../../ground_truth/swedish_chef/'
swedish_chef_label_file = swedish_chef_data_path + 'labels.txt'
swedish_chef_keras_path = '../../ground_truth/swedish_chef_keras/'

# Numeric class id -> character name (ids are the list positions, starting at 0)
label_map = dict(enumerate([
    'kermit_the_frog',
    'waldorf_and_statler',
    'pig',
    'swedish_chef',
    'none',
]))

In [4]:
def load_pig_image_dataset():
    """Load the pig ground-truth label file into a DataFrame.

    Reads the comma-separated file at ``pig_label_file``. The first line is
    skipped as a header and blank lines are ignored. For every remaining line
    the whitespace-stripped fields are interpreted as follows:

    * fields 0 and 1 are joined with ``'_'`` to form ``name``;
    * field 0 is additionally stored as the integer ``file_id``;
    * field 2 is used as the suffix of the image filename
      (``pig_data_path + name + '_' + field2 + '.jpg'``);
    * fields 2..end are parsed as integer labels; the row is flagged when
      label ``2`` (the id mapped to ``'pig'`` in ``label_map``) is present.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``file_id``, ``filename`` and ``contains_character``
        (1 if the pig label is present, otherwise 0), one row per label line.

    Raises
    ------
    IndexError
        If a non-blank line has fewer than three fields.
    ValueError
        If ``file_id`` or any label field is not an integer.
    """
    pig_label = 2
    columns = ['name', 'file_id', 'filename', 'contains_character']

    # Collect plain dicts and build the frame once: appending to a DataFrame
    # inside the loop copies it on every row (and DataFrame.append no longer
    # exists in pandas >= 2.0).
    records = []
    with open(pig_label_file) as file:
        next(file, None)  # skip the header line
        for line in file:
            if not line.strip():
                continue  # tolerate blank/trailing empty lines
            fields = [field.strip() for field in line.split(',')]
            name = fields[0] + '_' + fields[1]
            labels = {int(field) for field in fields[2:]}
            records.append({
                'name': name,
                'file_id': int(fields[0]),
                'filename': pig_data_path + name + '_' + fields[2] + '.jpg',
                'contains_character': 1 if pig_label in labels else 0,
            })

    return pd.DataFrame(records, columns=columns)

def load_swedish_chef_image_dataset():
    """Load the Swedish Chef ground-truth label file into a DataFrame.

    Reads the comma-separated file at ``swedish_chef_label_file``. The first
    line is skipped as a header and blank lines are ignored. For every
    remaining line the whitespace-stripped fields are interpreted as follows:

    * fields 0 and 1 are joined with ``'_'`` to form ``name``;
    * field 0 is additionally stored as the integer ``file_id``;
    * field 2 is used as the suffix of the image filename
      (``swedish_chef_data_path + name + '_' + field2 + '.jpg'``);
    * fields 2..end are parsed as integer labels; the row is flagged when
      label ``3`` (the id mapped to ``'swedish_chef'`` in ``label_map``) is
      present.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``file_id``, ``filename`` and ``contains_character``
        (1 if the Swedish Chef label is present, otherwise 0), one row per
        label line.

    Raises
    ------
    IndexError
        If a non-blank line has fewer than three fields.
    ValueError
        If ``file_id`` or any label field is not an integer.
    """
    swedish_chef_label = 3
    columns = ['name', 'file_id', 'filename', 'contains_character']

    # Collect plain dicts and build the frame once: appending to a DataFrame
    # inside the loop copies it on every row (and DataFrame.append no longer
    # exists in pandas >= 2.0).
    records = []
    with open(swedish_chef_label_file) as file:
        next(file, None)  # skip the header line
        for line in file:
            if not line.strip():
                continue  # tolerate blank/trailing empty lines
            fields = [field.strip() for field in line.split(',')]
            name = fields[0] + '_' + fields[1]
            labels = {int(field) for field in fields[2:]}
            records.append({
                'name': name,
                'file_id': int(fields[0]),
                'filename': swedish_chef_data_path + name + '_' + fields[2] + '.jpg',
                'contains_character': 1 if swedish_chef_label in labels else 0,
            })

    return pd.DataFrame(records, columns=columns)


def train_test_validation_split(df, file_ids=(1, 2, 3), random_state=42):
    """Split ``df`` into train/test/validation frames, stratified by video.

    For every id in ``file_ids`` the rows whose ``file_id`` column equals that
    id are shuffled (seeded with ``random_state``) and cut 60% / 20% / 20%.
    The per-video pieces are then concatenated in ``file_ids`` order, so each
    video contributes the same proportions to every split. Rows whose
    ``file_id`` is not listed in ``file_ids`` are not part of any split.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``file_id`` column.
    file_ids : iterable, default (1, 2, 3)
        Video ids to include in the split.
    random_state : int, default 42
        Seed passed to ``DataFrame.sample`` for a reproducible shuffle.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_df, test_df, val_df)`` -- note the order: test comes before
        validation. Each frame has a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If ``file_ids`` is empty.
    """
    file_ids = list(file_ids)
    if not file_ids:
        raise ValueError('file_ids must contain at least one video id')

    train_parts, test_parts, val_parts = [], [], []
    for file_id in file_ids:
        shuffled = df[df['file_id'] == file_id].sample(frac=1, random_state=random_state)
        train_end = int(.6 * len(shuffled))
        test_end = int(.8 * len(shuffled))
        # Positional slicing gives the same three pieces as np.split without
        # relying on numpy's (deprecated) handling of DataFrames.
        train_parts.append(shuffled.iloc[:train_end])
        test_parts.append(shuffled.iloc[train_end:test_end])
        val_parts.append(shuffled.iloc[test_end:])

    train_df = pd.concat(train_parts).reset_index(drop=True)
    test_df = pd.concat(test_parts).reset_index(drop=True)
    val_df = pd.concat(val_parts).reset_index(drop=True)

    return train_df, test_df, val_df


In [5]:
# Create the pig and Swedish Chef image datasets if they do not exist yet
# (per the original note, the existence check is done by the functions themselves).
create_pig_image_dataset()
create_swedish_chef_image_dataset()

# After dataset creation, load each label file into its own DataFrame.
df_pig = load_pig_image_dataset()
df_swedish_chef = load_swedish_chef_image_dataset()

Kermit image dataset already created.
Kermit image dataset already created.


In [6]:
# Create extra directories and link the image dirs because
# image_dataset_from_directory expects subdirectories. This layout is only
# used by the commented-out image_dataset_from_directory variant at the end
# of this cell.
Path(pig_keras_path).mkdir(exist_ok=True)
Path(swedish_chef_keras_path).mkdir(exist_ok=True)
if not os.path.exists(pig_keras_path + '/all'):
    os.symlink(pig_data_path, pig_keras_path + '/all')
    Path(pig_keras_path + '/abc').mkdir(exist_ok=True)
if not os.path.exists(swedish_chef_keras_path + '/all'):
    os.symlink(swedish_chef_data_path, swedish_chef_keras_path + '/all')
    Path(swedish_chef_keras_path + '/abc').mkdir(exist_ok=True)

# Labels ordered by image name (only consumed by the commented-out variant below).
labels_pig = df_pig.sort_values(by=['name'])['contains_character'].to_list()
labels_swedish_chef = df_swedish_chef.sort_values(by=['name'])['contains_character'].to_list()

# Rescale pixel values to [0, 1], apply random augmentation and reserve 20%
# of the rows as the 'validation' subset.
# NOTE(review): both subsets below come from this one augmenting generator, so
# the 'validation' images are augmented as well -- confirm this is intended.
data_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    rotation_range=25,  # fix: the comma was missing here, making the cell a SyntaxError
    horizontal_flip=True,
    validation_split=0.2)

df_pig['contains_character'] = df_pig['contains_character'].astype('str') # needed due to class_mode=binary

# Training subset (the part of df_pig not reserved by validation_split).
train_pig = data_generator.flow_from_dataframe(
    df_pig,
    x_col='filename',
    y_col='contains_character',
    target_size=(256, 256),
    batch_size=64,
    class_mode='binary',
    subset='training')

# Held-out subset; named `test_pig` but it is the generator's 'validation' subset.
test_pig = data_generator.flow_from_dataframe(
    df_pig,
    x_col='filename',
    y_col='contains_character',
    target_size=(256, 256),
    batch_size=64,
    class_mode='binary',
    subset='validation')

#dataset_pig = tf.keras.preprocessing.image_dataset_from_directory(pig_keras_path,
#                                                                  labels=labels_pig,
#                                                                 label_mode='int')
#dataset_swedish_chef = tf.keras.preprocessing.image_dataset_from_directory(swedish_chef_keras_path,
#                                                                           labels=labels_swedish_chef,
#                                                                          label_mode='int')
                                                              

Found 2483 validated image filenames belonging to 2 classes.
Found 620 validated image filenames belonging to 2 classes.


In [7]:
# Create the base model without top layers (ImageNet weights, 256x256 RGB input)
vgg16 = tf.keras.applications.VGG16(weights='imagenet', include_top=False, input_shape=(256, 256, 3))
vgg16.summary()

# Binary classification head on top of the last VGG16 layer.
flat1 = keras.layers.Flatten()(vgg16.layers[-1].output)
class1 = keras.layers.Dense(1024, activation='relu')(flat1)
# A single output unit must use sigmoid: softmax over one unit always yields
# 1.0, so the prediction would be constant and BinaryCrossentropy could never
# decrease.
output = keras.layers.Dense(1, activation='sigmoid')(class1)

model = keras.models.Model(inputs=vgg16.input, outputs=output)
model.summary()

# Optional: freeze VGG16 layers
#for layer in vgg16.layers:
#    layer.trainable = False

# Binary cross-entropy on the sigmoid probability; accuracy is computed at the
# metric's default threshold.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[keras.metrics.BinaryAccuracy()],
)

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 256, 256, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 256, 256, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 256, 256, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 128, 128, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 128, 128, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 128, 128, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 64, 64, 128)       0     

In [None]:
# Train for 2 epochs on the pig training generator; `test_pig` (the generator's
# 'validation' subset) is passed as the validation data.
model.fit(train_pig, validation_data=test_pig, epochs=2)

Epoch 1/2
 2/39 [>.............................] - ETA: 35:05 - loss: 10.2456 - binary_accuracy: 0.3281