In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the local `utils` package) are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import cv2
import re
import pickle

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import librosa

from PIL import Image
import IPython.display as ipd

from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

from tqdm import tqdm
from utils import io, train
from utils import feature_engineering as fe

In [None]:

# Input locations, relative to the notebook's working directory.
# videos_folder and gt_folder are handed to io.get_init_vars.
videos_folder = "../data_full/videos"
gt_folder = "../data_full/gt_annotations"
# NOTE(review): temp_folder is not referenced in any other visible cell — confirm it is still needed.
temp_folder = "../data_full/tmp/"

In [None]:
# Resolve the episode list, the video paths and the ground-truth table.
# NOTE(review): exact return types are defined in utils.io; other cells index
# video_paths by episode name and filter gt_df on its `episode` column.
episode_names, video_paths, gt_df = io.get_init_vars(gt_folder, videos_folder)

In [None]:
# Display the episodes that every per-episode loop in this notebook iterates over.
episode_names

# Preparation

Idea: use straight lines to detect the Balcony Gentlemen, since the balcony has very characteristic straight lines. Can we use the Hough transform in Sim1?

In [None]:
import math
import cv2 as cv
# Detect the strongest straight lines in one sample frame with the standard
# Hough transform and draw them on top of the Canny edge map.
img_path = '../data/frames/Muppets-03-04-03/frame_09404.jpg'
src = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
# cv.imread returns None (it does not raise) when the file cannot be read.
assert src is not None, f"file could not be read: {img_path}"
dst = cv.Canny(src, 50, 200, None, 3)

# Three-channel (BGR) copy of the edge map so coloured lines can be drawn on it.
cdst = cv.cvtColor(dst, cv.COLOR_GRAY2BGR)
cdstP = np.copy(cdst)

# Each detected line is stored as lines[i][0] = (rho, theta, votes).
lines = cv.HoughLinesWithAccumulator(dst, 1, np.pi / 180, 200, None, 0, 0)

max_lines_drawn = 4  # only the first few returned lines are drawn
if lines is not None:
    for rho, theta, _votes in lines[:max_lines_drawn, 0]:
        a = math.cos(theta)
        b = math.sin(theta)
        # (x0, y0) is the point of the line closest to the origin; extend the
        # line 1000 px in both directions along its direction vector (-b, a).
        x0 = a * rho
        y0 = b * rho
        pt1 = (int(x0 + 1000*(-b)), int(y0 + 1000*(a)))
        pt2 = (int(x0 - 1000*(-b)), int(y0 - 1000*(a)))
        cv.line(cdst, pt1, pt2, (0,0,255), 3, cv.LINE_AA)

plt.subplot(1,2,1)
# Single-channel image: use a gray colormap instead of matplotlib's default.
plt.imshow(src, cmap='gray')
plt.subplot(1,2,2)
# cdst is BGR but matplotlib expects RGB; convert so the red lines render red.
plt.imshow(cv.cvtColor(cdst, cv.COLOR_BGR2RGB))

In [None]:
# line_feat_dict = {}

# for ep in episode_names:
#     cap = io.load_video(video_paths[ep])
#     frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
#     line_num_list = []  
#     max_votes_list = []
    
#     for i in range(frame_count):
#         _, frame = cap.read()

#         dst = cv2.Canny(frame, 50, 200, None, 3)
#         cdst = cv2.cvtColor(dst, cv2.COLOR_GRAY2BGR)
#         cdstP = np.copy(cdst)
#         lines = cv2.HoughLinesWithAccumulator(dst, 0.5, np.pi / 180, 200, None, 0, 0)
        
#         line_feat = len(lines) if lines is not None else 0
#         max_votes = lines[0][0][2] if lines is not None else 0

#         line_num_list.append(line_feat)
#         max_votes_list.append(max_votes)

#     line_feat_dict[ep] = {}
#     line_feat_dict[ep]['num_lines'] =  np.array(line_num_list)[1:]
#     line_feat_dict[ep]['max_votes'] =  np.array(max_votes_list)[1:]

#     del line_num_list
#     del max_votes_list
#     cap.release()

Compute blobs:

In [None]:
# Per-frame blob-radius features for every episode.
# For each frame the n_max largest radii found by fe.detect_blob (hue-based)
# and by fe.detect_blob_cv (keypoint-based) are stored.
radius_col = 2   # column of fe.detect_blob's output that is read as the radius
n_max = 3        # number of largest radii kept per frame and detector
debug = False
white_params = {
    'mu_v': 200,
    'sigma_v': 40,
    'mu_h': 30,
    'sigma_h': 30
}

# Reference hue passed to fe.detect_blob. It does not depend on the frame, so
# it is computed once instead of once per frame.
sigma = 7
green_yellow = np.uint8([[[255*0.85, 255, 0]]])
hsv_green_yellow = cv2.cvtColor(green_yellow, cv2.COLOR_RGB2HSV)
mu = hsv_green_yellow[0, 0, 0]


def _largest_radii(radii, n):
    """Return the `n` largest values of `radii` in ascending order.

    The result always has length `n`; if fewer values are available the front
    is zero-padded so that all frames yield rows of equal length.
    """
    padded = np.zeros(n)
    values = np.sort(np.asarray(radii, dtype=float).ravel())
    top = values[-n:] if n > 0 else values[:0]
    if top.size:
        padded[-top.size:] = top
    return padded


radien_feat_dict = {}

for ep in episode_names:
    cap = io.load_video(video_paths[ep])
    green_radien_list = []
    white_radien_list = []

    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        for i in tqdm(range(frame_count)):
            ok, image = cap.read()
            if not ok or image is None:
                # The reported frame count can exceed the decodable frames;
                # keep one (all-zero) row per frame index instead of crashing.
                green_radien_list.append(np.zeros(n_max))
                white_radien_list.append(np.zeros(n_max))
                continue

            image_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
            image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            if debug:
                image_rbg = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                plt.imshow(image_rbg)

            # NOTE(review): assumes fe.detect_blob returns a 2-D array with one
            # row per blob; an empty result is treated as "no blobs".
            blob_list = np.asarray(fe.detect_blob(image_hsv, sigma, mu, debug=debug))
            if blob_list.ndim == 2 and len(blob_list):
                blob_radius = blob_list[:, radius_col]
            else:
                blob_radius = np.empty(0)
            # Previously the radii were computed but never stored, so the
            # saved feature arrays were always empty.
            green_radien_list.append(_largest_radii(blob_radius, n_max))

            keypoints = fe.detect_blob_cv(image_gray, image, image_hsv, debug=debug, **white_params)
            white_radien_list.append(_largest_radii([k.size for k in keypoints], n_max))
    finally:
        # Release the capture even if a frame fails to process.
        cap.release()

    # The first frame is dropped ([1:]), as in the original cell.
    # NOTE(review): the key names look copied from the line-feature cell; they
    # are kept unchanged so existing readers of the pickle keep working.
    # 'num_lines' holds the hue-blob radii, 'max_votes' the keypoint radii.
    radien_feat_dict[ep] = {
        'num_lines': np.array(green_radien_list)[1:],
        'max_votes': np.array(white_radien_list)[1:],
    }


In [None]:
# Persist the blob-radius features. The context manager closes the file handle
# (the bare open(...) left it to the garbage collector), and the target
# directory is created so the cell also works on a fresh checkout.
os.makedirs("../data/features", exist_ok=True)
with open("../data/features/radien_feat_dict.pkl", "wb") as fh:
    pickle.dump(radien_feat_dict, fh)

In [None]:
# Load a single extracted frame and prepare the colour-space variants and the
# reference hue used by the blob detectors, for interactive experiments.
img_path = '../data/frames/Muppets-03-04-03/frame_00000.jpg'
image = cv2.imread(img_path)
# cv2.imread returns None instead of raising when the file cannot be read;
# fail here with a clear message rather than inside cvtColor.
assert image is not None, f"file could not be read: {img_path}"
image_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image_rbg = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
if debug:
    plt.imshow(image_rbg)

sigma = 7
# Hue channel of a yellow-green RGB reference colour.
green_yellow = np.uint8([[[255*0.85, 255, 0]]])
hsv_green_yellow = cv2.cvtColor(green_yellow, cv2.COLOR_RGB2HSV)
mu = hsv_green_yellow[0, 0, 0]

# blob_list = fe.detect_blob(image_hsv, sigma, mu, debug=debug)
# blob_radius = blob_list[:, radius_col]
# biggest_radien_green = blob_radius[np.argsort(blob_radius)[-n_max:]]

# keypoints = fe.detect_blob_cv(image_gray, image, image_hsv, debug=debug, **white_params)
# k_radien = np.array([k.size for k in keypoints])
# biggest_radien_eyes = k_radien[np.argsort(k_radien)[-n_max:]]

In [None]:
# Display the episode name left over from the most recent `for ep in ...` loop.
ep

In [None]:
# from sklearn.cluster import MiniBatchKMeans
# from scipy.cluster.vq import whiten
# n_clusters = 5
# batch_size = 2048

# hue_feat_list = {}

# for ep in ['Muppets-03-04-03']:
#     cap = io.load_video(video_paths[ep])
#     frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
#     hue_list = []

#     for i in tqdm(range(frame_count)):
#         _, image = cap.read()

#         image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
#         hue = image[:, :, 0].flatten()
#         hue_std = hue.std()
#         if hue_std == 0:
#             hue_list.append([[0, 0] for _ in range(n_clusters)])
#             continue
#         scaled_hue = np.expand_dims(whiten(hue), -1)

#         km = MiniBatchKMeans(n_clusters = n_clusters, batch_size=batch_size, n_init='auto').fit(scaled_hue)
#         cluster_centers = km.cluster_centers_

#         dominant_hues = []
#         for cluster_center in cluster_centers:
#             hue_scaled = cluster_center[0]
        
#             # Convert each standardized value to scaled value
#             dominant_hues.append(
#                 hue_scaled * hue_std,
#             )

#         hues = np.asarray(dominant_hues, dtype='uint8')

#         percentage = np.asarray(np.unique(km.labels_, return_counts = True)[1], dtype='float32')
#         percentage = percentage/(image.shape[0]*image.shape[1])

#         if len(percentage) < n_clusters:
#             percentage = np.append(percentage, np.zeros(n_clusters - len(percentage)))

#         dom = [[percentage[ix], hues[ix]] for ix in range(km.n_clusters)]
#         dominance = sorted(dom, key=lambda x:x[0], reverse=True)

#         hue_list.append(dominance)

#         del km

#     hue_feat_list[ep] = {}
#     hue_feat_list[ep]['hue_list'] =  np.array(hue_list)[1:]

#     del hue_list
#     cap.release()


In [None]:
# Load the precomputed dominant-hue features.
# NOTE(security): pickle.load can execute arbitrary code; only load files that
# were produced by this project.
with open("../data/features/hue_feat_list_full.pkl", "rb") as fh:
    hue_feat_list = pickle.load(fh)

Compute audio features:

In [None]:
# Per-frame audio features (zero-crossing rate, RMS energy, MFCCs) for every
# episode, aligned to the number of ground-truth rows of that episode.
FPS = 25             # hop length is one video frame at this frame rate
frame_size_ms = 400  # analysis window length in milliseconds


def _fit_to_length(feature, length):
    """Zero-pad or truncate a (n_features, n_frames) array to `length` frames.

    np.pad alone raises on a negative pad width, which happened whenever
    librosa produced more frames than there are ground-truth rows.
    """
    feature = feature[:, :length]
    missing = length - feature.shape[1]
    return np.pad(feature, pad_width=((0, 0), (0, missing)))


ep_dfs = []
for ep in episode_names:
    # sr=None keeps the file's native sampling rate.
    rec, sr = librosa.load(video_paths[ep], sr=None)

    hop_length = int(1 / FPS * sr)
    frame_length = int(frame_size_ms / 1000 * sr)

    desired_len = len(gt_df[gt_df.episode==ep])

    zcr = librosa.feature.zero_crossing_rate(y=rec, frame_length=frame_length, hop_length=hop_length)
    zcr = _fit_to_length(zcr, desired_len).flatten()

    rms = librosa.feature.rms(y=rec, frame_length=frame_length, hop_length=hop_length)
    rms = _fit_to_length(rms, desired_len).flatten()

    mfcc = librosa.feature.mfcc(y=rec, sr=sr, n_fft=frame_length, hop_length=hop_length)
    mfcc = _fit_to_length(mfcc, desired_len)

    # Column order: zcr, rms, mfcc_0..mfcc_{k-1}, episode.
    columns = {'zcr': zcr, 'rms': rms}
    columns.update({f'mfcc_{i}': coeff for i, coeff in enumerate(mfcc)})
    ep_df = pd.DataFrame(columns)
    ep_df['episode'] = ep
    ep_dfs.append(ep_df)

feat_df = pd.concat(ep_dfs)

Compute feature dicts:
- Dominant Color
- Line Features

In [None]:
# Compute the dominant-colour features for all episodes without saving them
# (path_to_save=None).
# NOTE(review): the cell that consumes this reads color_feat_dict[ep] as a 2-D
# array with one row per frame — confirm against fe.get_dominant_color.
color_feat_dict = fe.get_dominant_color(episode_names, video_paths, path_to_save=None)

Add dominant color to the dataframe: 

In [None]:
# Write each column of an episode's dominant-colour array into feat_df as
# dc_0, dc_1, ... on the rows that belong to that episode.
for ep in episode_names:
    episode_colors = color_feat_dict[ep]
    episode_rows = feat_df['episode'] == ep
    for channel in range(episode_colors.shape[1]):
        feat_df.loc[episode_rows, f'dc_{channel}'] = episode_colors[:, channel]

Add line features to the dataframe:

In [None]:
# Copy the Hough line features into feat_df.
# line_feat_dict is only produced by the line-feature cell, which is currently
# commented out, so on a fresh kernel this cell used to raise NameError. Skip
# with a message instead; none of the training configs in this notebook use
# these two columns.
_line_feats = globals().get('line_feat_dict')
if _line_feats is None:
    print("line_feat_dict is not defined; skipping 'num_lines' / 'max_votes' columns")
else:
    for ep in episode_names:
        for line_feat_type in ['num_lines', 'max_votes']:
            feat_df.loc[feat_df['episode'] == ep, line_feat_type] = _line_feats[ep][line_feat_type]

In [None]:
# Spread the per-frame hue array of each episode into feat_df columns:
# the last axis selects the quantity (0 -> percentage, 1 -> hue), the middle
# axis selects the slot k, giving percentage_k and hue_k.
for ep in episode_names:
    episode_hues = hue_feat_list[ep]['hue_list']
    episode_rows = feat_df['episode'] == ep
    slot_count = episode_hues.shape[1]
    for quantity, prefix in enumerate(('percentage', 'hue')):
        for slot in range(slot_count):
            feat_df.loc[episode_rows, f'{prefix}_{slot}'] = episode_hues[:, slot, quantity]

In [None]:
# Display the assembled feature table.
feat_df

In [None]:
# Code to listen to audio
# ipd.Audio(rec, rate=sr)

# Training & Evaluation

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

## Inner CV

### Kermit

In [None]:
# Feature sets evaluated for the 'Kermit' target. Every configuration gets its
# own, freshly constructed decision tree.
# To try a random forest instead, use 'model_name': 'RF' and
# 'model': RandomForestClassifier(random_state=42, n_jobs=-1).
mfcc_cols = [f'mfcc_{i}' for i in range(20)]
hue_cols = [f'percentage_{i}' for i in range(5)] + [f'hue_{i}' for i in range(5)]

train_configs = [
    {
        'train_cols': cols,
        'target_col': 'Kermit',
        'model_name': 'DT',
        'model': DecisionTreeClassifier(random_state=42),
    }
    for cols in (
        ['zcr'],
        ['rms'],
        list(mfcc_cols),
        list(hue_cols),
        ['zcr', 'rms'] + mfcc_cols + hue_cols,
    )
]

In [None]:
# Run the inner cross-validation for every configuration in train_configs and
# collect the evaluation frames and model infos.
eval_dfs = []
model_infos = {}

for train_config in train_configs:
    # Label for the feature set: column names with a numeric suffix are reduced
    # to their prefix (mfcc_0..mfcc_19 -> "mfcc"), then de-duplicated in
    # first-seen order. The previous set-based version iterated in an order
    # that varies between kernel sessions, so labels and model_infos keys were
    # not reproducible.
    groups = []
    for col in train_config['train_cols']:
        match = re.search(r'(\w+)_\d+', col)
        groups.append(match.group(1) if match is not None else col)
    config = ', '.join(dict.fromkeys(groups))
    print(config)
    eval_df, model_info = train.train_eval_inner_cv(**train_config, config=config, feat_df=feat_df, gt_df=gt_df, episode_names=episode_names)

    eval_dfs.append(eval_df)
    model_infos[config] = model_info

eval_df = pd.concat(eval_dfs)

In [None]:
# Display the concatenated evaluation results of the preceding training run.
eval_df

In [None]:
# Run train.train_eval_2_to_1 for every configuration in train_configs and
# collect the evaluation frames and model infos.
eval_dfs = []
model_infos = {}

for train_config in train_configs:
    # Label for the feature set: column names with a numeric suffix are reduced
    # to their prefix (mfcc_0..mfcc_19 -> "mfcc"), then de-duplicated in
    # first-seen order. The previous set-based version iterated in an order
    # that varies between kernel sessions, so labels and model_infos keys were
    # not reproducible.
    groups = []
    for col in train_config['train_cols']:
        match = re.search(r'(\w+)_\d+', col)
        groups.append(match.group(1) if match is not None else col)
    config = ', '.join(dict.fromkeys(groups))
    print(config)
    eval_df, model_info = train.train_eval_2_to_1(**train_config, config=config, feat_df=feat_df, gt_df=gt_df, episode_names=episode_names)

    eval_dfs.append(eval_df)
    model_infos[config] = model_info

eval_df = pd.concat(eval_dfs)
eval_df

In [None]:
# Export the Kermit evaluation table. to_csv does not create missing
# directories, so make sure the target folder exists first.
os.makedirs("../data/eval", exist_ok=True)
eval_df.to_csv("../data/eval/DT_Kermit.csv")

In [None]:
# Inner cross-validation for a single configuration: hue features -> Kermit.
train_config = {
    'train_cols': [f'percentage_{i}' for i in range(5)] + [f'hue_{i}' for i in range(5)],
    'target_col': 'Kermit',
    'model_name': 'DecisionTree',
    'model': DecisionTreeClassifier(random_state=42)
}

# Feature-set label: numeric suffixes stripped, de-duplicated in first-seen
# order. A set was used before, which made the order of the joined names vary
# between kernel sessions.
groups = []
for col in train_config['train_cols']:
    match = re.search(r'(\w+)_\d+', col)
    groups.append(match.group(1) if match is not None else col)
config = ', '.join(dict.fromkeys(groups))

eval_df, model_info = train.train_eval_inner_cv(**train_config, config=config, feat_df=feat_df, gt_df=gt_df, episode_names=episode_names)
eval_df

## Gents

In [None]:
# Configurations for the two Statler & Waldorf targets. For each target the
# hue features, the audio features and their combination are evaluated, each
# with its own freshly constructed decision tree.
hue_cols = [f'percentage_{i}' for i in range(5)] + [f'hue_{i}' for i in range(5)]
audio_cols = ['zcr', 'rms'] + [f'mfcc_{i}' for i in range(20)]

train_configs = [
    {
        'train_cols': list(cols),
        'target_col': target,
        'model_name': 'DecisionTree',
        'model': DecisionTreeClassifier(random_state=42),
    }
    for target in ('Audio_StatlerWaldorf', 'StatlerWaldorf')
    for cols in (hue_cols, audio_cols, hue_cols + audio_cols)
]

In [None]:
# Run train.train_eval_2_to_1 for every configuration in train_configs and
# collect the evaluation frames and model infos.
eval_dfs = []
model_infos = {}

for train_config in train_configs:
    # Label for the feature set: column names with a numeric suffix are reduced
    # to their prefix (mfcc_0..mfcc_19 -> "mfcc"), then de-duplicated in
    # first-seen order. The previous set-based version iterated in an order
    # that varies between kernel sessions, so the labels were not reproducible.
    groups = []
    for col in train_config['train_cols']:
        match = re.search(r'(\w+)_\d+', col)
        groups.append(match.group(1) if match is not None else col)
    config = ', '.join(dict.fromkeys(groups))
    print(config)
    eval_df, model_info = train.train_eval_2_to_1(**train_config, config=config, feat_df=feat_df, gt_df=gt_df, episode_names=episode_names)

    eval_dfs.append(eval_df)
    # The same feature sets are trained for two different targets here, so the
    # label alone is not unique: keying on it let the second target silently
    # overwrite the first target's entry. Key on (target, label) instead.
    model_infos[(train_config['target_col'], config)] = model_info

eval_df = pd.concat(eval_dfs)

In [None]:
# Display the concatenated evaluation results of the preceding training run.
eval_df

In [None]:
# Export the Statler & Waldorf evaluation table. to_csv does not create
# missing directories, so make sure the target folder exists first.
os.makedirs("../data/eval", exist_ok=True)
eval_df.to_csv("../data/eval/DT_Gents.csv")

In [None]:
# eval_dfs = []
# model_infos = {}

# for train_config in train_configs:
#     config = ', '.join({re.search(r'(\w+)_\d+', c).group(1) if re.search(r'(\w+)_\d+', c) is not None else c for c in train_config['train_cols']})
#     eval_df, model_info = train.train_eval_inner_cv(**train_config, config=config, feat_df=feat_df, gt_df=gt_df, episode_names=episode_names)

#     eval_dfs.append(eval_df)
#     model_infos[config] = model_info

# eval_df = pd.concat(eval_dfs)

In [None]:
# Display eval_df again.
# NOTE(review): the inner-CV loop in the cell directly before this one is
# commented out, so this shows whatever eval_df was assigned last.
eval_df

In [None]:
# Inner cross-validation for a single configuration: hue features -> StatlerWaldorf.
train_config = {
    'train_cols': [f'percentage_{i}' for i in range(5)] + [f'hue_{i}' for i in range(5)],
    'target_col': 'StatlerWaldorf',
    'model_name': 'DecisionTree',
    'model': DecisionTreeClassifier(random_state=42)
}

# Feature-set label: numeric suffixes stripped, de-duplicated in first-seen
# order. A set was used before, which made the order of the joined names vary
# between kernel sessions.
groups = []
for col in train_config['train_cols']:
    match = re.search(r'(\w+)_\d+', col)
    groups.append(match.group(1) if match is not None else col)
config = ', '.join(dict.fromkeys(groups))

eval_df, model_info = train.train_eval_inner_cv(**train_config, config=config, feat_df=feat_df, gt_df=gt_df, episode_names=episode_names)
eval_df

# Stuff

In [None]:
# episode_id = 2
# cap = io.load_video(video_paths[episode_names[episode_id]])

# frames = []
# for i in range(1000):
#     ret, frame = cap.read()
#     frames.append(frame)

# img_orig, color = get_dominant_color(frames[942])

# dominant_color_normalized = [x/255 for x in color]

# # Create a new image of size 100x100 pixels and set all its pixels to the dominant color
# image = np.full((100, 100, 3), dominant_color_normalized)

# # Display the image
# plt.imshow(image)

# plt.imshow(img_orig)

In [None]:
# Scratch check: list the contents of the current working directory.
%ls

In [None]:
# Scratch check: list the extracted frames of one episode.
%ls '../data/frames/Muppets-02-04-04/'

In [None]:
# Scratch check: does this specific extracted frame exist on disk?
os.path.exists('../data/frames/Muppets-02-04-04/frame_06246.jpg')

In [None]:
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
# Compare the Laplacian and the horizontal/vertical Sobel responses of one
# grayscale frame in a 2x2 grid.
img_path = '../data/frames/Muppets-03-04-03/frame_09302.jpg'
img = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
assert img is not None, "file could not be read, check with os.path.exists()"

laplacian = cv.Laplacian(img, cv.CV_64F)
sobelx = cv.Sobel(img, cv.CV_64F, 1, 0, ksize=5)
sobely = cv.Sobel(img, cv.CV_64F, 0, 1, ksize=5)

panels = [
    ('Original', img),
    ('Laplacian', laplacian),
    ('Sobel X', sobelx),
    ('Sobel Y', sobely),
]
for position, (panel_title, panel_image) in enumerate(panels, start=1):
    plt.subplot(2, 2, position)
    plt.imshow(panel_image, cmap='gray')
    plt.title(panel_title)
    # Hide the tick marks; pixel coordinates are not of interest here.
    plt.xticks([])
    plt.yticks([])
plt.show()

In [None]:
# Reload the frame and black out every pixel whose vertical Sobel response
# (computed in the gradient cell) has a magnitude below 100.
img = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
weak_response = np.abs(sobely) < 100
img[weak_response] = 0.0
plt.subplot(2, 2, 4), plt.imshow(img)

In [None]:
import math
import cv2 as cv
# Compare the standard and the probabilistic Hough transform on one sample
# frame. The end points of the drawn standard-Hough lines are collected in
# pt1s / pt2s for the following cell.
img_path = '../data/frames/Muppets-03-04-03/frame_09404.jpg'
src = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
# cv.imread returns None (it does not raise) when the file cannot be read.
assert src is not None, f"file could not be read: {img_path}"
dst = cv.Canny(src, 50, 200, None, 3)

# Copy edges to the images that will display the results in BGR
cdst = cv.cvtColor(dst, cv.COLOR_GRAY2BGR)
cdstP = np.copy(cdst)

# Each detected line is stored as lines[i][0] = (rho, theta, votes).
lines = cv.HoughLinesWithAccumulator(dst, 1, np.pi / 180, 200, None, 0, 0)

pt1s = []
pt2s = []

max_lines_drawn = 4  # only the first few returned lines are drawn
if lines is not None:
    for rho, theta, _votes in lines[:max_lines_drawn, 0]:
        a = math.cos(theta)
        b = math.sin(theta)
        # (x0, y0) is the point of the line closest to the origin; extend the
        # line 1000 px in both directions along its direction vector (-b, a).
        x0 = a * rho
        y0 = b * rho
        pt1 = (int(x0 + 1000*(-b)), int(y0 + 1000*(a)))
        pt2 = (int(x0 - 1000*(-b)), int(y0 - 1000*(a)))
        pt1s.append(pt1)
        pt2s.append(pt2)
        cv.line(cdst, pt1, pt2, (0,0,255), 3, cv.LINE_AA)

# This call was commented out while the code below still used `linesP`, which
# raised NameError on a fresh kernel; it is restored with its original arguments.
linesP = cv.HoughLinesP(dst, 0.5, np.pi / 10, 150, None, 50, 10)

# HoughLinesP returns None when nothing is found, so guard before len().
print(0 if linesP is None else len(linesP))

if linesP is not None:
    for segment in linesP[:, 0]:
        x_start, y_start, x_end, y_end = (int(v) for v in segment)
        cv.line(cdstP, (x_start, y_start), (x_end, y_end), (0,0,255), 3, cv.LINE_AA)

plt.subplot(1,3,1)
# Single-channel image: use a gray colormap instead of matplotlib's default.
plt.imshow(src, cmap='gray')
plt.subplot(1,3,2)
# The overlays are BGR but matplotlib expects RGB; convert so red renders red.
plt.imshow(cv.cvtColor(cdst, cv.COLOR_BGR2RGB))
plt.subplot(1,3,3)
plt.imshow(cv.cvtColor(cdstP, cv.COLOR_BGR2RGB))

In [None]:
import seaborn as sns
# Visualise the extrapolated Hough lines collected in pt1s / pt2s.
# plt.hlines requires y, xmin and xmax; the previous calls passed only two
# positional arguments and therefore raised TypeError. Draw each line as the
# segment between its two end points and mark the end points in the colours
# the original calls used (pt1 orange, pt2 blue).
# NOTE(review): the intended plot was not recoverable from the broken calls —
# confirm this is the desired visualisation.
for (x_start, y_start), (x_end, y_end) in zip(pt1s, pt2s):
    plt.plot([x_start, x_end], [y_start, y_end], color='gray', linewidth=1)
if pt1s:
    plt.scatter([p[0] for p in pt1s], [p[1] for p in pt1s], c='orange', label='pt1')
    plt.scatter([p[0] for p in pt2s], [p[1] for p in pt2s], c='blue', label='pt2')
    plt.legend()