# 0. Imports

In [57]:
import itertools
import os.path as path
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, matthews_corrcoef, accuracy_score, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split

import monke_classify as mc
import monke_features as mf
import monke_io as mio

In [75]:
def _load_subject(names):
    """Load pose data and per-frame labels for one subject's recordings.

    Calls ``mio.read_poses`` and ``mio.read_tremors_multi`` with the same list
    of recording names, then builds one label entry per key of the tremor
    result via ``mf.generate_labelled_frames(poses[name], tremors[name])``.

    Args:
        names: list of recording names belonging to a single subject.

    Returns:
        A ``(poses, frame_labels)`` pair: the object returned by
        ``mio.read_poses`` and a dict keyed like the tremor result.
    """
    poses = mio.read_poses(names)
    tremors = mio.read_tremors_multi(names)
    frame_labels = {
        name: mf.generate_labelled_frames(poses[name], tremors[name])
        for name in tremors
    }
    return poses, frame_labels


# The raw tremor annotations are only needed to build the labels, so they stay
# local to _load_subject instead of being rebound in a shared `tremors_raw`
# global after every subject.

# KOI
names_koi = ["koi_apr11", "koi_apr17", "koi_apr25", "koi_apr25_3", "koi_apr25_4", "koi_apr25_5", "koi_apr25_6"]
pose_data_koi, labels_koi = _load_subject(names_koi)

# BOBA
names_boba = ["boba_apr11", "boba_apr21", "boba_apr21_2", "boba_apr25", "boba_apr25_2"]
pose_data_boba, labels_boba = _load_subject(names_boba)

# BANDUNG
names_bandung = ["bandung_mar27", "bandung_mar27_2", "bandung_mar27_3", "bandung_may19_2"]
pose_data_bandung, labels_bandung = _load_subject(names_bandung)

# HORLICKS
names_horlicks = ["horlicks_apr12", "horlicks_may2", "horlicks_jun16"]
pose_data_horlicks, labels_horlicks = _load_subject(names_horlicks)

# BRIYANI
names_briyani = ["briyani_apr12", "briyani_may19", "briyani_jun16"]
pose_data_briyani, labels_briyani = _load_subject(names_briyani)

# TUTU KUEH
names_tutukueh = ["tutukueh_apr21", "tutukueh_may2", "tutukueh_jun15"]
pose_data_tutukueh, labels_tutukueh = _load_subject(names_tutukueh)

# ALL: the same three kinds of object, grouped by subject.
names_all = {
    "koi": names_koi,
    "boba": names_boba,
    "bandung": names_bandung,
    "horlicks": names_horlicks,
    "briyani": names_briyani,
    "tutukueh": names_tutukueh,
}
pose_data_all = {
    "koi": pose_data_koi,
    "boba": pose_data_boba,
    "bandung": pose_data_bandung,
    "horlicks": pose_data_horlicks,
    "briyani": pose_data_briyani,
    "tutukueh": pose_data_tutukueh,
}
labels_all = {
    "koi": labels_koi,
    "boba": labels_boba,
    "bandung": labels_bandung,
    "horlicks": labels_horlicks,
    "briyani": labels_briyani,
    "tutukueh": labels_tutukueh,
}

In [80]:
def prep_train_test_data(pose_data, labels, train_names, test_names=None, weights=None, test_size=0.2):
    """Split every named recording and pool the training portions.

    Each recording in ``train_names`` is split on its own with
    ``train_test_split``. The training portions are pooled across recordings,
    while the test portions stay separate, keyed by recording name.

    Args:
        pose_data: mapping from recording name to its samples.
        labels: mapping from recording name to the labels of those samples.
        train_names: iterable of recording names to split.
        test_names: optional container of names; when given, only recordings
            whose name is in it keep their test portion in the result.
        weights: optional mapping from recording name to per-sample weights,
            split alongside the samples.
        test_size: forwarded to ``train_test_split``.

    Returns:
        ``(training_data, testing_data, training_labels, testing_labels)``,
        with ``training_weights`` appended as a fifth item when ``weights``
        is given. ``testing_data`` and ``testing_labels`` are dicts.

    Raises:
        IndexError: if ``train_names`` is empty.
    """
    with_weights = weights is not None

    def pooled(parts):
        # Several recordings are merged with np.concatenate; a single
        # recording's split is handed back untouched.
        return np.concatenate(parts) if len(parts) > 1 else parts[0]

    feature_parts, label_parts, weight_parts = [], [], []
    held_out_data, held_out_labels = {}, {}

    for name in train_names:
        to_split = [pose_data[name], labels[name]]
        if with_weights:
            to_split.append(weights[name])

        # train_test_split returns train/test pairs in argument order:
        # [X_train, X_test, y_train, y_test(, w_train, w_test)].
        pieces = train_test_split(*to_split, test_size=test_size)

        feature_parts.append(pieces[0])
        label_parts.append(pieces[2])
        if with_weights:
            weight_parts.append(pieces[4])

        if test_names is None or name in test_names:
            held_out_data[name] = pieces[1]
            held_out_labels[name] = pieces[3]

    training_data = pooled(feature_parts)
    training_labels = pooled(label_parts)

    if not with_weights:
        return training_data, held_out_data, training_labels, held_out_labels
    return training_data, held_out_data, training_labels, held_out_labels, pooled(weight_parts)

def prep_multi_train_test_data(pose_data, labels, train_names, test_names=None, weights=None, test_size=0.2):
    """Split every named recording, keeping all portions keyed by name.

    Works like ``prep_train_test_data`` but does not pool anything: the
    training portions are returned as dicts keyed by recording name as well.

    Args:
        pose_data: mapping from recording name to its samples.
        labels: mapping from recording name to the labels of those samples.
        train_names: iterable of recording names to split.
        test_names: optional container of names; when given, only recordings
            whose name is in it keep their test portion in the result.
        weights: optional mapping from recording name to per-sample weights,
            split alongside the samples.
        test_size: forwarded to ``train_test_split``.

    Returns:
        ``(training_data, testing_data, training_labels, testing_labels)``,
        with ``training_weights`` appended as a fifth item when ``weights``
        is given. Every item is a dict keyed by recording name.
    """
    with_weights = weights is not None

    train_features, train_targets, train_sample_weights = {}, {}, {}
    held_out_data, held_out_labels = {}, {}

    for name in train_names:
        to_split = [pose_data[name], labels[name]]
        if with_weights:
            to_split.append(weights[name])

        # Result order: [X_train, X_test, y_train, y_test(, w_train, w_test)].
        pieces = train_test_split(*to_split, test_size=test_size)

        if with_weights:
            train_sample_weights[name] = pieces[4]
        train_features[name] = pieces[0]
        train_targets[name] = pieces[2]

        if test_names is None or name in test_names:
            held_out_data[name] = pieces[1]
            held_out_labels[name] = pieces[3]

    if not with_weights:
        return train_features, held_out_data, train_targets, held_out_labels
    return train_features, held_out_data, train_targets, held_out_labels, train_sample_weights

def process_data(pose_data, labels, process):
    """Apply ``process`` to every recording and trim the labels to match.

    Args:
        pose_data: mapping from recording name to raw samples.
        labels: mapping from recording name to the labels of those samples.
        process: callable applied to each recording's samples; its result
            must expose ``.shape``.

    Returns:
        ``(processed_data, processed_labels)``: two dicts keyed by recording
        name. Each label sequence is cut to its first ``shape[0]`` entries,
        where ``shape`` is that of the processed samples.
    """
    data_out, labels_out = {}, {}

    for name in pose_data:
        transformed = process(pose_data[name])
        # Processing may yield fewer rows than the input; keep only as many
        # leading labels as there are processed rows.
        row_count = transformed.shape[0]
        data_out[name] = transformed
        labels_out[name] = labels[name][:row_count]

    return data_out, labels_out

def test_classify(clf, test_data, test_labels):
    """Predict with a fitted classifier and score the predictions.

    Args:
        clf: object exposing ``predict``.
        test_data: samples passed to ``clf.predict``.
        test_labels: true labels the predictions are scored against.

    Returns:
        Dict with the raw ``"predictions"`` plus the ``"mcc"``
        (``matthews_corrcoef``), ``"f1"`` (``f1_score``) and ``"accuracy"``
        (``accuracy_score``) of those predictions.
    """
    predictions = clf.predict(test_data)

    scorers = (
        ("mcc", matthews_corrcoef),
        ("f1", f1_score),
        ("accuracy", accuracy_score),
    )

    report = {"predictions": predictions}
    for key, scorer in scorers:
        report[key] = scorer(test_labels, predictions)
    return report

In [85]:
# Feature pipeline applied to each recording: mf.vel first, then
# mf.changes_in_changes with the arguments 10 and 0.1.
# NOTE(review): the meaning of 10 and 0.1 is defined in monke_features — confirm there.
process = lambda x : mf.changes_in_changes(mf.vel(x), 10, 0.1)

# Run the pipeline over every Koi recording; process_data also trims each
# label sequence to the number of processed rows.
processed_koi_data, processed_koi_labels = process_data(pose_data_koi, labels_koi, process)
# NOTE(review): the disabled call below refers to processed_data / processed_labels,
# which are not defined in the visible cells.
# training_data, testing_data, training_labels, testing_labels = prep_multi_train_test_data(processed_data, processed_labels, names_all)

18936
18306
20458
18604
2355
21240
1898


# 1. Steps

## 1.1 Test on Other Koi Video (koi_apr11), 10 minutes and 2 minutes.

In [110]:
# Number of processed rows for the koi_apr25_6 recording.
len(processed_koi_data['koi_apr25_6'])

1887

In [95]:
# Every recording in names_koi except koi_apr11.
combined_train_1_1 = ["koi_apr17", "koi_apr25", "koi_apr25_3", "koi_apr25_4", "koi_apr25_5", "koi_apr25_6"]

In [96]:
# Not populated in this cell; kept in case other cells reference them.
combined_train_pose_data_1_1 = {}
combined_train_labels_1_1 = {}

# Pool all training recordings with a single concatenate per array instead of
# re-copying the growing result once per recording.
# The empty (0, 51) seed array is kept so the concatenation still requires
# every recording to have 51 columns and the result type is unchanged.
all_train_pose_data_1_1 = np.concatenate(
    [np.empty((0, 51))] + [processed_koi_data[vid] for vid in combined_train_1_1]
)
# The empty-list seed is kept for the same reason on the label side.
all_train_labels_1_1 = np.concatenate(
    [[]] + [processed_koi_labels[vid] for vid in combined_train_1_1]
)

# One row per sample: the 51 feature columns followed by the label column.
temp = np.column_stack((all_train_pose_data_1_1, all_train_labels_1_1))

In [119]:
# Total number of pooled training rows (features plus label column).
len(temp)

82795

## HIDE FIRST

In [97]:
def pool_and_take_out(data, fractions=(0.20, 0.40, 0.60, 0.80, 1.0)):
    """Draw independent random row-index samples of increasing size.

    For each fraction, ``int(len(data) * fraction)`` distinct row indices are
    drawn without replacement with ``np.random.choice``. The draws are
    independent of one another, so a larger sample is not guaranteed to
    contain a smaller one. The function returns indices into ``data``, not
    the rows themselves; use ``data[indices]`` to get the rows.

    Args:
        data: array-like exposing ``shape`` and ``len``; only its row count
            is used.
        fractions: share of the rows to draw for each sample, in output
            order. Defaults to 20%, 40%, 60%, 80% and 100%.

    Returns:
        List with one 1-D array of row indices per entry in ``fractions``.
    """
    n_rows = data.shape[0]
    return [
        np.random.choice(n_rows, size=int(len(data) * fraction), replace=False)
        for fraction in fractions
    ]

In [98]:
# taken_out is a list of row-index arrays into temp, one per sample size.
taken_out = pool_and_take_out(temp)
# NOTE(review): the "(20%)" label is narrower than what gets printed — the
# whole list of index arrays is shown, not only the 20% sample.
print("Taken out data (20%):", taken_out)

Taken out data (20%): [array([73096, 13760, 29222, ..., 35567, 45133, 39220]), array([37273, 19991, 77341, ..., 39484, 72439,  7955]), array([56236, 58508, 59804, ..., 59970, 67834, 13023]), array([23246, 13630, 36162, ..., 63960, 28102, 30919]), array([76850,  8104, 63894, ..., 78254, 67084, 38515])]


In [112]:
# Size of the first (smallest) index sample.
len(taken_out[0])

16559

## Ah

In [None]:
# Draft of the section 1.1 experiment: one result dict per combination size,
# keyed by the combination tuple, each holding a list of test_classify results.
# NOTE(review): train_names_6c, runs, data_6c, labels_6c, test_names_6c and clf
# are only assigned in cells further down (section 1.2), so this cell depends
# on those having been run first.
results_1_1 = []

for i in range(1,5):
    combos = itertools.combinations(train_names_6c, i)

    results_1_1.append({})

    for train_combo in combos:
        results_1_1[i-1][train_combo] = []
        for _ in range(runs[i-1]):
            # NOTE(review): train_combo is only used as the result key; training
            # always uses combined_train_1_1, so every combination is fitted on
            # the same recordings — confirm this is intended.
            training_data, test_data, training_labels, test_labels = prep_train_test_data(data_6c, labels_6c, combined_train_1_1, test_names=test_names_6c)
            clf.fit(training_data, training_labels)
            # Score on the held-out portions of all returned test recordings at once.
            results_1_1[i-1][train_combo].append(test_classify(clf, np.concatenate([test_data[n] for n in test_data]), np.concatenate([test_labels[m] for m in test_labels])))

In [None]:
# Labels for the section 1.1 test and training configurations.
# NOTE(review): neither list is referenced by the other visible cells.
test_names_1_1 = ['100per_1_1', '20per_1_1']
train_names_1_1 = ["20_1_1", "40_1_1", "60_1_1", "80_1_1", "100_1_1"]

# Random forest with class weights recomputed per bootstrap sample.
clf = RandomForestClassifier(class_weight="balanced_subsample")

In [None]:
# One list of test_classify results per iteration (five iterations).
results_1_1 = []

for i in range(1, 6):
    # Grow the list before indexing into it; assigning to results_1_1[i] on
    # an empty list raises IndexError.
    results_1_1.append([])
    # NOTE(review): the original passed `train_combo`, which is never assigned
    # in this cell; any value left over from the combination loops is disjoint
    # from test_names_6c and would give an empty test set. combined_train_1_1
    # (this section's training list) is assumed to be what was meant — confirm.
    # NOTE(review): every iteration uses the same inputs and differs only by
    # the random split; test_names_6c is assigned in the section 1.2 setup cell.
    training_data, test_data, training_labels, test_labels = prep_train_test_data(processed_koi_data, processed_koi_labels, combined_train_1_1, test_names=test_names_6c)
    clf.fit(training_data, training_labels)
    # Score on the held-out portions of all returned test recordings at once.
    results_1_1[i - 1].append(test_classify(clf, np.concatenate([test_data[n] for n in test_data]), np.concatenate([test_labels[m] for m in test_labels])))

In [None]:
# Collect the MCC of every trial, grouped by training-set size: one inner list
# per entry of results_6c, covering all combinations of that size.
# The loop variable no longer shadows the builtin `set`.
# NOTE(review): this cell sits in section 1.1 but reads results_6c, which is
# filled in section 1.2 — confirm results_1_1 was not intended.
mccs_6c = [
    [trial["mcc"] for trials in combo_results.values() for trial in trials]
    for combo_results in results_6c
]

# Per-size mean and variance; axis=1 needs the same number of trials in every
# inner list.
avg_mcc_6c = np.mean(mccs_6c, axis=1)
variance_6c = np.var(mccs_6c, axis=1)

In [None]:
# Bar chart of the MCC variance, one bar per training-set size (1-based).
# xticks is reused by the plotting cells that follow.
xticks = list(range(1, len(variance_6c)+1))
plt.bar(xticks, variance_6c)
plt.title("Variance in MCC of Predictions of Koi April 25 (4) and Koi April 25 (6)")
plt.xlabel("Number of Training Datasets")
plt.ylabel("Variance")
plt.xticks(xticks)
plt.show()

In [None]:
# Bar chart of the mean MCC per training-set size.
# Relies on xticks assigned in the variance-plot cell.
plt.bar(xticks, avg_mcc_6c)
plt.title("Average MCC of Predictions of Koi April 25 (4) and Koi April 25 (6)")
plt.xlabel("Number of Training Datasets")
plt.ylabel("MCC")
plt.xticks(xticks)
plt.show()

## 1.2 Old

In [None]:
# Recordings used for evaluation and the pool that training combinations are
# drawn from; the two lists do not overlap.
test_names_6c = ["koi_apr25_4", "koi_apr25_6"]
train_names_6c = ["koi_apr11", "koi_apr17", "koi_apr25", "koi_apr25_3"]

# Same feature pipeline as above: mf.vel, then mf.changes_in_changes(…, 10, 0.1).
process = lambda x : mf.changes_in_changes(mf.vel(x), 10, 0.1)
data_6c, labels_6c = process_data(pose_data_koi, labels_koi, process)

# Repetitions per combination for training-set sizes 1..4. With 4, 6, 4 and 1
# combinations respectively, each size ends up with 12 trials in total, which
# keeps the per-size MCC lists the same length for np.mean/np.var with axis=1.
runs = [3, 2, 3, 12]
# Random forest with class weights recomputed per bootstrap sample.
clf = RandomForestClassifier(class_weight="balanced_subsample")

In [None]:
# One dict per training-set size, keyed by the combination of training
# recordings, each holding a list of test_classify results.
results_6c = []

# Evaluate on the complete held-out recordings. The training combinations are
# drawn from train_names_6c, which shares no name with test_names_6c, so
# prep_train_test_data(..., test_names=test_names_6c) returns an empty test
# set and concatenating it raises ValueError.
test_data = np.concatenate([data_6c[n] for n in test_names_6c])
test_labels = np.concatenate([labels_6c[m] for m in test_names_6c])

for i in range(1, 5):
    combos = itertools.combinations(train_names_6c, i)

    results_6c.append({})

    for train_combo in combos:
        results_6c[i-1][train_combo] = []
        for _run in range(runs[i-1]):
            # Each training recording is still split with the default
            # test_size, so the model is fitted on the training portion only;
            # the split-off test portions are not used here.
            training_data, _split_test_data, training_labels, _split_test_labels = prep_train_test_data(data_6c, labels_6c, train_combo)
            clf.fit(training_data, training_labels)
            results_6c[i-1][train_combo].append(test_classify(clf, test_data, test_labels))

In [None]:
# Collect the MCC of every trial, grouped by training-set size: one inner list
# per entry of results_6c, covering all combinations of that size.
# The loop variable no longer shadows the builtin `set`.
mccs_6c = [
    [trial["mcc"] for trials in combo_results.values() for trial in trials]
    for combo_results in results_6c
]

# Per-size mean and variance; axis=1 needs the same number of trials in every
# inner list.
avg_mcc_6c = np.mean(mccs_6c, axis=1)
variance_6c = np.var(mccs_6c, axis=1)

In [None]:
# Bar chart of the mean MCC per training-set size.
# NOTE(review): xticks is not assigned in this section; it comes from the
# variance-plot cell in section 1.1, which must have been run beforehand.
plt.bar(xticks, avg_mcc_6c)
plt.title("Average MCC of Predictions of Koi April 25 (4) and Koi April 25 (6)")
plt.xlabel("Number of Training Datasets")
plt.ylabel("MCC")
plt.xticks(xticks)
plt.show()