In [None]:
import csv
import dataset_loader
import matplotlib.pyplot as plt
import numpy as np
import pathlib
import similarity
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from typing import List, Tuple
from tqdm import tqdm
import warnings

warnings.filterwarnings("ignore", category=UserWarning)

In [None]:
# Dataset directory handed to dataset_loader.spawn(dataset_root=...).
# Alternative dataset kept for quick switching:
#   DATASET_ROOT = "../../sabana/dataset/newbie-dataset"
DATASET_ROOT = "../../sabana/dataset/asap-dataset"

In [None]:
# --- Parameters forwarded to dataset_loader.spawn ---
SLICE_DURATION = 5  # sec
EXPANSION_RATE = 2.5
FRAME_PER_SECOND = 20  # Hz

# --- Sampling loop ---
NUM_SAMPLES = 100  # number of (score, perf) samples drawn from the generator
QUEUE_SIZE = 8  # length of the FIFO of past performances used for negative pairs

# --- Parameters forwarded to similarity.score ---
SETTLING_FRAME = 8
COMPENSATION_FRAME = 0
USE_SUBSEQUENCE_DTW = False

# Output directory for the pos.csv / neg.csv feature dumps.
SAVE_ROOT = pathlib.Path("./save")

# mkdir(exist_ok=True) is already a no-op for an existing directory, so no
# separate exists() check is needed. Calling it unconditionally also surfaces
# a FileExistsError right here if ./save exists but is not a directory.
SAVE_ROOT.mkdir(parents=True, exist_ok=True)


In [None]:
# Sample source for the sampling loop, which pulls items from it with next()
# and unpacks each one as a 3-tuple (score, perf, <unused>).
gen = dataset_loader.spawn(
    dataset_root=DATASET_ROOT,
    slice_duration=SLICE_DURATION,
    expansion_rate=EXPANSION_RATE,
    frame_per_second=FRAME_PER_SECOND,
    shuffle=True,
)

# Feature rows collected by the sampling loop, one per pair:
# (euclidean_similarity, timewarping_similarity, length_ratio)
pos_similarities: List[Tuple[float, float, float]] = []
neg_similarities: List[Tuple[float, float, float]] = []

# Fixed-length FIFO of earlier performances, pre-filled with None placeholders
# so that no negative pair is formed until QUEUE_SIZE samples have been seen.
prev_perfs: List[np.ndarray] = [None for _ in range(QUEUE_SIZE)]

In [None]:
CSV_HEADER = ["Euclidean Similarity", "Timewarping Similarity", "Length ratio"]


def _pair_features(score, perf) -> Tuple[float, float, float]:
    """Return (euclidean similarity, timewarping similarity, length ratio) for one pair.

    The two similarities are the first two values returned by
    ``similarity.score`` (its third return value is discarded). The length
    ratio is the size of ``perf``'s last axis divided by the size of
    ``score``'s last axis. Both arguments only need to expose ``.shape`` and
    be accepted by ``similarity.score``.
    """
    euclidean_similarity, timewarping_similarity, _ = similarity.score(
        score, perf,
        settling_frame=SETTLING_FRAME,
        compensation_frame=COMPENSATION_FRAME,
        use_subsequence_dtw=USE_SUBSEQUENCE_DTW,
    )
    # The small epsilon keeps the division defined if score's last axis is empty.
    length_ratio = perf.shape[-1] / (score.shape[-1] + 1e-7)
    return euclidean_similarity, timewarping_similarity, length_ratio


# Start from empty accumulators every time this cell runs. The CSV files are
# truncated ("w") on each run, so the in-memory rows must be reset as well to
# stay consistent with them. Rebinding to fresh lists also keeps the cell
# re-runnable after a later cell has converted these names to NumPy arrays.
pos_similarities = []
neg_similarities = []
prev_perfs = [None] * QUEUE_SIZE

# newline="" is required by the csv module; without it the writer's "\r\n"
# line endings are translated again on Windows, producing blank rows.
with open(SAVE_ROOT / "pos.csv", "w", encoding="utf-8", newline="") as f1, \
     open(SAVE_ROOT / "neg.csv", "w", encoding="utf-8", newline="") as f2:
    pos_csvfile = csv.writer(f1, delimiter=",", quotechar="|")
    pos_csvfile.writerow(CSV_HEADER)

    neg_csvfile = csv.writer(f2, delimiter=",", quotechar="|")
    neg_csvfile.writerow(CSV_HEADER)

    for _ in tqdm(range(NUM_SAMPLES)):
        score, perf, _ = next(gen)

        # Positive pair: the score with the performance yielded alongside it.
        pos_row = _pair_features(score, perf)
        pos_csvfile.writerow(list(pos_row))
        pos_similarities.append(pos_row)

        # Negative pair: the same score with the performance yielded
        # QUEUE_SIZE iterations earlier. The head of the queue is still a None
        # placeholder during the first QUEUE_SIZE iterations, so those
        # iterations contribute no negative row.
        # NOTE(review): assumes the older performance belongs to a different
        # passage than the current score -- confirm against dataset_loader.
        oldest_perf = prev_perfs[0]
        if isinstance(oldest_perf, np.ndarray):
            neg_row = _pair_features(score, oldest_perf)
            neg_csvfile.writerow(list(neg_row))
            neg_similarities.append(neg_row)

        # Slide the FIFO window: drop the oldest entry, enqueue the newest.
        prev_perfs.pop(0)
        prev_perfs.append(perf)

In [None]:
# Convert the collected rows to (n_pairs, 3) arrays with columns
# [euclidean similarity, timewarping similarity, length ratio].
# reshape(-1, 3) keeps the result two-dimensional even when a list is empty
# (e.g. no negative pairs because NUM_SAMPLES <= QUEUE_SIZE). A bare
# np.array([]) would have shape (0,), which breaks the `[:, k]` column
# indexing and the np.concatenate call in the cells that follow.
# Note: this rebinds both names from Python lists to NumPy arrays.
pos_similarities = np.array(pos_similarities).reshape(-1, 3)
neg_similarities = np.array(neg_similarities).reshape(-1, 3)
print(pos_similarities.shape)
print(neg_similarities.shape)

In [None]:
# 2-D scatter of Euclidean vs. timewarping similarity: positives as filled
# black markers, negatives as white markers with a black outline.
fig = plt.figure()
ax = fig.gca()
ax.set_title(f"expansion rate: {EXPANSION_RATE}, settling frame: {SETTLING_FRAME}")
ax.set_xlabel("Euclidean Similarity")
ax.set_ylabel("Timewarping Similarity")
ax.scatter(pos_similarities[:, 0], pos_similarities[:, 1], c="k", label="Positive")
ax.scatter(neg_similarities[:, 0], neg_similarities[:, 1], c="w", edgecolors="k", label="Negative")
ax.legend()
plt.show()
# Release this specific figure. plt.clf() after plt.show() acts on the
# *current* figure, which may be a freshly created empty one (rendered inline
# as a stray "Figure ... with 0 Axes"), and it never frees the figure anyway.
plt.close(fig)

In [None]:
# 3-D scatter adding the length ratio as the third axis: positives as filled
# black markers, negatives as white markers with a black outline.
fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments (deprecated in Matplotlib
# 3.4, removed in 3.6), so the 3-D axes are created through add_subplot.
ax = fig.add_subplot(111, projection="3d")
ax.set_title(f"expansion rate: {EXPANSION_RATE}, settling frame: {SETTLING_FRAME}")
ax.set_xlabel("Euclidean similarity")
ax.set_ylabel("Timewarping similarity")
ax.set_zlabel("Length ratio")
ax.scatter(pos_similarities[:, 0], pos_similarities[:, 1], pos_similarities[:, 2], c="k", label="Positive")
ax.scatter(neg_similarities[:, 0], neg_similarities[:, 1], neg_similarities[:, 2], c="w", edgecolors="k", label="Negative")
ax.legend()
plt.show()

# Release this specific figure. plt.clf() after plt.show() acts on the
# *current* figure, which may be a freshly created empty one, and it never
# frees the figure anyway.
plt.close(fig)

In [None]:
# Stack the positive rows first, then the negative rows, and build the label
# vector in the same order: +1 for each positive pair, -1 for each negative.
x_total = np.concatenate((pos_similarities, neg_similarities))
y_total = np.array([+1] * len(pos_similarities) + [-1] * len(neg_similarities))
for stacked in (x_total, y_total):
    print(stacked.shape)


In [None]:
# Hold out 20% of the pairs for testing. The split is stratified on the labels
# and uses a fixed random_state so it is repeatable for the same input.
x_train, x_test, y_train, y_test = train_test_split(
    x_total,
    y_total,
    test_size=0.2,
    stratify=y_total,
    random_state=42,
)
for part in (x_train, y_train, x_test, y_test):
    print(part.shape)

In [None]:
# Linear-kernel SVM on the first two feature columns only (Euclidean and
# timewarping similarity); the length-ratio column is left out.
svm_linear_2d = SVC(kernel="linear").fit(x_train[:, :2], y_train)
accuracy_linear_2d = svm_linear_2d.score(x_test[:, :2], y_test)
print("SVM(linear) Accuracy:", accuracy_linear_2d)

In [None]:
# Linear-kernel SVM on all three feature columns (both similarities plus the
# length ratio), scored on the held-out split.
svm_linear_3d = SVC(kernel="linear").fit(x_train, y_train)
accuracy_linear_3d = svm_linear_3d.score(x_test, y_test)
print("SVM(linear) Accuracy:", accuracy_linear_3d)

In [None]:
# Degree-3 polynomial-kernel SVM on all three feature columns.
svm_poly3 = SVC(kernel="poly", degree=3).fit(x_train, y_train)
accuracy_poly3 = svm_poly3.score(x_test, y_test)
print("SVM(poly_3) Accuracy:", accuracy_poly3)

In [None]:
# Degree-5 polynomial-kernel SVM on all three feature columns.
svm_poly5 = SVC(kernel="poly", degree=5).fit(x_train, y_train)
accuracy_poly5 = svm_poly5.score(x_test, y_test)
print("SVM(poly_5) Accuracy:", accuracy_poly5)

In [None]:
# Degree-7 polynomial-kernel SVM on all three feature columns.
svm_poly7 = SVC(kernel="poly", degree=7).fit(x_train, y_train)
accuracy_poly7 = svm_poly7.score(x_test, y_test)
print("SVM(poly_7) Accuracy:", accuracy_poly7)

In [None]:
# RBF-kernel SVM on all three feature columns.
svm_rbf = SVC(kernel="rbf").fit(x_train, y_train)
accuracy_rbf = svm_rbf.score(x_test, y_test)
print("SVM(rbf) Accuracy:", accuracy_rbf)