In [None]:
from handcrafted.app.dataset.dataset import Dataset
from handcrafted.app.dataset.utils.dataset_splitter import SignerDatasetSplitter
from handcrafted.app.features.plotter.frames_plotter import plot_frames
from handcrafted.app.model.model_statistics import ModelStatistics
from handcrafted.app.model.svc_classifier import SVCClassifier

import numpy as np

# Cap on how many videos are used: later cells slice `videos[:n_videos]` and
# embed this number in the name under which the statistics are saved.
n_videos = 2000

# Dataset object constructed from the WLASL v0.3 JSON file. The path is
# relative, so it is resolved against the notebook's working directory.
dataset = Dataset("data/WLASL_v0.3.json")

In [None]:
# Hold one reference to the dataset's video entries; later cells iterate and
# slice `videos` instead of reading `dataset.videos` again.
videos = dataset.videos
# Bare last expression: the notebook displays how many video entries there are.
len(videos)

In [None]:
# One signer id per video entry, in the same order as `videos`
# (duplicates are kept, so this list is always as long as `videos`).
signers = [video.signer_id for video in videos]
# Show the number of *distinct* signers. `len(signers)` would merely repeat
# `len(videos)`, which the previous cell already displays.
# Assumes signer ids are hashable (e.g. ints or strings) — TODO confirm.
len(set(signers))

In [None]:
# Build the splitter over the first `n_videos` videos only.
# NOTE(review): `frames_split=0.01` presumably selects a fraction of each
# video's frames — confirm against SignerDatasetSplitter.
splitter = SignerDatasetSplitter(
    videos[:n_videos],
    frames_split=0.01,
)

# Three-way split; `random_state` is pinned, presumably so that the partition
# is the same on every run — confirm the splitter honours it.
train_frames, val_frames, test_frames = splitter.train_test_split(
    test_size=0.2,
    val_size=0.2,
    random_state=42,
)

# NOTE(review): with `num_augmentations=0` this presumably adds no augmented
# samples; the next cell prints both lengths so that can be checked.
augmented_train_frames = splitter.apply_data_augmentation(
    train_frames,
    num_augmentations=0,
)

In [None]:
# Report the size of every split with a label, so the numbers can be read
# without knowing the order of the print calls.
print(f"train frames:           {len(train_frames)}")
print(f"augmented train frames: {len(augmented_train_frames)}")
print(f"validation frames:      {len(val_frames)}")
print(f"test frames:            {len(test_frames)}")

In [None]:
# Visual sanity check: plot the `.frame` attribute of the first 30 entries of
# the (augmented) training set.
plot_frames(
    [sample.frame for sample in augmented_train_frames[:30]]
)

In [None]:
# For every split, X holds each sample's `.features` and y holds the matching
# `.signer_id`; both lists are built from the same sequence, so they line up
# index by index.

# Training data comes from the augmented training frames.
X_train = [sample.features for sample in augmented_train_frames]
y_train = [sample.signer_id for sample in augmented_train_frames]

# Validation data.
X_val = [sample.features for sample in val_frames]
y_val = [sample.signer_id for sample in val_frames]

# Test data.
X_test = [sample.features for sample in test_frames]
y_test = [sample.signer_id for sample in test_frames]

In [None]:
# Print the six list lengths, one per line, in the order
# X_train, y_train, X_val, y_val, X_test, y_test.
print(
    "\n".join(
        str(len(part))
        for part in (X_train, y_train, X_val, y_val, X_test, y_test)
    )
)

In [None]:
# Classifier wrapper created with its default constructor arguments.
svc = SVCClassifier()

# Fit on the training split: the feature list is converted to a NumPy array,
# while the signer-id labels are passed as the plain Python list.
svc.train(
    np.array(X_train),
    y_train,
)

In [None]:
# Predict signer ids for the test split. The features are converted to a
# NumPy array, mirroring how the training features are handed to `svc.train`,
# so the classifier sees the same input type in both calls.
# NOTE(review): `predict` also receives `y_test`; what it does with the true
# labels is not visible from this notebook — confirm in SVCClassifier.
y_pred = svc.predict(np.array(X_test), y_test)

In [None]:
# Statistics helper for this run. The save name encodes the video cap
# (`n_videos`) and the number of test samples; `save_dir` is a relative path.
stats = ModelStatistics(
    save_name=f"svc_{n_videos}_signer_test_{len(y_test)}",
    save_dir="signer/plots",
)

In [None]:
# Report accuracy of the predictions against the true test signer ids
# (argument order: true labels first, predictions second).
stats.print_accuracy(y_test, y_pred)

In [None]:
# Confusion matrix of true vs. predicted signer ids on the test split.
# NOTE(review): `save=True` presumably writes the figure using the
# `save_name`/`save_dir` given to ModelStatistics, and `plot=True` presumably
# shows it inline — confirm in ModelStatistics.
stats.plot_confusion_matrix(
    y_test,
    y_pred,
    save=True,
    plot=True,
)