# Character Trajectories dataset

Link to [Official repository](https://archive-beta.ics.uci.edu/dataset/175/character+trajectories) on UC Irvine


### Data description:

Multiple, labelled samples of pen tip trajectories recorded whilst writing individual characters. All samples are from the same writer, for the purposes of primitive extraction. Only characters with a single pen-down segment were considered.

Each character sample is a 3-dimensional pen tip velocity trajectory. This is contained in matrix format, with 3 rows and T columns where T is the length of the character sample.

The characters here were used for a PhD study on primitive extraction using HMM based models. The data consists of 2858 character samples, contained in the cell array 'mixout'. The struct variable 'consts' contains a field consts.charlabels which provides enumerated labels for the characters. consts.key provides the key for each label. The data was captured using a WACOM tablet. 3 Dimensions were kept - x, y, and pen tip force. The data has been numerically differentiated and Gaussian smoothed, with a sigma value of 2. Data was captured at 200Hz. The data was normalised with consts.datanorm. Only characters with a single 'PEN-DOWN' segment were considered. Character segmentation was performed using a pen tip force cut-off point. The characters have also been shifted so that their velocity profiles best match the mean of the set.

## Download database

Run the cell below to collect the archives in your folder.

Safe cell: it creates the folder `../trajectory_dataset` if it does not already exist and downloads the dataset files into it (files that are already present are not downloaded again).

In [None]:
import os 
import requests

# Download the dataset files into ../trajectory_dataset.
# The folder is created when missing and each file is fetched only if it is
# not already on disk, so an interrupted first run is completed by re-running
# the cell (previously nothing was downloaded once the folder existed).
data_dir = "../trajectory_dataset"
base_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/character-trajectories/"

os.makedirs(data_dir, exist_ok=True)

for filename in ("mixoutALL_shifted.mat", "trajectories.names"):
    target = os.path.join(data_dir, filename)
    if os.path.exists(target):
        continue

    r = requests.get(base_url + filename, timeout=60)
    # Fail loudly instead of saving an HTTP error page as if it were the data.
    r.raise_for_status()

    # The context manager guarantees the file is flushed and closed.
    with open(target, "wb") as f:
        f.write(r.content)
# ! wget -O ../trajectory_dataset/mixoutALL_shifted.mat https://archive.ics.uci.edu/ml/machine-learning-databases/character-trajectories/mixoutALL_shifted.mat
# ! wget -O ../trajectory_dataset/trajectories.names https://archive.ics.uci.edu/ml/machine-learning-databases/character-trajectories/trajectories.names

## Interacting with data

In [None]:
from scipy.io import loadmat
import numpy as np

# Read the MATLAB file into a Python dictionary (variable name -> array)
mat_file = '../trajectory_dataset/mixoutALL_shifted.mat'
mat = loadmat(mat_file)

# Last expression of the cell: shows the names stored in the file
mat.keys()

In [None]:
# Information about the dataset

n_samples = len(mat["mixout"][0])

print(f"Number of samples:\t {n_samples}")


# Loading samples labels
# Field 4 of the 'consts' struct holds one 1-based class index per sample
# (presumably consts.charlabels -- confirm against the .mat file).
keys = np.asarray(mat["consts"][0, 0][4][0]).astype(int) - 1

# Field 3 holds the character attached to each class index
# (presumably consts.key -- confirm against the .mat file).
labels = np.array([label[0] for label in mat["consts"][0, 0][3][0]])

# Index with a flat integer array. The previous `labels[[keys]][0]` relied on
# how NumPy interprets a list nested inside a list, which is not the same
# across NumPy versions.
samples_label = labels[keys]

label_unique, label_count = np.unique(samples_label, return_counts=True)

# Text table: one column per character, with its number of samples
print()
print("Data distribution:")
print(f"{len(label_unique)} different class")
cells = ["___"] * len(label_count)
line_u = "_______" + "_".join(cells) + "_"
print(line_u)
char = "|CHAR  | " + " | ".join(label_unique) + " |"
print(char)
line_d = "|______|" + "|".join(cells) + "|"
print(line_d)
count = "|COUNT |" + "|".join(str(c) for c in label_count) + "|"
print(count)
print(line_d)


# One entry per character sample; each entry has one row per feature
samples = mat["mixout"][0]

f_s = 200 #Hz sampling at 200Hz

print(f"\n\nN_features: \t\t{samples[0].shape[0]} ---> \t(v_x, v_y, pen_tip_force)")
print(f"Sampling frequency: \t{f_s} Hz")


In [None]:
import matplotlib.pyplot as plt

# Draw 10 sample indices at random (independent draws, so repeats are possible)
rdm_sample_idx = np.random.randint(0, len(samples), 10)

for i, i_sample in enumerate(rdm_sample_idx):

    exsample = samples[i_sample]

    x_speed = exsample[0]
    y_speed = exsample[1]

    # Conversion to trajectory beginning at point (0,0): integrate the
    # velocities with a cumulative sum, each step lasting 1/f_s.
    x = np.concatenate(([0], np.cumsum(x_speed / f_s)))
    y = np.concatenate(([0], np.cumsum(y_speed / f_s)))

    plt.subplot(2, 5, i+1)
    # Colour encodes the time step along the trajectory
    plt.scatter(x, y, c=np.arange(len(x)), s=5)

    # To modulate the marker size depending on the pen tip force (row 2):
    # plt.scatter(x[1:], y[1:], c=np.arange(len(x[1:])), s=2*np.abs(exsample[2]))

    plt.plot(x, y, alpha=0.5)

    plt.title(samples_label[i_sample])

    # Same scale on both axes so the character is not distorted
    plt.axis('equal')
    plt.axis('off')
    #plt.colorbar()

plt.suptitle("Characters samples (start in indigo, end in yellow)")
plt.show()

# One panel per selected sample: third channel (pen tip force) against the time step
for panel, sample_idx in enumerate(rdm_sample_idx, start=1):
    force = samples[sample_idx][2]

    axis = plt.subplot(2, 5, panel)
    axis.plot(np.arange(len(force)), force)
    axis.set_title(samples_label[sample_idx])

plt.suptitle("Pen tip force curves")
plt.show()

# One panel per selected sample: first channel (x velocity) against the time step
for i, i_sample in enumerate(rdm_sample_idx):

    x_speed = samples[i_sample][0]

    plt.subplot(2, 5, i+1)
    plt.plot(np.arange(len(x_speed)), x_speed)
    plt.title(samples_label[i_sample])

# The x axis is horizontal: a negative speed moves left (the title said "down")
plt.suptitle("X speed (+ = right, - = left)")
plt.show()

# One panel per selected sample: second channel (y velocity) against the time step
for i, i_sample in enumerate(rdm_sample_idx):

    y_speed = samples[i_sample][1]

    plt.subplot(2, 5, i+1)
    plt.plot(np.arange(len(y_speed)), y_speed)
    plt.title(samples_label[i_sample])

# The y axis is vertical: a negative speed moves down (the title said "left")
plt.suptitle("Y speed (+ = up, - = down)")
plt.show()

# One panel per selected sample: x position against the time step
for i, i_sample in enumerate(rdm_sample_idx):

    x_speed = samples[i_sample][0]

    # Conversion to a position beginning at 0: cumulative sum of the
    # velocity, each step lasting 1/f_s. Only x is needed for this figure.
    x = np.concatenate(([0], np.cumsum(x_speed / f_s)))

    plt.subplot(2, 5, i+1)
    plt.plot(np.arange(len(x)), x)
    plt.title(samples_label[i_sample])

plt.suptitle("X position")
plt.show()


# One panel per selected sample: y position against the time step
for i, i_sample in enumerate(rdm_sample_idx):

    y_speed = samples[i_sample][1]

    # Conversion to a position beginning at 0: cumulative sum of the
    # velocity, each step lasting 1/f_s. Only y is needed for this figure.
    y = np.concatenate(([0], np.cumsum(y_speed / f_s)))

    plt.subplot(2, 5, i+1)
    plt.plot(np.arange(len(y)), y)
    plt.title(samples_label[i_sample])

plt.suptitle("Y position")
plt.show()

## TWI-kSVD on char-traj data

In [None]:
# Split every sample into its three channels (rows 0, 1 and 2)
x_speeds = [sample[0] for sample in samples]
y_speeds = [sample[1] for sample in samples]
pen_tip_forces = [sample[2] for sample in samples]

# Shortest and longest trajectory, taken from the data itself. The previous
# running minimum started at a hard-coded 1000, which would silently cap
# min_len if every sample were longer than that.
lengths = [len(speed) for speed in x_speeds]
max_len = max(lengths, default=0)
min_len = min(lengths, default=0)


In [None]:
# Report the shortest and the longest trajectory length, one per line
print(
    f"Longueur minimale : {min_len}",
    f"Longueur maximale : {max_len}",
    sep="\n",
)

In [None]:
# Stack the first two x-velocity signals vertically, each titled with its label
for row, (velocity, label) in enumerate(zip(x_speeds[:2], samples_label), start=1):
    axis = plt.subplot(2, 1, row)
    axis.plot(np.arange(len(velocity)), velocity)
    axis.set_title(label)

In [None]:
# Display the shape of the per-sample label array
samples_label.shape


In [None]:
from sklearn.model_selection import train_test_split

# Character whose samples are used to learn the dictionaries
target_char = 'a'

# Stratified 60/40 split of the sample indices (all characters)
train_idx, test_idx = train_test_split(np.arange(len(x_speeds)), test_size=0.4, stratify=samples_label)

# Keep only the samples of the target character on each side of the split
train_char_idx = [i for i in train_idx if samples_label[i] == target_char]
test_char_idx = [i for i in test_idx if samples_label[i] == target_char]

x_train = [x_speeds[i] for i in train_char_idx]
x_test = [x_speeds[i] for i in test_char_idx]

y_train = [y_speeds[i] for i in train_char_idx]
y_test = [y_speeds[i] for i in test_char_idx]

In [None]:
from twi_ksvd.ksvd import TWI_kSVD
from scipy import signal

# At most K atoms to describe all the signals
K = 10
model_x = TWI_kSVD(K, max_iter=20)
# Each signal is described by at most tau atoms
tau = 2

# Initialise the dictionary with K training signals picked at random.
# Draw without replacement whenever enough signals are available, so the
# initial dictionary does not contain the same atom twice.
dico_idx = np.random.choice(len(x_train), K, replace=len(x_train) < K)

# Copy each signal so the atoms do not alias the training data
# (whether fit() updates atoms in place is not visible from here).
D_list = [x_train[idx].copy() for idx in dico_idx]

In [None]:
# Overlay every atom of the initial dictionary, labelled by its index
for i, atom in enumerate(D_list):

    plt.plot(np.arange(len(atom)), atom, label=f"{i}")

# At this point D_list holds training signals picked at random, not cosine atoms
plt.title("Initial atoms (random training signals)")
plt.legend()
plt.show()
    

In [None]:
# Fit model_x on the x_train signals, starting from the atoms in D_list, with
# tau and r_window=20; the two returned values are bound to Ax and Dx.
# NOTE(review): presumably Ax holds the sparse codes and Dx the learned
# dictionary -- confirm in twi_ksvd.ksvd.
Ax,Dx = model_x.fit(x_train, D_list, tau, r_window=20)

In [None]:
# Bind Ax and Dx from the model's `alphas` and `D` attributes.
# NOTE(review): presumably the same values fit() returned -- confirm in twi_ksvd.ksvd.
Ax, Dx = model_x.alphas, model_x.D

In [None]:
# At most K atoms to describe all the signals
K = 5
model_y = TWI_kSVD(K, max_iter=20)
# Each signal is described by at most tau atoms
# NOTE(review): this rebinds the notebook-wide `tau` (set to 2 for the x
# model); later cells that pass `tau=tau` use this value for both dictionaries.
tau = 3

# Every atom has the same length and the same Hamming window, shifted so its
# minimum is zero. Both are loop-invariant, so they are built once.
atom_length = 50
window = signal.windows.hamming(atom_length)
window = window - np.min(window)

D_list = []
for _ in range(K):
    # Random phase in [0, 2*pi)
    phase = 2 * np.pi * np.random.random()

    # Time axis scaled by a random factor in [0, 0.2)
    t = np.arange(atom_length) * np.random.random() / 5

    # Windowed cosine atom
    D_list.append(np.cos(t + phase) * window)

In [None]:

# Fit model_y on the y_train signals, starting from the atoms in D_list, with
# tau and r_window=None; the two returned values are bound to Ay and Dy.
# NOTE(review): presumably Ay holds the sparse codes and Dy the learned
# dictionary -- confirm in twi_ksvd.ksvd.
Ay,Dy = model_y.fit(y_train, D_list, tau, r_window=None)

In [None]:
# Bind Ay and Dy from the model's `alphas` and `D` attributes.
# NOTE(review): presumably the same values fit() returned -- confirm in twi_ksvd.ksvd.
Ay, Dy = model_y.alphas, model_y.D

In [None]:
# One figure per model, overlaying every atom of its dictionary after fitting.
# The titles used to read "random cosine atoms", which does not describe
# atoms that have been through the fit.
for model, channel in ((model_x, "x_speeds"), (model_y, "y_speeds")):
    for i, atom in enumerate(model.D):
        plt.plot(np.arange(len(atom)), atom, label=f"{i}")

    plt.title(f"Final learned atoms ({channel})")
    plt.legend()
    plt.show()
    

In [None]:
from twi_ksvd.omp import TWI_OMP



# Pick two random indices from the training split and two from the test split.
# NOTE(review): these indices cover every character, whereas x_train only
# holds the 'a' samples -- confirm that this is intended.
picked = [train_idx[k] for k in np.random.randint(0, len(train_idx), 2)]
picked += [test_idx[k] for k in np.random.randint(0, len(test_idx), 2)]

for p, i in enumerate(picked):
    # NOTE(review): `tau` is whatever was assigned last in the notebook (3 in
    # the y-model cell), while model_x was fitted with tau = 2 -- confirm.
    alphas, deltas = TWI_OMP(x_speeds[i], Dx, tau=tau, r_window=10)

    reconstructed_x_signal = np.zeros_like(x_speeds[i])

    # Left column: contribution of each atom with a non-zero coefficient,
    # then the sum of these contributions.
    plt.subplot(4, 2, 2*p + 1)
    for alpha, delta, atom in zip(alphas, deltas, Dx):
        if alpha != 0:
            # Computed once and reused for both the sum and the plot
            contribution = alpha * delta @ atom
            reconstructed_x_signal += contribution
            plt.plot(contribution)

    plt.plot(reconstructed_x_signal)

    # Right column: the original signal, for comparison
    plt.subplot(4, 2, 2*p + 2)
    plt.plot(x_speeds[i])

In [None]:
# Pick two random indices from the training split and two from the test split.
# NOTE(review): these indices cover every character, whereas y_train only
# holds the 'a' samples -- confirm that this is intended.
picked = [train_idx[k] for k in np.random.randint(0, len(train_idx), 2)]
picked += [test_idx[k] for k in np.random.randint(0, len(test_idx), 2)]

for p, i in enumerate(picked):
    alphas, deltas = TWI_OMP(y_speeds[i], Dy, tau=tau, r_window=10)

    reconstructed_y_signal = np.zeros_like(y_speeds[i])

    # Left column: contribution of each atom with a non-zero coefficient,
    # then the sum of these contributions.
    plt.subplot(4, 2, 2*p + 1)
    for alpha, delta, atom in zip(alphas, deltas, Dy):
        if alpha != 0:
            # Computed once and reused for both the sum and the plot
            contribution = alpha * delta @ atom
            reconstructed_y_signal += contribution
            plt.plot(contribution)

    plt.plot(reconstructed_y_signal)

    # Right column: the original signal, for comparison
    plt.subplot(4, 2, 2*p + 2)
    plt.plot(y_speeds[i])

In [None]:
def _reconstruct(target, dictionary, tau, r_window=10):
    """Return the sum of the TWI_OMP atom contributions approximating `target`."""
    alphas, deltas = TWI_OMP(target, dictionary, tau=tau, r_window=r_window)

    reconstruction = np.zeros_like(target)
    for alpha, delta, atom in zip(alphas, deltas, dictionary):
        if alpha != 0:
            reconstruction += alpha * delta @ atom
    return reconstruction


# Pick two random indices from the training split and two from the test split
picked = [train_idx[k] for k in np.random.randint(0, len(train_idx), 2)]
picked += [test_idx[k] for k in np.random.randint(0, len(test_idx), 2)]

for p, i in enumerate(picked):

    # Reconstruct both velocity channels from their respective dictionaries
    x_speed = _reconstruct(x_speeds[i], Dx, tau)
    y_speed = _reconstruct(y_speeds[i], Dy, tau)

    # Conversion to trajectory beginning at point (0,0): integrate the
    # reconstructed velocities with a cumulative sum, each step lasting 1/f_s.
    x = np.concatenate(([0], np.cumsum(x_speed / f_s)))
    y = np.concatenate(([0], np.cumsum(y_speed / f_s)))

    plt.subplot(2, 2, p + 1)
    # Colour encodes the time step along the trajectory
    plt.scatter(x, y, c=np.arange(len(x)), s=5)
    plt.title(samples_label[i])
    plt.axis('off')

In [None]:
# Save every atom as its own .npy file, one folder per dictionary
for folder, dictionary in (("x_dictionnary", Dx), ("y_dictionnary", Dy)):
    out_dir = os.path.join("../trajectory_dataset", folder)

    # Creating the folder does not depend on the atom: do it once per dictionary
    os.makedirs(out_dir, exist_ok=True)

    for i, atom in enumerate(dictionary):
        np.save(os.path.join(out_dir, f"atom_{i}.npy"), atom)