<a href="https://colab.research.google.com/github/raljun/data-analysis/blob/main/GroupProject18.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Project Group 18

```
 Gesture Recognition
```



In [None]:
# Colab-only step: ask the user to upload file(s) through google.colab's upload helper.
# Presumably this is where data.zip (unpacked by the following cell) is provided — confirm.
from google.colab import files
uploaded = files.upload()

In [None]:
# IPython shell escape: unpack data.zip into the working directory (-o overwrites without prompting).
!unzip -o data.zip


In [None]:
import os

# Sanity check: probe one known sample file to confirm the dataset is in place.
print(
    "✔️ Dataset found"
    if os.path.exists("data/Domain1_csv/Subject1-0-1.csv")
    else "❌ Fichier manquant : data/Domain1_csv/Subject1-0-1.csv"
)



```
Import of Libraries
```



In [None]:
import scipy.interpolate as interp
import numpy as np
from numba import njit
from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt


In [None]:
import os  # nécessaire pour vérifier l'existence des fichiers

def read_files(domain, data_dir="data", n_users=10, n_digits=10, n_repetitions=10):
    """
    Read the gesture CSV files of one domain.

    Files are looked up as
    ``{data_dir}/Domain{domain}_csv/Subject{user}-{digit}-{repetition}.csv``.
    Missing files are reported on stdout and skipped.

    Parameters:
        domain: domain identifier inserted into the folder name.
        data_dir (str): root folder of the dataset (default: "data").
        n_users (int): user IDs 1..n_users are scanned (default: 10).
        n_digits (int): digit classes 0..n_digits-1 are scanned (default: 10).
        n_repetitions (int): repetitions 1..n_repetitions are scanned (default: 10).

    Returns:
        tuple: (data, points_array, seq_array)
            - data: list with the first three columns of every file
            - points_array: 1-D object array holding every file's full table
            - seq_array: object array with the digit label of every file
    """
    data = []             # first three columns only
    full_sequences = []   # every column of the file
    digit_labels = []     # digit class taken from the file name

    for user in range(1, n_users + 1):
        for digit in range(n_digits):
            for repetition in range(1, n_repetitions + 1):
                path = f"{data_dir}/Domain{domain}_csv/Subject{user}-{digit}-{repetition}.csv"

                if not os.path.exists(path):
                    print(f"File not found: {path}")
                    continue

                df = pd.read_csv(path)
                digit_labels.append(digit)
                full_sequences.append(np.array(df.values))
                data.append(np.array(df.iloc[:, 0:3].values))

    # Fill a pre-allocated object array element by element: np.array(list, dtype=object)
    # would collapse into an N-D array whenever all recordings share the same shape.
    points_array = np.empty(len(full_sequences), dtype=object)
    for idx, sequence in enumerate(full_sequences):
        points_array[idx] = sequence
    seq_array = np.array(digit_labels, dtype=object)

    return data, points_array, seq_array

@njit()
def euclidean_distance(x, y):
    """Return the Euclidean (L2) distance between the vectors x and y."""
    delta = x - y
    return np.sqrt(np.sum(delta ** 2))

@njit(nogil=True)
def dtw(serie1, serie2):
    """
    Dynamic-time-warping cost between two point sequences.

    @pre: serie1 and serie2 are numpy arrays whose rows are the points to align
    @post: returns the accumulated cost of the cheapest alignment
    """
    n_rows = len(serie1)
    n_cols = len(serie2)

    # acc[r, c]: cheapest cost of aligning the first r points of serie1 with the
    # first c points of serie2; row/column 0 is the "nothing consumed" border.
    acc = np.full((n_rows + 1, n_cols + 1), np.inf)
    acc[0, 0] = 0.0

    for row in range(1, n_rows + 1):
        for col in range(1, n_cols + 1):
            local_cost = euclidean_distance(serie1[row - 1], serie2[col - 1])
            cheapest_predecessor = min(
                acc[row - 1, col],
                acc[row, col - 1],
                acc[row - 1, col - 1]
            )
            acc[row, col] = local_cost + cheapest_predecessor

    return acc[n_rows, n_cols]

def plot_conf_mat(labels, pred_labels, user_id):
    """
    Plot and save the confusion matrix for a given user's predictions.

    Parameters:
        labels: true digit labels (list or array).
        pred_labels: predicted digit labels (list or array).
        user_id (int): zero-based user index; shown and saved as user_id + 1.

    Side effects:
        Saves "confusion_matrix_user{user_id+1}.png", shows the figure, then closes it.
    """
    classes = list(range(10))

    # Cast both inputs to plain int vectors, as the accuracy computation does.
    y_true = np.asarray(labels, dtype=int).ravel()
    y_pred = np.asarray(pred_labels, dtype=int).ravel()

    # Pin the class list: without it the matrix shrinks when a digit is absent
    # from both inputs and no longer fits the 10x10 frame below.
    conf_mat = confusion_matrix(y_true, y_pred, labels=classes)
    df_cm = pd.DataFrame(conf_mat, index=classes, columns=classes)

    sn.set(font_scale=1.2)
    plt.figure(figsize=(8, 5))
    sn.heatmap(df_cm, annot=True)
    plt.title(f"User {user_id+1} – Confusion Matrix")
    plt.savefig(f"confusion_matrix_user{user_id+1}.png")
    plt.show()
    plt.close()


def cross_validation_user_independent_split(X, y, i, samples_per_user=100):
    """
    Leave-one-user-out split: user i's block is the test set, the rest is training.

    Samples are assumed to be ordered user by user, each user owning a
    contiguous block of `samples_per_user` entries.

    Parameters:
        X (array-like): all gesture sequences.
        y (array-like): all labels, aligned with X.
        i (int): 1-based index of the user to hold out.
        samples_per_user (int): size of each user's block (default: 100).

    Returns:
        tuple: (train_points, test_points, train_labels, test_labels) as object arrays.

    Raises:
        ValueError: if i does not designate a block inside X.
    """
    X = np.array(X, dtype=object)
    y = np.array(y, dtype=object)

    start = (i - 1) * samples_per_user
    stop = i * samples_per_user
    if i < 1 or start >= len(X):
        # i = 0 would otherwise yield an empty test set without any warning.
        raise ValueError(
            f"user index {i} is out of range for {len(X)} samples "
            f"with {samples_per_user} samples per user"
        )

    held_out = np.zeros(len(X), dtype=bool)
    held_out[start:stop] = True

    return X[~held_out], X[held_out], y[~held_out], y[held_out]

def cross_validation_user_dependent_split(X, y, n, n_digits=10, n_repetitions=10):
    """
    User-dependent split: the n-th repetition of every digit is the test set,
    the remaining repetitions are the training set.

    Samples must be ordered digit by digit, each digit owning a contiguous
    block of `n_repetitions` entries.

    Parameters:
        X (array-like): one user's sequences (n_digits * n_repetitions items).
        y (array-like): matching labels.
        n (int): repetition to hold out, 1 <= n <= n_repetitions.
        n_digits (int): number of digit classes (default: 10).
        n_repetitions (int): repetitions per digit (default: 10).

    Returns:
        tuple: (train_points, test_points, train_labels, test_labels)

    Raises:
        ValueError: if n is out of range or X / y do not have the expected size.
    """
    # dtype=object keeps variable-length sequences as elements of a 1-D array;
    # a plain np.array() on a ragged list is rejected by recent NumPy versions.
    X = np.asarray(X, dtype=object)
    y = np.asarray(y)

    expected = n_digits * n_repetitions
    if len(X) != expected or len(y) != expected:
        raise ValueError(
            f"expected {expected} samples and labels, got {len(X)} and {len(y)}"
        )
    if not 1 <= n <= n_repetitions:
        # n = 0 would wrap around to negative indices instead of failing.
        raise ValueError(f"n must be between 1 and {n_repetitions}, got {n}")

    # Position of the n-th repetition inside each digit's block.
    test_idx = (n - 1) + n_repetitions * np.arange(n_digits)

    is_test = np.zeros(expected, dtype=bool)
    is_test[test_idx] = True

    return X[~is_test], X[is_test], y[~is_test], y[is_test]

def get_user(X, y, user, limit=100):
    """
    Return the block of sequences and labels belonging to one user.

    @param user: 1-based user ID; user k owns entries (k-1)*limit .. k*limit-1
    @param limit: number of samples per user (default: 100)
    @return: (sequences, labels) of that user, both as object arrays
    """
    sequences = np.array(X, dtype=object)
    targets = np.array(y, dtype=object)

    first = (user - 1) * limit
    selection = range(first, first + limit)
    return sequences[selection], targets[selection]

def resampling(sequence, n_new=80):
    """
    Linearly resample a timed sequence onto n_new evenly spaced time stamps.

    @param sequence: 2-D numpy array; column 3 holds the time stamps
                     (expected layout: [x, y, z, t])
    @param n_new: number of rows in the resampled sequence
    @return: float array of shape (n_new, number of columns); every column but
             the last is interpolated, the last one holds the new time stamps
    """
    _, n_cols = sequence.shape

    t_old = np.asarray(sequence[:, 3]).squeeze()
    t_new = np.linspace(sequence[:, 3].min(), sequence[:, 3].max(), n_new)

    resampled = np.zeros((n_new, n_cols))
    resampled[:, -1] = t_new  # the time axis goes into the last column

    for col in range(n_cols - 1):
        values_old = np.asarray(sequence[:, col]).squeeze()
        resampled[:, col] = interp.interp1d(
            t_old, values_old, fill_value="extrapolate"
        )(t_new)

    return resampled

def get_acc(X):
    """
    Magnitude of the acceleration along a timed 3D trajectory.

    Parameters:
        X (ndarray): 2-D array with columns [x, y, z, t].

    Returns:
        ndarray: 1D array with one acceleration magnitude per row of X.
    """
    t_step = np.gradient(X[:, 3])

    # Differentiate each coordinate twice with respect to time.
    components = []
    for axis in range(3):
        velocity = np.gradient(X[:, axis]) / t_step
        components.append(np.gradient(velocity) / t_step)

    acc_x, acc_y, acc_z = components
    return np.sqrt(acc_x**2 + acc_y**2 + acc_z**2)


def get_angle(X):
    """
    Angle between the acceleration vector and the X axis.

    Computed as arctan2 of the acceleration's YZ-plane magnitude over the
    absolute value of its X component.

    Parameters:
        X (ndarray): 2-D array with columns [x, y, z, t].

    Returns:
        ndarray: 1D array of angles in radians, one per row of X.
    """
    t_step = np.gradient(X[:, 3])

    # Second time derivative of each coordinate.
    components = []
    for axis in range(3):
        velocity = np.gradient(X[:, axis]) / t_step
        components.append(np.gradient(velocity) / t_step)

    acc_x, acc_y, acc_z = components
    off_axis = np.sqrt(acc_y**2 + acc_z**2)
    return np.arctan2(off_axis, np.abs(acc_x))

def get_kinetic_energy(X, mass):
    """
    Kinetic energy (0.5 * mass * speed^2) along a timed 3D trajectory.

    Parameters:
        X (ndarray): 2-D array with columns [x, y, z, t].
        mass (float): mass used in the energy formula.

    Returns:
        ndarray: 1D array with one energy value per row of X.
    """
    t_step = np.gradient(X[:, 3])
    vel_x, vel_y, vel_z = (np.gradient(X[:, axis]) / t_step for axis in range(3))

    speed = np.sqrt(vel_x**2 + vel_y**2 + vel_z**2)
    return 0.5 * mass * speed**2


def get_speed(X):
    """
    Magnitude of the velocity along a timed 3D trajectory.

    Parameters:
        X (ndarray): 2-D array with columns [x, y, z, t].

    Returns:
        ndarray: 1D array with one speed value per row of X.
    """
    t_step = np.gradient(X[:, 3])
    vel_x, vel_y, vel_z = (np.gradient(X[:, axis]) / t_step for axis in range(3))
    return np.sqrt(vel_x**2 + vel_y**2 + vel_z**2)




```
Dynamic Time Warping (DTW) and K-Nearest Neighbors (KNN)
```



In [None]:
from numba.typed import List
from sklearn.metrics import accuracy_score
from joblib import Parallel, delayed

In [None]:
class KNN_DTW:
    """
    K-Nearest Neighbors classifier using DTW distance for time series data.
    """

    def __init__(self, n_neighbors=1):
        # Number of closest training sequences that vote for the label.
        self.n_neighbors = n_neighbors

    @staticmethod
    def _as_sequence_array(sequences):
        """Pack sequences into a 1-D object array of contiguous float64 arrays."""
        # Filled element by element so that variable-length sequences (and
        # equally shaped ones) always end up as separate elements.
        packed = np.empty(len(sequences), dtype=object)
        for idx, sequence in enumerate(sequences):
            packed[idx] = np.ascontiguousarray(sequence, dtype=np.float64)
        return packed

    def fit(self, x, labels):
        """
        Store training sequences and their labels.

        Parameters:
            x (array-like): list or array of time series (lengths may differ)
            labels (array-like): list of class labels
        """
        self.x_train = self._as_sequence_array(x)
        self.labels = np.array(labels)

    def predict(self, x_test):
        """
        Predict labels for the test set using DTW-based KNN.

        Parameters:
            x_test (array-like): list of time series to classify

        Returns:
            ndarray: predicted labels
        """
        # Convert every query once instead of once per (query, reference) pair.
        queries = self._as_sequence_array(x_test)

        matrix = Parallel(n_jobs=-1, prefer="threads", verbose=0)(
            delayed(dtw)(query, reference)
            for query in queries for reference in self.x_train
        )

        # One row per query, one column per training sequence.
        dist_matrix = np.array(matrix).reshape((len(queries), -1))
        indexes = dist_matrix.argsort()[:, :self.n_neighbors]
        neighbor_labels = self.labels[indexes]
        # Majority vote; on a tie the first value returned by pandas' mode is used.
        return pd.DataFrame(neighbor_labels).mode(axis=1).iloc[:, 0].to_numpy()

def test(train_set, test_set, train_labels, test_labels, model):
    """
    Fit `model` on the training data and score it on the test data.

    Parameters:
        train_set (array-like): sequences used for training
        test_set (array-like): sequences used for testing
        train_labels (array-like): labels of the training sequences
        test_labels (array-like): labels of the test sequences
        model: classifier implementing fit() and predict()

    Returns:
        Tuple:
            - float: accuracy score
            - the predictions exactly as returned by model.predict()
    """
    model.fit(train_set, train_labels)
    predictions = model.predict(test_set)

    # Both sides are compared as flat integer vectors.
    expected = np.array(test_labels, dtype=int).flatten()
    predicted = np.array(predictions, dtype=int).flatten()

    return accuracy_score(expected, predicted), predictions



def validation_inter(dataset, labels, model, typeUser="independent", user=0):
    """
    Run the ten cross-validation folds of one evaluation mode.

    Parameters:
        dataset (array-like): list of gesture sequences
        labels (array-like): list of gesture labels
        model: a classifier with .fit() and .predict() methods
        typeUser (str): "independent" selects the user-independent split;
                        any other value selects the user-dependent split
        user (int): user ID, only used in the user-dependent progress message

    Side effects:
        Appends each fold's accuracy, predictions and true labels to the
        global lists accuracies, predictions and labe, and prints the score.
    """
    independent = typeUser == "independent"

    for fold in range(1, 11):
        if independent:
            split = cross_validation_user_independent_split(dataset, labels, fold)
        else:
            split = cross_validation_user_dependent_split(dataset, labels, fold)
        X_train, X_test, seq_train, seq_test = split

        accuracy, prediction = test(X_train, X_test, seq_train, seq_test, model)
        accuracies.append(accuracy)
        predictions.append(prediction)
        labe.append(np.array(seq_test, dtype=int))

        if independent:
            print(f"The user independent score {fold}: {accuracy * 100:.1f}%")
        else:
            print(f"The user dependent score {user} using the {fold} try of each gesture: {accuracy * 100:.1f}%")

def validationInd(dataset, labels, model):
    """
    Run the user-independent validation and hand back the collected results.

    Parameters:
        dataset (array-like): list of gesture sequences
        labels (array-like): corresponding gesture labels
        model: a KNN_DTW model or compatible classifier

    Returns:
        tuple: the global lists (accuracies, predictions, labe) after the run
    """
    sequences, targets = np.array(dataset), np.array(labels)

    print("\n\t******\tUSER INDEPENDENT\t******\t\n")
    validation_inter(sequences, targets, model, typeUser="independent")
    return accuracies, predictions, labe

def validationDep(dataset, labels, model):
    """
    Run user-dependent validation for all ten users and return the results.

    Parameters:
        dataset (array-like): full list of gesture sequences
        labels (array-like): full list of labels
        model: classifier with fit and predict methods

    Returns:
        tuple: the global lists (accuracies, predictions, labe), holding the
        fold results of every user in user order
    """
    dataset = np.array(dataset)
    labels = np.array(labels)
    print("\n\t******\tUSER DEPENDENT\t******\t\n")

    for i in range(10):  # for the 10 users
        user_data, user_labels = get_user(dataset, labels, i + 1)
        validation_inter(user_data, user_labels, model, typeUser="dependent", user=i + 1)

    # Return only after every user has been processed (a return inside the
    # loop would stop after the first user).
    return accuracies, predictions, labe

def clear_globals():
    """
    Reset the global result lists (accuracies, predictions, labe) before a new
    validation run.

    Plain Python lists are used for all three: the results are only appended
    to, indexed and passed to NumPy, so no typed container is required.
    """
    global accuracies, predictions, labe
    accuracies = []
    predictions = []
    labe = []

def _print_accuracy_summary(fold_accuracies):
    """Print mean and standard deviation of the fold accuracies, both in percent."""
    percentages = np.asarray(list(fold_accuracies), dtype=float) * 100
    print(f"Average accuracy: {np.mean(percentages):.2f}%")
    print(f"Standard deviation: {np.std(percentages):.2f}%")


def run_validation_pipeline(data, model):
    """
    Run the user-independent and the user-dependent validation and report both.

    Parameters:
        data: indexable whose element 0 holds the gesture sequences and whose
              element 2 holds the labels (the layout returned by read_files).
        model: classifier with fit() and predict() methods.

    Side effects:
        Resets the global result lists before each run, prints the accuracy
        summaries and plots one confusion matrix per user.
    """
    folds_per_user = 10  # validation_inter appends ten fold results per call

    sequences = np.array(data[0], dtype=object)
    labels = np.array(data[2])

    clear_globals()
    print("\n=== USER-INDEPENDENT VALIDATION ===")
    acc_in, pred_in, labels_in = validationInd(sequences, labels, model)
    _print_accuracy_summary(acc_in)
    # In this mode every fold is exactly one held-out user.
    for user_id in range(len(pred_in)):
        plot_conf_mat(labels_in[user_id], pred_in[user_id], user_id)

    clear_globals()
    print("\n=== USER-DEPENDENT VALIDATION ===")
    acc_dep, pred_dep, labels_dep = validationDep(sequences, labels, model)
    _print_accuracy_summary(acc_dep)
    # Here each user contributes several folds: pool them so that each
    # confusion matrix really belongs to the user named in its title.
    true_folds = list(labels_dep)
    pred_folds = list(pred_dep)
    for user_id in range(len(pred_folds) // folds_per_user):
        first = user_id * folds_per_user
        last = first + folds_per_user
        user_true = np.concatenate([np.asarray(f).ravel() for f in true_folds[first:last]])
        user_pred = np.concatenate([np.asarray(f).ravel() for f in pred_folds[first:last]])
        plot_conf_mat(user_true, user_pred, user_id)




```
Simulation of DTW and KNN
```



In [None]:
if __name__ == "__main__":
    # Load Domain 1, then evaluate a 3-nearest-neighbour DTW classifier on it.
    data = read_files(1)
    model = KNN_DTW(n_neighbors=3)
    run_validation_pipeline(data, model)



```
Advanced method: Support Vector Machine (SVM)
```



In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

In [None]:
def extract_features(data):
    """
    Build one feature row per gesture by concatenating its speed, acceleration
    and angle profiles (in that order).
    """
    feature_blocks = [
        np.array([extractor(gesture) for gesture in data])
        for extractor in (get_speed, get_acc, get_angle)
    ]
    return np.concatenate(feature_blocks, axis=1)

def svm_user_independent(points_array, seq_array):
    """
    User-independent evaluation of a linear SVM (leave-one-user-out).

    For each of the ten users, the classifier is trained on the other users'
    gestures and tested on that user's gestures. Prints per-user accuracy,
    the overall mean and standard deviation, then plots the confusion matrices.
    """
    print("\n--- SVM USER-INDEPENDENT ---")
    fold_scores = []   # accuracies in percent
    fold_truths = []
    fold_guesses = []

    for held_out_user in range(1, 11):
        train_seqs, test_seqs, train_y, test_y = cross_validation_user_independent_split(
            points_array, seq_array, held_out_user
        )

        # Bring every gesture to a common length before extracting features.
        train_resampled = np.array([resampling(seq) for seq in train_seqs])
        test_resampled = np.array([resampling(seq) for seq in test_seqs])

        train_features = extract_features(train_resampled)
        test_features = extract_features(test_resampled)

        y_train = np.array(train_y, dtype=int).flatten()
        y_test = np.array(test_y, dtype=int).flatten()

        classifier = SVC(kernel="linear")
        classifier.fit(train_features, y_train)
        y_pred = np.array(classifier.predict(test_features), dtype=int).flatten()

        score = accuracy_score(y_test, y_pred)
        fold_scores.append(score * 100)
        fold_truths.append(y_test)
        fold_guesses.append(y_pred)

        print(f"User {held_out_user} accuracy: {score * 100:.2f}%")

    print(f"\nMoyenne SVM User-Independent: {np.mean(fold_scores):.2f}%")
    print(f"Écart-type SVM User-Independent: {np.std(fold_scores):.2f}%")

    # Show the confusion matrices only once all scores have been printed.
    for user_id, (truth, guess) in enumerate(zip(fold_truths, fold_guesses)):
        plot_conf_mat(truth, guess, user_id)

def svm_user_dependent(points_array, seq_array):
    """
    User-dependent evaluation of a linear SVM.

    For each of the ten users, ten folds are run; each fold holds out one
    repetition of every digit as the test set. Prints per-fold accuracy and
    per-user mean / standard deviation, then plots one confusion matrix per
    user built from all of that user's folds.
    """
    print("\n--- SVM USER-DEPENDENT ---")
    all_user_means = []   # collected per user; not reported by this function
    all_user_stds = []    # collected per user; not reported by this function
    truths_by_user = []
    guesses_by_user = []

    for user_number in range(1, 11):
        user_sequences, user_labels = get_user(points_array, seq_array, user_number)
        fold_accuracies = []
        fold_truths = []
        fold_guesses = []

        for repetition in range(1, 11):
            train_seqs, test_seqs, train_y, test_y = cross_validation_user_dependent_split(
                user_sequences, user_labels, repetition
            )

            # Bring every gesture to a common length before extracting features.
            train_resampled = np.array([resampling(seq) for seq in train_seqs])
            test_resampled = np.array([resampling(seq) for seq in test_seqs])

            train_features = extract_features(train_resampled)
            test_features = extract_features(test_resampled)

            classifier = SVC(kernel="linear")
            classifier.fit(train_features, train_y.tolist())
            raw_predictions = classifier.predict(test_features)

            y_true = np.array(test_y, dtype=int).flatten()
            y_pred = np.array(raw_predictions, dtype=int).flatten()
            fold_accuracy = accuracy_score(y_true, y_pred)

            fold_accuracies.append(fold_accuracy)
            fold_truths.append(y_true)
            fold_guesses.append(y_pred)

            print(f"User {user_number} – try {repetition}: {fold_accuracy*100:.2f}%")

        # Mean and standard deviation over this user's ten folds.
        mean_user = np.mean(fold_accuracies)
        std_user = np.std(fold_accuracies)
        all_user_means.append(mean_user)
        all_user_stds.append(std_user)

        print(f"→ Moyenne User {user_number}: {mean_user*100:.2f}%, Std: {std_user*100:.2f}%")

        # Pool this user's folds so one confusion matrix covers all of them.
        truths_by_user.append(np.concatenate(fold_truths))
        guesses_by_user.append(np.concatenate(fold_guesses))

    # Show the confusion matrices only once every user has been processed.
    for user_id, (truth, guess) in enumerate(zip(truths_by_user, guesses_by_user)):
        plot_conf_mat(truth, guess, user_id)




```
Simulation of SVM
```



In [None]:
if __name__ == "__main__":
    # Load the Domain 1 recordings.
    data = read_files(domain=1)

    # Element 1 holds the full [x, y, z, t] sequences, element 2 the labels.
    points_array, seq_array = data[1], data[2]

    # Run the user-dependent validation first, then the user-independent one.
    svm_user_dependent(points_array, seq_array)
    svm_user_independent(points_array, seq_array)
