# Import and load the data

In [1]:
import os 
import sys
import pandas as pd 
import numpy as np 
import seaborn as sns 
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler 
from sklearn.ensemble import RandomForestClassifier 
from sklearn.svm import SVC 
from sklearn.metrics import confusion_matrix, classification_report 

In [2]:
# Directory (relative to the notebook's working directory) that holds the pose-sequence files.
poses_sequence_path = '../../../Data/Fall/Dataset_CAUCAFall/Poses_sequences/'
# Sorted listing, so list_of_files[0] and the iteration order are deterministic.
list_of_files = sorted(os.listdir(poses_sequence_path))

In [3]:
# Load the first listed sequence file (first CSV column used as the index) and preview it.
first_sequence_file = os.path.join(poses_sequence_path, list_of_files[0])
df = pd.read_csv(first_sequence_file, index_col=0)
df.head()

Unnamed: 0,X_nose,Y_nose,X_left_eye,Y_left_eye,X_right_eye,Y_right_eye,X_left_ear,Y_left_ear,X_right_ear,Y_right_ear,...,Y_right_hip,X_left_knee,Y_left_knee,X_right_knee,Y_right_knee,X_left_ankle,Y_left_ankle,X_right_ankle,Y_right_ankle,label
0,169.143264,185.373735,166.848344,180.783895,166.848344,178.488975,172.011914,176.767784,187.502626,160.129613,...,241.599281,255.202773,310.446889,272.414675,290.366337,274.135865,354.050373,285.610466,328.232521,Normal
1,179.250105,168.874884,178.000106,163.874887,178.625106,161.999889,184.875102,163.249888,205.500089,148.874897,...,252.624832,256.750057,309.499797,273.625047,292.624808,273.625047,352.624771,285.500039,329.499785,Normal
2,180.922243,168.069931,180.294801,162.422954,180.922243,161.168071,187.196661,162.422954,209.784566,147.364351,...,254.02946,257.470145,310.499224,273.783632,292.303411,273.783632,353.165268,285.705027,329.94992,Normal
3,180.909204,167.881737,180.269556,162.764552,181.548852,159.566312,187.305685,164.043849,209.053718,146.773352,...,254.234222,257.666969,310.523249,274.297818,293.252752,273.65817,352.740019,285.171834,329.71269,Normal
4,182.269733,165.962793,181.630085,160.845608,182.269733,157.647368,188.666214,162.124904,211.693543,145.494055,...,254.234222,257.108554,310.523249,273.739403,292.613104,273.739403,353.379667,285.253067,328.433394,Normal


On va entraîner un classifieur sur des fenêtres glissantes extraites des vidéos. On va donc créer une fonction qui permet de générer ces fenêtres glissantes. Ensuite, pour chaque fenêtre, on va extraire des caractéristiques résumées afin de ne pas avoir un nombre de features trop important. On va commencer par calculer, pour chaque fenêtre et pour chaque coordonnée (x et y) de chaque point clé, la moyenne, l'écart-type, la valeur minimale et la valeur maximale.

## Création des fenêtres glissantes

In [4]:
def get_sliding_windows(df, window_size, step_size):
    """
    Split a dataframe into fixed-size, possibly overlapping windows of consecutive rows.

    Args:
        df (pd.DataFrame): Dataframe to be split into windows.
        window_size (int): Number of rows in each window.
        step_size (int): Offset, in rows, between the starts of two consecutive windows.

    Returns:
        windows (list): List of dataframes of exactly window_size rows each.
            Empty when df has fewer than window_size rows.
    """
    # The last valid start index is len(df) - window_size, so the exclusive stop of the
    # range must be one past it; otherwise the final full window is silently dropped
    # (and a dataframe of exactly window_size rows yields no window at all).
    last_start = len(df) - window_size

    # .iloc makes the slicing positional regardless of the dataframe's index type.
    windows = [
        df.iloc[start:start + window_size]
        for start in range(0, last_start + 1, step_size)
    ]

    print("Total number of frames:", len(df))
    print("Number of windows:", len(windows))

    return windows

In [5]:
def get_features_from_window(window):
    """
    Summarise every non-label column of a window with four statistics.

    Args:
        window (pd.DataFrame): A window of data; a 'label' column, if present, is ignored.

    Returns:
        features (dict): Maps '<column>_mean', '<column>_std', '<column>_min' and
            '<column>_max' to the corresponding statistic of that column.
    """
    features = {}

    value_columns = [name for name in window.columns if name != 'label']
    for name in value_columns:
        series = window[name]
        features.update({
            name + '_mean': series.mean(),
            name + '_std': series.std(),
            name + '_min': series.min(),
            name + '_max': series.max(),
        })

    return features


def create_dataset(windows):
    """
    Turn a list of windows into a feature table and one label per window.

    A window is labelled 'Fall' as soon as any of its rows is labelled 'Fall';
    otherwise it takes the label of its last row.

    Args:
        windows (list): A list of windows (dataframes with a 'label' column)

    Returns:
        X (pd.DataFrame): A dataframe of features, one row per window
        y (np.array): An array of labels, one per window
    """
    feature_rows = []
    labels_per_window = []

    for window in windows:
        feature_rows.append(get_features_from_window(window))
        frame_labels = window['label']
        has_fall = 'Fall' in frame_labels.values
        labels_per_window.append('Fall' if has_fall else frame_labels.iloc[-1])

    return pd.DataFrame(feature_rows), np.array(labels_per_window)


## Création d'un Dataset

In [7]:
# Resolve the directory three levels above the working directory (presumably the project
# root -- confirm) so that the `Classifier` package can be imported from it.
root_dir = os.path.abspath('../../../')
# Guard the append so that re-running this cell does not pile up duplicate sys.path entries.
if root_dir not in sys.path:
    sys.path.append(root_dir)
from Classifier.Fall.data_preparation import WindowFeatureExtractor

In [8]:

# NOTE(review): poses_sequence_path is already assigned this same value in an earlier cell.
poses_sequence_path = '../../../Data/Fall/Dataset_CAUCAFall/Poses_sequences/'
    
# Windowing parameters handed to the extractor; they are also used later to name the saved file.
window_size = 10
step_size = 5

window_feature_extractor = WindowFeatureExtractor(window_size, step_size)
# NOTE(review): as recorded in this cell's output, the call below raises
# "TypeError: prepare_data() missing 1 required positional argument: 'files_list'".
# prepare_data is invoked on the class rather than on window_feature_extractor, and no
# file list is passed. Presumably the intended call is made on the instance and receives
# the list of sequence files -- confirm against WindowFeatureExtractor.prepare_data.
# Until this is fixed, X and y (used by the later cells) are never defined.
X, y = WindowFeatureExtractor.prepare_data(poses_sequence_path)

TypeError: prepare_data() missing 1 required positional argument: 'files_list'

## Exploration des données

In [None]:
# Count, for every sequence file, how many frames carry each of the three labels.
normal_frame_counts = []
fall_frame_counts = []
lying_frame_counts = []

for file in list_of_files:
    # A dedicated name keeps the per-video frame from being confused with the summary `df`.
    video_df = pd.read_csv(os.path.join(poses_sequence_path, file), index_col=0)
    value_counts = video_df['label'].value_counts()

    # .get(..., 0) handles a video that lacks any one of the labels; indexing directly
    # (value_counts['Fall']) raises KeyError as soon as a single label is missing.
    normal_frame_counts.append(value_counts.get('Normal', 0))
    fall_frame_counts.append(value_counts.get('Fall', 0))
    lying_frame_counts.append(value_counts.get('Lying down', 0))

# One row per file, one column per label.
data = {'Normal': normal_frame_counts, 'Fall': fall_frame_counts, 'Lying down': lying_frame_counts}
df = pd.DataFrame(data=data, index=list_of_files)
df.head()

In [None]:
# Histogram (20 bins, with a KDE overlay) of the values held in df, drawn on an explicit axes.
# NOTE(review): relies on df being the per-file label-count table built in the previous cell,
# not the single-sequence dataframe that used the same name earlier.
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(df, ax=ax, kde=True, bins=20)
ax.set_title('Distribution of the number of frames per video')
ax.set_xlabel('Number of frames')

In [None]:
# Distinct values found in y and how many times each occurs (a quick class-balance check).
np.unique(y, return_counts=True)

## Classifieur

In [None]:
# Hold out 20% of the samples for testing. stratify=y keeps the class proportions the same
# in both splits, so a rare class cannot end up (almost) absent from the test set.
# NOTE(review): if X holds overlapping windows (step_size < window_size), a random split can
# place near-duplicate windows of the same video on both sides; a split grouped by video
# would avoid that -- confirm how X is built before trusting the test score.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
np.unique(y_train, return_counts=True)

In [None]:
# Per-class weights keyed by label name; all are 1, i.e. every class is weighted equally.
# NOTE(review): not referenced by any cell visible in this section -- presumably intended
# for a classifier's class_weight argument; confirm or remove.
class_weights = {'Fall': 1, 'Lying down': 1, 'Normal': 1}

In [None]:
# Fit the scaler on the training split only, then apply the same transform to both splits
# so that no statistic of the test set influences the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = scaler.transform(X_train), scaler.transform(X_test)
# Sorted unique class names seen in training; reused to order the confusion matrix.
labels = np.unique(y_train)

## Création d'un classifieur

In [None]:
# Train a random forest (fixed seed for reproducibility) on the scaled training split.
random_forest = RandomForestClassifier(random_state=0)
random_forest.fit(X_train_scaled, y_train)
# Score once and reuse the value instead of evaluating the test set twice.
rf_accuracy = random_forest.score(X_test_scaled, y_test)
print("Random Forest Accuracy: ", rf_accuracy)

In [None]:
# Predict on the scaled test split and build the confusion matrix. Passing labels=labels
# fixes the row/column order to that of `labels` (the sorted unique training labels).
y_pred = random_forest.predict(X_test_scaled)
conf = confusion_matrix(y_test, y_pred, labels=labels)

## Visualisation

In [None]:
def plot_confusion_matrix(conf_matrix, class_labels=None):
    """
    Draw a confusion matrix as an annotated heat map.

    Args:
        conf_matrix (np.ndarray): 2-D matrix of counts; rows are shown as ground truth
            and columns as predictions.
        class_labels (sequence, optional): Tick labels, in the same order as the matrix
            rows/columns. Defaults to the notebook-level `labels` variable so that
            existing single-argument calls keep working.
    """
    if class_labels is None:
        # Backward-compatible fallback on the notebook-level variable.
        class_labels = labels

    fig, ax = plt.subplots(figsize=(7.5, 7.5))
    ax.matshow(conf_matrix, cmap=plt.cm.Blues, alpha=0.5)

    # Write each count in the centre of its cell.
    for i in range(conf_matrix.shape[0]):
        for j in range(conf_matrix.shape[1]):
            ax.text(x=j, y=i, s=str(conf_matrix[i, j]), va='center', ha='center', size='x-large')

    tick_positions = range(len(class_labels))
    ax.set_yticks(tick_positions)
    ax.set_yticklabels(class_labels, rotation=45)
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(class_labels, rotation=45)

    # Use the axes object throughout instead of mixing in the pyplot state machine.
    ax.set_xlabel('Predictions', fontsize=18)
    ax.set_ylabel('Ground Truth', fontsize=18)
    ax.set_title('Confusion Matrix', fontsize=18)
    plt.show()

plot_confusion_matrix(conf, labels)

In [None]:
import pickle

# The file name records the windowing parameters the model was trained with.
file_name = 'RandomForest_WindowSize_{}_Step_{}'.format(window_size, step_size)
# Persist the fitted classifier itself -- not the scaled test matrix -- so that loading the
# file yields a usable model. The context manager guarantees the file is flushed and closed.
with open(file_name, 'wb') as model_file:
    pickle.dump(random_forest, model_file)

In [None]:
# Reload the pickled object; the context manager closes the file handle deterministically.
# NOTE: pickle.load can execute arbitrary code -- only load files written by this notebook.
with open(file_name, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

## Test de Grid search