In [180]:
import json
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import swifter

In [181]:
class Cfg:
    """Notebook-wide constants: seed, file locations and key column names."""

    # Seed value; not referenced by the cells visible in this notebook.
    RANDOM_STATE = 2023

    # Root directory the input files are read from.
    INPUT_ROOT = Path('/home/gpu/Kaggle/GOOGLE-ASL/data/raw')
    # Output directory; not referenced by the cells visible in this notebook.
    OUTPUT_ROOT = Path('/home/gpu/Kaggle/GOOGLE-ASL/notebooks/viz-out/')

    # Files located directly under INPUT_ROOT.
    INDEX_MAP_FILE = INPUT_ROOT.joinpath('sign_to_prediction_index_map.json')
    TRAN_FILE = INPUT_ROOT.joinpath('train.csv')

    # Column used as index of the train table / of a landmark table.
    INDEX = 'sequence_id'
    ROW_ID = 'row_id'

In [182]:
def read_index_map(file_path=Cfg.INDEX_MAP_FILE):
    """Load the sign-to-prediction-index map from a JSON file.

    Args:
        file_path: Location of the JSON file; defaults to ``Cfg.INDEX_MAP_FILE``.

    Returns:
        The decoded JSON content.
    """
    with open(file_path, "r") as handle:
        return json.load(handle)

def read_train(file_path=Cfg.TRAN_FILE):
    """Load the train CSV into a DataFrame indexed by ``Cfg.INDEX``.

    Args:
        file_path: Location of the CSV file; defaults to ``Cfg.TRAN_FILE``.

    Returns:
        A pandas DataFrame whose index is the ``Cfg.INDEX`` column.
    """
    table = pd.read_csv(file_path)
    return table.set_index(Cfg.INDEX)

def read_landmark_data_by_path(file_path, input_root=Cfg.INPUT_ROOT):
    """Load one landmark parquet file, indexed by ``Cfg.ROW_ID``.

    Args:
        file_path: Path of the parquet file. It is joined onto ``input_root``
            with ``/``; pathlib discards the left operand when the right one
            is absolute, so absolute paths are used as they are.
        input_root: Directory that relative paths are resolved against.

    Returns:
        A pandas DataFrame whose index is the ``Cfg.ROW_ID`` column.
    """
    return pd.read_parquet(input_root / file_path).set_index(Cfg.ROW_ID)

def read_landmark_data_by_id(sequence_id, train_data):
    """Load the landmark table that belongs to one sequence.

    Args:
        sequence_id: Index label looked up in ``train_data``.
        train_data: Train table providing a ``path`` column.

    Returns:
        Whatever ``read_landmark_data_by_path`` returns for the stored path.
    """
    row = train_data.loc[sequence_id]
    return read_landmark_data_by_path(row['path'])

In [183]:
# Load the train table and the sign -> index map, then derive two columns.
train_data = read_train()
label_map = read_index_map()
# Numeric label looked up from the sign name.
train_data['label'] = train_data['sign'].map(label_map)
# Join every stored path onto the input root (os.path.join returns a str).
train_data['path'] = train_data['path'].apply(lambda path: os.path.join(Cfg.INPUT_ROOT, path))
print('# sequences:', len(train_data))
train_data.head()

# sequences: 94477


Unnamed: 0_level_0,path,participant_id,sign,label
sequence_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1000035562,/home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...,26734,blow,25
1000106739,/home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...,28656,wait,232
100015657,/home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...,16069,cloud,48
1000210073,/home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...,25571,bird,23
1000240708,/home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...,62590,owie,164


In [184]:
# Subsample 2% of the sequences.
# NOTE(review): the seed is hard-coded to 42 here while Cfg.RANDOM_STATE (2023)
# is not used — confirm which one is intended.
sub_train_data = train_data.sample(frac=0.02, random_state=42)
print('# sub sampled sequences:', len(sub_train_data))
sub_train_data.head()

# sub sampled sequences: 1890


Unnamed: 0_level_0,path,participant_id,sign,label
sequence_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3311214787,/home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...,28656,sticky,206
3588192588,/home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...,53618,before,20
1363575346,/home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...,4718,pretty,178
951199059,/home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...,37779,hen,114
283190141,/home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...,36257,tomorrow,221


# Extract information from the files

In [525]:
# Pose landmark indices excluded from the pose out-of-frame count.
POSE_LOWER_BODY_LANDMARKS = list(range(23, 33))


def _landmark_group_stats(parquet_df, landmark_type, n_frames, ignored_landmarks=()):
    """Summarise the x/y coordinates of one landmark type.

    Args:
        parquet_df: Landmark table with ``type``, ``x`` and ``y`` columns.
        landmark_type: Value of the ``type`` column to select.
        n_frames: Number of frames; used as first axis of the reshape.
        ignored_landmarks: Landmark positions (second axis of the reshaped
            array) that are left out of the out-of-frame count.

    Returns:
        Tuple ``(has_any, is_complete, out_of_frame_fraction)``.
    """
    # z is ignored. Shape becomes (n_frames, landmarks, 2).
    # NOTE(review): the reshape assumes rows are ordered frame by frame with
    # the same landmarks in every frame — confirm against the data.
    coords = (
        parquet_df.loc[parquet_df['type'] == landmark_type, ['x', 'y']]
        .values.reshape(n_frames, -1, 2)
    )
    total = coords.size
    nan_count = int(np.isnan(coords).sum())

    has_any = nan_count < total      # at least one non-NaN coordinate
    is_complete = nan_count == 0     # no NaN coordinate at all

    # np.argwhere rows are (frame_idx, landmark_idx, coord_idx); column 1 keeps
    # the landmark index of every coordinate lying outside [0, 1].
    out_landmarks = np.argwhere((coords < 0) | (coords > 1))[:, 1]
    if len(ignored_landmarks):
        out_count = int(np.isin(out_landmarks, ignored_landmarks, invert=True).sum())
    else:
        out_count = out_landmarks.size

    # NOTE(review): the denominator is the full coordinate count, i.e. it still
    # includes NaN values and any ignored landmarks — confirm this is intended.
    out_fraction = out_count / total if total else 0.0
    return has_any, is_complete, out_fraction


def myf(path):
    """Compute per-sequence landmark statistics for one parquet file.

    Args:
        path: Path handed to ``read_landmark_data_by_path``.

    Returns:
        A list ``[n_frames, has_lhand, has_rhand, has_pose, has_face,
        has_complete_lhand, has_complete_rhand, has_complete_pose,
        has_complete_face, out_of_frame_values_pct_lhand,
        out_of_frame_values_pct_rhand, out_of_frame_values_pct_pose,
        out_of_frame_values_pct_face]``. The ``pct`` entries are fractions of
        the coordinate count, not values scaled to 100.
    """
    parquet_df = read_landmark_data_by_path(path)
    n_frames = parquet_df['frame'].nunique()

    # (landmark type, landmark positions excluded from the out-of-frame count)
    groups = (
        ('left_hand', ()),
        ('right_hand', ()),
        ('pose', POSE_LOWER_BODY_LANDMARKS),
        ('face', ()),
    )
    stats = [
        _landmark_group_stats(parquet_df, landmark_type, n_frames, ignored)
        for landmark_type, ignored in groups
    ]
    has_flags, complete_flags, out_fractions = zip(*stats)

    return [n_frames, *has_flags, *complete_flags, *out_fractions]

def generate_extended_csv(train_data, name='extended_train_data'):
    """Add the ``myf`` statistics as columns and write the table to CSV.

    The columns are assigned on ``train_data`` itself, so the caller's frame
    is modified in place.

    Args:
        train_data: Table with a ``path`` column; ``myf`` is applied to every
            path through the ``swifter`` accessor.
        name: File name without extension; ``f"{name}.csv"`` is written.

    Returns:
        The same ``train_data`` object, now carrying the extra columns.
    """
    # Must stay in the order of the list returned by myf.
    stat_columns = [
        'n_frames',
        'has_lhand', 'has_rhand', 'has_pose', 'has_face',
        'has_complete_lhand', 'has_complete_rhand',
        'has_complete_pose', 'has_complete_face',
        'out_of_frame_values_pct_lhand', 'out_of_frame_values_pct_rhand',
        'out_of_frame_values_pct_pose', 'out_of_frame_values_pct_face',
    ]
    per_sequence_stats = train_data.path.swifter.apply(myf).to_list()
    train_data[stat_columns] = per_sequence_stats
    train_data.to_csv(f"{name}.csv")

    return train_data

def print_extended_data_stats(train_data):
    """Print min, max and mean of the four out-of-frame columns.

    Args:
        train_data: Table holding the ``out_of_frame_values_pct_*`` columns.
    """
    print('Out of frame values (min, max, mean):')
    labelled_columns = (
        ('Rhand:', 'out_of_frame_values_pct_rhand'),
        ('Lhand:', 'out_of_frame_values_pct_lhand'),
        ('Pose:', 'out_of_frame_values_pct_pose'),
        ('Face:', 'out_of_frame_values_pct_face'),
    )
    for label, column in labelled_columns:
        values = train_data[column]
        print(label, values.min(), values.max(), values.mean())

In [522]:
# Compute the per-sequence statistics for the subsample and write them to
# 'extended_sub_train_data.csv'; the last line displays the resulting frame.
sub_train_data = generate_extended_csv(sub_train_data, 'extended_sub_train_data')
sub_train_data

TypeError: cannot construct a FileSource from sequence_id
1051321002    /home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...
3655711088    /home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...
1186956829    /home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...
3260917588    /home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...
448237385     /home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...
                                    ...                        
443965795     /home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...
2790551577    /home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...
753760432     /home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...
3205476890    /home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...
2503497171    /home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...
Name: path, Length: 76, dtype: object

Pandas Apply:   0%|          | 0/1890 [00:00<?, ?it/s]

Unnamed: 0_level_0,path,participant_id,sign,label,n_frames,has_lhand,has_rhand,has_pose,has_face,has_complete_lhand,has_complete_rhand,has_complete_pose,has_complete_face,out_of_frame_values_pct_lhand,out_of_frame_values_pct_rhand,out_of_frame_values_pct_pose,out_of_frame_values_pct_face
sequence_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
3311214787,/home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...,28656,sticky,206,21,False,True,True,True,False,True,True,True,0.0,0.004535,0.147186,0.0
3588192588,/home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...,53618,before,20,101,False,True,True,True,False,False,True,True,0.0,0.0,0.131413,0.0
1363575346,/home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...,4718,pretty,178,127,False,True,True,True,False,False,True,True,0.0,0.0,0.07874,0.0
951199059,/home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...,37779,hen,114,9,False,True,True,True,False,True,True,True,0.0,0.0,0.060606,0.0
283190141,/home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...,36257,tomorrow,221,51,True,False,True,True,False,False,True,True,0.0,0.0,0.134878,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2150778146,/home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...,29302,snack,202,103,False,True,True,True,False,False,True,True,0.0,0.0,0.059576,0.0
1739933438,/home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...,2044,loud,137,19,False,True,True,True,False,False,True,True,0.0,0.001253,0.104466,0.0
1873415260,/home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...,29302,yucky,247,14,False,True,True,True,False,False,True,True,0.0,0.0,0.119048,0.0
2909341732,/home/gpu/Kaggle/GOOGLE-ASL/data/raw/train_lan...,27610,book,28,87,True,False,True,True,False,False,True,True,0.0,0.0,0.148903,0.0


In [526]:
# Min / max / mean of the out-of-frame columns on the subsample.
print_extended_data_stats(sub_train_data)

Out of frame values (min, max, mean):
Rhand: 0.0 0.10081845238095238 0.001029232030904213
Lhand: 0.0 0.1349206349206349 0.0005522328466792244
Pose: 0.0 0.23148148148148148 0.10404281926363297
Face: 0.0 0.0 0.0


In [499]:
def plot_out_of_frame_by_sign(train_data, name="out-of-frame-by-sign"):
    """Save a grouped bar chart of the mean out-of-frame values per sign.

    Args:
        train_data: Table with a ``sign`` column and the four
            ``out_of_frame_values_pct_*`` columns.
        name: File name without extension; ``f"{name}.png"`` is written.
    """
    # Legend label -> column holding the values.
    column_by_label = {
        'LHand': 'out_of_frame_values_pct_lhand',
        'RHand': 'out_of_frame_values_pct_rhand',
        'Pose': 'out_of_frame_values_pct_pose',
        'Face': 'out_of_frame_values_pct_face',
    }
    # Select the numeric columns *before* aggregating: averaging the whole
    # frame would also hit string columns such as 'path', which raises a
    # TypeError on pandas >= 2.0.
    ooframe_signs_df = (
        train_data.groupby('sign')[list(column_by_label.values())]
        .mean()
        .round(decimals=2)
    )
    signs = list(ooframe_signs_df.index)

    x = np.arange(len(signs))  # the label locations
    n_series = len(column_by_label)
    width = 0.8 / n_series  # bars of one sign fill 80% of its slot, leaving a gap

    fig, ax = plt.subplots(layout='constrained', figsize=(40, 5))

    for position, (label, column) in enumerate(column_by_label.items()):
        ax.bar(x + position * width, ooframe_signs_df[column].to_numpy(), width, label=label)

    # NOTE(review): the plotted values are fractions in [0, 1] while the label
    # says '%' — confirm whether the values should be scaled by 100.
    ax.set_ylabel('% out of frame values')
    ax.set_title('Out of frame values by sign')
    # Centre each tick under its group of bars.
    ax.set_xticks(x + width * (n_series - 1) / 2, signs, rotation='vertical')
    ax.legend()
    fig.savefig(f"{name}.png", bbox_inches='tight', dpi=300)
    plt.close(fig)
    
# Write 'out-of-frame-by-sign.png' for the subsample.
plot_out_of_frame_by_sign(sub_train_data, 'out-of-frame-by-sign')

# Run functions on the whole dataset

In [None]:
# Same statistics on the full train table; with the default name this writes
# 'extended_train_data.csv'. myf reads one parquet file per row.
train_data = generate_extended_csv(train_data)


# Number of frames inspection.

- There are 11744 sequences that have six frames, i.e. 12.43% of the data.

---

## Fix the number of frames.

In order to perform batch training, the number of frames must be the same across a batch. There are several options that allow that:
- Set the number of frames to a constant value. Sequences that have fewer or more frames than this fixed number are resampled to it by interpolating frames.
- Cross-validate the number of frames used during training and keep the value that performs best.
- Create an ensemble of models trained with different frame counts.

## Clean sequences that have a low number of frames.

It is sensible to drop sequences that have very few frames.

Options:
- Drop any sequence below a given number of frames, cross-validate that value.

## Clean sequences that have high number of frames.

Same as previous section.

In [None]:
# Count how many sequences share each n_frames value, ordered by n_frames.
frame_freq_df = train_data.value_counts(subset='n_frames').rename_axis('n_frames').to_frame('count').sort_index()
frame_freq_df.head()

In [None]:
# Bar chart of the sequence counts for n_frames labels 2 through 40.
frame_freq_df.loc[2:40].plot(kind='bar')

There are 103 sequences that have two frames.

They affect 82 signs in total.

- There are 64 signs that have one affected sequence.
- There are 15 signs that have two affected sequences.
- There are 3 signs that have three affected sequences.

---

We should drop them.

In [None]:
# Sequences that consist of exactly two frames.
two_frame = train_data[train_data.n_frames == 2]
two_frame.head()

In [None]:
# len(two_frame) counts rows of the train table, i.e. sequences, not frames.
print('Total affected sequences:', len(two_frame))

In [None]:
# Number of two-frame sequences per sign.
two_frame_counts = two_frame.sign.value_counts().rename_axis('sign').to_frame('counts')
two_frame_counts.head()

In [None]:
# One row per sign in two_frame_counts, so its length is the number of signs.
print('Total signs affected:', len(two_frame_counts))

In [None]:
# How many signs have 1, 2, 3, ... two-frame sequences.
two_frame_counts.counts.value_counts().rename_axis('nb_affected_seq').to_frame('nb_affected_signs')

# Number of features

## Reduction

There may be some keypoints (such as those belonging to the lower parts of the body) that do not contribute to recognizing a sign.

## Increase

Features could be extended, examples are:
- Joint motion stream
- Bone motion stream

--- 


## Treat missing values

For some joints, keypoints are missing in some frames but present in others. As mentioned previously, fixing the number of frames requires an interpolation step. So, if missing keypoints are set to 0 before that step, the interpolation is going to assign them a value.

In [None]:
# Display the landmark table of one example sequence (id 1000035562).
read_landmark_data_by_path(train_data.loc[1000035562].path)

In [None]:
# Peek at the first value of the path column.
train_data.path.values.tolist()[0]

# Function used in evaluation

In [13]:
import numpy as np

ROWS_PER_FRAME = 543  # number of landmarks per frame


def load_relevant_data_subset(pq_path):
    """Read the x/y/z columns of a parquet file as a float32 array.

    Args:
        pq_path: Path of the parquet file.

    Returns:
        Array of shape ``(n_frames, ROWS_PER_FRAME, 3)`` with dtype float32,
        where ``n_frames`` is the row count divided by ``ROWS_PER_FRAME``.
    """
    coordinate_columns = ['x', 'y', 'z']
    table = pd.read_parquet(pq_path, columns=coordinate_columns)
    frame_count = len(table) // ROWS_PER_FRAME
    stacked = table.values.reshape(frame_count, ROWS_PER_FRAME, len(coordinate_columns))
    return stacked.astype(np.float32)

In [14]:
# Load one sequence (id 1000106739) through load_relevant_data_subset.
data = load_relevant_data_subset(train_data.loc[1000106739].path)

In [15]:
# (n_frames, ROWS_PER_FRAME, 3) as produced by the reshape in the loader.
data.shape

(11, 543, 3)

# Landmarks

In [17]:
# Landmark table of sequence 1000035562, indexed by row_id.
sample = read_landmark_data_by_path(train_data.loc[1000035562].path)
sample

Unnamed: 0_level_0,frame,type,landmark_index,x,y,z
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
20-face-0,20,face,0,0.494400,0.380470,-0.030626
20-face-1,20,face,1,0.496017,0.350735,-0.057565
20-face-2,20,face,2,0.500818,0.359343,-0.030283
20-face-3,20,face,3,0.489788,0.321780,-0.040622
20-face-4,20,face,4,0.495304,0.341821,-0.061152
...,...,...,...,...,...,...
42-right_hand-16,42,right_hand,16,0.001660,0.549574,-0.145409
42-right_hand-17,42,right_hand,17,0.042694,0.693116,-0.085307
42-right_hand-18,42,right_hand,18,0.006723,0.665044,-0.114017
42-right_hand-19,42,right_hand,19,-0.014755,0.643799,-0.123488


In [24]:
# Distinct values of the type column in the sample.
sample.type.unique()

array(['face', 'left_hand', 'pose', 'right_hand'], dtype=object)

In [20]:
# Face rows of frame 20.
frame20_face = sample.query('frame == 20 & type == "face"')
frame20_face

Unnamed: 0_level_0,frame,type,landmark_index,x,y,z
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
20-face-0,20,face,0,0.494400,0.380470,-0.030626
20-face-1,20,face,1,0.496017,0.350735,-0.057565
20-face-2,20,face,2,0.500818,0.359343,-0.030283
20-face-3,20,face,3,0.489788,0.321780,-0.040622
20-face-4,20,face,4,0.495304,0.341821,-0.061152
...,...,...,...,...,...,...
20-face-463,20,face,463,0.536965,0.299310,0.000543
20-face-464,20,face,464,0.529594,0.301546,-0.004426
20-face-465,20,face,465,0.524728,0.303110,-0.011502
20-face-466,20,face,466,0.592437,0.293800,-0.003560


In [30]:
# Check that the face landmarks of frame 20 are indexed 0..467 in order.
# np.array_equal returns False when the lengths differ, where an element-wise
# `==` wrapped in all() would raise.
np.array_equal(frame20_face.landmark_index.to_numpy(), np.arange(468))

True

In [31]:
# Left-hand rows of frame 20.
frame20_lhand = sample.query('frame == 20 & type == "left_hand"')
frame20_lhand

Unnamed: 0_level_0,frame,type,landmark_index,x,y,z
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
20-left_hand-0,20,left_hand,0,,,
20-left_hand-1,20,left_hand,1,,,
20-left_hand-2,20,left_hand,2,,,
20-left_hand-3,20,left_hand,3,,,
20-left_hand-4,20,left_hand,4,,,
20-left_hand-5,20,left_hand,5,,,
20-left_hand-6,20,left_hand,6,,,
20-left_hand-7,20,left_hand,7,,,
20-left_hand-8,20,left_hand,8,,,
20-left_hand-9,20,left_hand,9,,,


# Perform preprocessing on the whole dataset

In [None]:

# train_data['n_frames'] = train_data.path.swifter.apply(lambda path: len(read_landmark_data_by_path(path).frame.value_counts()))
# train_data.head()
# train_data.to_csv('train_data_aug.csv')
# train_data.n_frames.mean(), train_data.n_frames.min(), train_data.n_frames.max()