# Extarct handcrafted features 
- for each person, for x frames, calculate the nonverbal features from head and body landmarks 
- the x frames are: 64 frames (\~2.5s), 64\*2 (\~5s) and 5*30(=5s)
- the first two are to be compared with the deep features, the last is for the actual labeling 

- raw data --> low-level --> high-level 

### imports and vars

In [4]:
import os
import sys
import numpy as np
import pandas as pd 
from tqdm import tqdm
from outliers import smirnov_grubbs as grubbs
from statistics import mean, stdev, variance
from scipy.stats import skew, kurtosis
from scipy.signal import medfilt, find_peaks

base_path = '/home/sharifa/speedDating/'
raw_features_path = os.path.join(base_path,'speedDating_Detectron_named_3D_with_gazeFollow/')


### help functions 

In [52]:
MidHip = 0
RHip = 1
RKnee = 2
RAnkle = 3
LHip = 4
LKnee = 5
LAnkle = 6
MidBack = 7
Neck = 8
Nose = 9
forehead =10
LShoulder = 11
LElbow = 12
LWrist = 13
RShoulder = 14
RElbow = 15
RWrist = 16

def body_get_headers():
    body_features_heads =['body_pitch', 'body_roll', 'body_yaw',
                         'lhand_face', 'rhand_face', 
                         'lhand_body', 'rhand_body', 
                         'rhand_lhand']
    return body_features_heads



def head_get_headers():
    head_features_heads = ['head_pitch', 'head_roll', 'head_yaw']

    return head_features_heads

def get_stat_headers(features_heads):
    # return stat_features
    stats_features = ['f_min', 'f_max', 'f_rang', 'f_mean', 'f_std', 'f_var', 'f_skew', 'f_kurt', 'f_peaks', 'f_valys',
                         'd1_min', 'd1_max', 'd1_rang', 'd1_mean', 'd1_std', 'd1_var', 'd1_skew', 'd1_kurt', 'd1_peaks', 'd1_valys',
                         'd2_min', 'd2_max', 'd2_rang', 'd2_mean', 'd2_std', 'd2_var', 'd2_skew', 'd2_kurt', 'd2_peaks', 'd2_valys']

    high_feature_headers = []
    for i in range(len(features_heads)):
        for j in range(len(stats_features)):
            high_feature_headers.append(features_heads[i] + '-' + stats_features[j])

    return high_feature_headers


def body_sync_one(pose):
    
    nose = pose[Nose]
    center = pose[Neck]
    rhand = pose[RWrist]
    lhand = pose[LWrist]

    lhand_face, rhand_face = None, None
    lhand_body, rhand_body = None, None
    lhand_rhand = None

    if type(nose) != type(None):
        if type(lhand) != type(None):
            lhand_face = np.linalg.norm(lhand - nose)
        if type(rhand) != type(None):
            rhand_face = np.linalg.norm(rhand - nose)

    if type(center) != type(None):
        if type(lhand) != type(None):
            lhand_body = np.linalg.norm(lhand - center)
        if type(rhand) != type(None):
            rhand_body = np.linalg.norm(rhand - center)

    if type(lhand) != type(None):
        if type(rhand) != type(None):
            lhand_rhand = np.linalg.norm(lhand - rhand)

    body_feature = [lhand_face, rhand_face,
                    lhand_body, rhand_body,
                    lhand_rhand]
    return np.array(body_feature)


### step 1 -- extract and save low-level features (raw data --> low-level)
- raw data are saved per frame for every one in the frame
- wants to make low-level files one for each person
- coloumns = low-level features
- raws = each frame 


In [55]:
def extract_raw(points_folder, people_list):
    files_total = len(os.listdir(points_folder))
    all_raw_features = {}
    for person in people_list:
        all_raw_features[person]=[]
    
    # keep looping infinitely
    for i in tqdm(range(files_total)):
#         if i >=10:
#             break
        point_file = os.path.join(points_folder, '{}.npy'.format(str(i).zfill(6)))

        if not os.path.exists(point_file):
            for person in all_raw_features.keys():
                all_raw_features[person].append([])
            continue
        im_res = np.load(point_file, allow_pickle=True)
        
        #plot BBox and name
        this_frame_people = []
        for human in im_res:
            name = str(human['face_name'])
            if not name.startswith('P'):
                continue
            if name in this_frame_people:
                continue
            this_frame_people.append(name)
            
            if name not in all_raw_features.keys():
                print('a new person named {} in {}'.format(name,points_folder))
                all_raw_features[name] = []
            
            # body joints
            keypoints_3D = (human['3d_keypoints']).astype(np.float32)
            body_pose = human['body_pose']
            head_pose = human['head_pose']

            #low-level features
            raw_features = np.append(body_pose, np.append(body_sync_one(keypoints_3D),head_pose))

            all_raw_features[name].append(raw_features)
            
    return all_raw_features

In [56]:
person_order = {
    'F1_Interaction_1.mp4':{'P2':'older girl','P1':'younger girl','P3':'mother'},
    'F1_Interaction_2.mp4':{'P2':'older girl','P1':'younger girl','P3':'mother'},

    'F2_Interaction_1.mp4':{'P4':'boy','P5':'father'},
    'F2_Interaction_2.mp4':{'P4':'boy'},

    'F3_Interaction_1.mp4':{'P8':'father','P6':'girl','P7':'boy'},
    'F3_Interaction_2.mp4':{'P6':'girl','P7':'boy'},

    'F4_Interaction_1.mp4':{'P14':'mother','P12':'older girl','P11':'younger girl','P10':'older boy','P9':'younger boy','P13':'father'},
    'F4_Interaction_2.mp4':{'P12':'older girl','P11':'younger girl','P10':'older boy','P9':'younger boy','P13':'father'},

    'F5_Interaction_1.mp4':{'P16':'mother','P15':'boy'},
    'F5_Interaction_2.mp4':{'P16':'mother','P15':'boy'},

    'F6_Interaction_1.mp4':{'P19':'father','P18':'girl','P17':'boy'},
    'F6_Interaction_2.mp4':{'P19':'father','P18':'girl','P17':'boy'},

    'F7_Interaction_1.mp4':{'P22':'father','P20':'younger boy','P21':'older boy','P23':'mother'},

    'F8_Interaction_1.mp4':{'P24':'girl','P25':'father'},
    'F8_Interaction_2.mp4':{'P24':'girl','P25':'father'},
    'F8_Interaction_3.mp4':{'P24':'girl','P25':'father'},

    'F10_Interaction_1.mp4': {'P27':'left girl (green top)', 'P28':'right girl (white top)'},

    'F11_Interaction_1.mp4': {'P29':'boy', 'P30':'mother'},
    'F11_Interaction_2.mp4':{'P29':'boy','P30':'mother'},

    'F13_Interaction_1.mp4':{'P32':'girl','P33':'mother'},

    'F17_Interaction_1.mp4': {'P37':'girl', 'P38':'mother'},
    'F17_Interaction_2.mp4':{'P37':'girl','P38':'mother'}
}


low_csv_path = os.path.join(base_path,'raw_features/')
    
onlyfolders = [os.path.join(raw_features_path, f) for f in os.listdir(raw_features_path) if not
                  os.path.isfile(os.path.join(raw_features_path, f)) and f.startswith('F')]
onlyfolders.sort()
for folder in onlyfolders:
    save_folder = os.path.basename(folder).split('.')[0]
    people_list = list(person_order[save_folder+'.mp4'].keys())
    
    person = people_list[0]
    low_csv_file = os.path.join(low_csv_path,'_'.join([save_folder,person,'low_level'])+'.csv')
    if os.path.exists(low_csv_file):
        print('Already processed:',low_csv_file)
        continue
    
    readingpath = os.path.join(raw_features_path,
                              '{}.mp4'.format(save_folder),
                              '{}_frame-json'.format(save_folder))

    print('Processing:',save_folder)
    #print(people_list)
    
    #extract low-level for this folder for all people 
    this_group_low_level = extract_raw(readingpath,people_list)
    
    for this_person in this_group_low_level.keys():
        low_data = this_group_low_level[this_person]
        
        #save low-level features for each person 
        low_csv_file = os.path.join(low_csv_path,'_'.join([save_folder,this_person,'low_level'])+'.csv')
        low_header = body_get_headers() + head_get_headers()
        #np.savetxt(low_csv_file, low_data, delimiter=',', header=low_header)
        low_data_df = pd.DataFrame(low_data)
        low_data_df.to_csv(low_csv_file, header=low_header, index=None)



Processing: F10_Interaction_1


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31808/31808 [00:10<00:00, 3065.48it/s]


Processing: F11_Interaction_1


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31823/31823 [00:13<00:00, 2301.15it/s]


Processing: F11_Interaction_2


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31142/31142 [00:12<00:00, 2573.51it/s]


Processing: F13_Interaction_1


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31808/31808 [00:12<00:00, 2491.40it/s]


Processing: F17_Interaction_1


 45%|█████████████████████████████████████████████████████████████████████▍                                                                                   | 14434/31823 [00:05<00:06, 2510.46it/s]

a new person named P27 in /home/sharifa/speedDating/speedDating_Detectron_named_3D_with_gazeFollow/F17_Interaction_1.mp4/F17_Interaction_1_frame-json


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31823/31823 [00:12<00:00, 2552.88it/s]


Processing: F17_Interaction_2


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31567/31567 [00:11<00:00, 2655.10it/s]


Processing: F1_Interaction_1


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31808/31808 [00:12<00:00, 2644.60it/s]


Processing: F1_Interaction_2


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31823/31823 [00:15<00:00, 2098.61it/s]


Processing: F2_Interaction_1


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31823/31823 [00:12<00:00, 2475.83it/s]


Processing: F2_Interaction_2


 85%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                       | 9333/11036 [00:03<00:00, 2831.14it/s]

a new person named P5 in /home/sharifa/speedDating/speedDating_Detectron_named_3D_with_gazeFollow/F2_Interaction_2.mp4/F2_Interaction_2_frame-json


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 11036/11036 [00:03<00:00, 3018.52it/s]


Processing: F3_Interaction_1


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31809/31809 [00:14<00:00, 2255.64it/s]


Processing: F3_Interaction_2


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31814/31814 [00:13<00:00, 2274.79it/s]


Processing: F4_Interaction_1


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31808/31808 [00:20<00:00, 1589.75it/s]


Processing: F4_Interaction_2


 10%|██████████████▉                                                                                                                                           | 3098/31823 [00:02<00:17, 1622.28it/s]

a new person named P14 in /home/sharifa/speedDating/speedDating_Detectron_named_3D_with_gazeFollow/F4_Interaction_2.mp4/F4_Interaction_2_frame-json


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31823/31823 [00:18<00:00, 1709.38it/s]


Processing: F5_Interaction_1


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31808/31808 [00:14<00:00, 2238.58it/s]


Processing: F5_Interaction_2


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31823/31823 [00:12<00:00, 2471.51it/s]


Processing: F6_Interaction_1


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31823/31823 [00:14<00:00, 2124.85it/s]


Processing: F6_Interaction_2


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31808/31808 [00:14<00:00, 2185.25it/s]


Processing: F7_Interaction_1


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31809/31809 [00:16<00:00, 1928.99it/s]


Processing: F8_Interaction_1


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31808/31808 [00:13<00:00, 2328.73it/s]


Processing: F8_Interaction_2


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31823/31823 [00:12<00:00, 2461.67it/s]


Processing: F8_Interaction_3


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31823/31823 [00:13<00:00, 2412.04it/s]


In [58]:
### in F17_1, P37 was mistikanely named as P27 this is to fix this
def extract_raw_f17(points_folder, people_list):
    files_total = len(os.listdir(points_folder))
    all_raw_features = {}
    for person in people_list:
        all_raw_features[person]=[]
    
    # keep looping infinitely
    for i in tqdm(range(files_total)):
#         if i >=10:
#             break
        point_file = os.path.join(points_folder, '{}.npy'.format(str(i).zfill(6)))

        if not os.path.exists(point_file):
            for person in all_raw_features.keys():
                all_raw_features[person].append([])
            continue
        im_res = np.load(point_file, allow_pickle=True)
        
        #plot BBox and name
        this_frame_people = []
        for human in im_res:
            name = str(human['face_name'])
            if not name.startswith('P'):
                continue
            if name in this_frame_people:
                continue
            name = 'P37' if name=='P27' else name
            this_frame_people.append(name)
            
            if name not in all_raw_features.keys():
                print('a new person named {} in {}'.format(name,points_folder))
                all_raw_features[name] = []
            
            # body joints
            keypoints_3D = (human['3d_keypoints']).astype(np.float32)
            body_pose = human['body_pose']
            head_pose = human['head_pose']

            #low-level features
            raw_features = np.append(body_pose, np.append(body_sync_one(keypoints_3D),head_pose))

            all_raw_features[name].append(raw_features)
    return all_raw_features

save_folder = 'F17_Interaction_1'
people_list = list(person_order[save_folder+'.mp4'].keys())
readingpath = '/home/sharifa/speedDating/speedDating_Detectron_named_3D_with_gazeFollow/F17_Interaction_1.mp4/F17_Interaction_1_frame-json'


this_group_low_level = extract_raw_f17(readingpath,people_list)

for this_person in this_group_low_level.keys():
    low_data = this_group_low_level[this_person]

    #save low-level features for each person 
    low_csv_file = os.path.join(low_csv_path,'_'.join([save_folder,this_person,'low_level'])+'.csv')
    low_header = body_get_headers() + head_get_headers()
    #np.savetxt(low_csv_file, low_data, delimiter=',', header=low_header)
    low_data_df = pd.DataFrame(low_data)
    #print(low_data_df)
    low_data_df.to_csv(low_csv_file, header=low_header, index=None)


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31823/31823 [00:10<00:00, 3179.89it/s]


### step 2 -- extract and save high-level features (low-level --> high-level)
- low-level features are merged for every x frames (explained above)
- temporal features are extarcted for that window size and saved per person 

- coloumns = high-level features
- raws = temporal features every x frame 
- saved in 3 variations:  64 frames (~2.5s), 64*2 (~5s) and 5*30(=5s)


In [2]:

def statstic_features(all_features, window_size):
    all_features = np.array(all_features)
    epsilon = np.float32(0.00001)
    all_stats = np.array([])
    #for each window size in rows 
    total_rows = all_features.shape[0]
    total_cols = all_features.shape[1]
    for this_step in range(0,total_rows,window_size):
        end_win = window_size if this_step+window_size < total_rows else total_rows
        this_sub_data = all_features[this_step:this_step+end_win,:]
        if len(this_sub_data) <3:
            print('less than 3 rows')
            continue
        
        #for each feature/column
        for ff in range(total_cols):
            this_feature = this_sub_data[:,ff]

            #find and remove outliers
            this_feature = grubbs.test(this_feature, alpha=0.05)
            #de-noise: smooth the signal - median filter
            this_feature = medfilt(this_feature)

            #extract the deltas
            delta1_this_feature = np.append(this_feature[0], np.diff(this_feature))
            delta2_this_feature = np.append(delta1_this_feature[0], np.diff(delta1_this_feature))

            # extract stats
            f_min = min(this_feature)
            d1_min = min(delta1_this_feature)
            d2_min = min(delta2_this_feature)

            f_max = max(this_feature)
            d1_max = max(delta1_this_feature)
            d2_max = max(delta2_this_feature)

            f_rang = f_max - f_min
            d1_rang = d1_max - d1_min
            d2_rang = d2_max - d2_min

            f_mean = mean(this_feature)
            try:
                d1_mean = mean(delta1_this_feature)
                d2_mean = mean(delta2_this_feature)
            except:
                print("SAD")
                print(delta1_this_feature)

            f_std = stdev(this_feature)
            d1_std = stdev(delta1_this_feature)
            d2_std = stdev(delta2_this_feature)

            f_var = variance(this_feature)
            d1_var = variance(delta1_this_feature)
            d2_var= variance(delta2_this_feature)

            #extarct skewness and kurtosis
            f_skew = skew(this_feature)
            d1_skew = skew(delta1_this_feature)
            d2_skew= skew(delta2_this_feature)

            f_kurt = kurtosis(this_feature)
            d1_kurt = kurtosis(delta1_this_feature)
            d2_kurt= kurtosis(delta2_this_feature)

            #extract peaks and valys

            try:
                test = (1 /delta1_this_feature)
            except:
                for i in range(len(delta1_this_feature)):
                    if delta1_this_feature[i] == 0.0:
                        delta1_this_feature[i] = epsilon


            f_peaks = len(find_peaks(this_feature)[0])
            d1_peaks = len(find_peaks(delta1_this_feature)[0])
            d2_peaks = len(find_peaks(delta2_this_feature)[0])

            f_valys = len(find_peaks(1 /this_feature)[0])
            d1_valys = len(find_peaks(1 /delta1_this_feature)[0])
            d2_valys = len(find_peaks(1 /delta2_this_feature)[0])

            f_stats = np.array([f_min, f_max, f_rang, f_mean, f_std, f_var, f_skew, f_kurt, f_peaks, f_valys,
                         d1_min, d1_max, d1_rang, d1_mean, d1_std, d1_var, d1_skew, d1_kurt, d1_peaks, d1_valys,
                         d2_min, d2_max, d2_rang, d2_mean, d2_std, d2_var, d2_skew, d2_kurt, d2_peaks, d2_valys])

            if ff == 0:
                stats = f_stats
            else:
                stats = np.append(stats, f_stats, axis=0)
        if len(all_stats) == 0:
            all_stats = stats
        else:
            all_stats = np.vstack((all_stats,stats))
    return all_stats



### loop through all folders

In [5]:
window_frames = [64, 64*2, 5*30]
window_names = ['2.5','5','5s']

low_csv_path = os.path.join(base_path,'raw_features/')
only_low_csv_files = [os.path.join(low_csv_path, f) for f in os.listdir(low_csv_path) if 
                  os.path.isfile(os.path.join(low_csv_path, f)) and f.startswith('F')]
only_low_csv_files.sort()

for low_csv_file in only_low_csv_files:
    save_file = os.path.basename(low_csv_file).replace('_low_level.csv','')
    print('Processing:',save_file)
    
    low_level_df = pd.read_csv(low_csv_file)
    low_level_data = np.nan_to_num(low_level_df.values)
    
    for i in range(len(window_frames)):
        window_size = window_frames[i]
        window_name = window_names[i]
    
        high_npy_path = os.path.join('./features','handcrafted_features_' + window_name)
        os.makedirs(high_npy_path, exist_ok = True)
        
        high_npy_file = os.path.join(high_npy_path,'_'.join([save_file,'high_level'])+ '.npy')
        if os.path.exists(high_npy_file):
            print('Already Processed:{}-{}'.format(window_name,high_npy_file))
            continue

        #extarct high-level for each person/file 
        high_data = statstic_features(low_level_data, window_size)

        #save high-level features for each person/file 
        np.save(high_npy_file, high_data)


Processing: F10_Interaction_1_P27
Already Processed:2.5-./features/handcrafted_features_2.5/F10_Interaction_1_P27_high_level.npy
Already Processed:5-./features/handcrafted_features_5/F10_Interaction_1_P27_high_level.npy
Already Processed:5s-./features/handcrafted_features_5s/F10_Interaction_1_P27_high_level.npy
Processing: F10_Interaction_1_P28
Already Processed:2.5-./features/handcrafted_features_2.5/F10_Interaction_1_P28_high_level.npy
Already Processed:5-./features/handcrafted_features_5/F10_Interaction_1_P28_high_level.npy
Already Processed:5s-./features/handcrafted_features_5s/F10_Interaction_1_P28_high_level.npy
Processing: F11_Interaction_1_P29
Already Processed:2.5-./features/handcrafted_features_2.5/F11_Interaction_1_P29_high_level.npy
Already Processed:5-./features/handcrafted_features_5/F11_Interaction_1_P29_high_level.npy
Already Processed:5s-./features/handcrafted_features_5s/F11_Interaction_1_P29_high_level.npy
Processing: F11_Interaction_1_P30
Already Processed:2.5-./fea

  test = (1 /delta1_this_feature)
  f_valys = len(find_peaks(1 /this_feature)[0])
  d1_valys = len(find_peaks(1 /delta1_this_feature)[0])
  d2_valys = len(find_peaks(1 /delta2_this_feature)[0])
  g = value / data.std()


less than 3 rows
Processing: F3_Interaction_1_P7
less than 3 rows
Processing: F3_Interaction_1_P8
Processing: F3_Interaction_2_P6




Processing: F3_Interaction_2_P7
Processing: F4_Interaction_1_P10
Processing: F4_Interaction_1_P11
Processing: F4_Interaction_1_P12
Processing: F4_Interaction_1_P13
Processing: F4_Interaction_1_P14
Processing: F4_Interaction_1_P9
Processing: F4_Interaction_2_P10
Processing: F4_Interaction_2_P11
Processing: F4_Interaction_2_P12
Processing: F4_Interaction_2_P13
less than 3 rows
Processing: F4_Interaction_2_P14
Processing: F4_Interaction_2_P9
Processing: F5_Interaction_1_P15
Processing: F5_Interaction_1_P16
Processing: F5_Interaction_2_P15
Processing: F5_Interaction_2_P16
Processing: F6_Interaction_1_P17
Processing: F6_Interaction_1_P18
Processing: F6_Interaction_1_P19
Processing: F6_Interaction_2_P17
Processing: F6_Interaction_2_P18
Processing: F6_Interaction_2_P19
Processing: F7_Interaction_1_P20
Processing: F7_Interaction_1_P21
Processing: F7_Interaction_1_P22
less than 3 rows
Processing: F7_Interaction_1_P23
Processing: F8_Interaction_1_P24
Processing: F8_Interaction_1_P25
Processing: 