In [2]:
import os
import pickle
from pathlib import Path
import json

import pandas as pd
import numpy as np

In [3]:
# Load the example tables for both classes from the pilot data folder.
pilot_path = '/home/jose/drive/data/lared_laughter/pilot2'
laughter_csv = os.path.join(pilot_path, 'laughter_examples', 'examples.csv')
speech_csv = os.path.join(pilot_path, 'speech_examples', 'examples.csv')
# The first CSV column is used as the row index of each table.
laughter_examples_df = pd.read_csv(laughter_csv, index_col=0)
speech_examples_df = pd.read_csv(speech_csv, index_col=0)

In [4]:
# Preview the first rows of the laughter examples table.
laughter_examples_df.head(n=5)

Unnamed: 0,id,pid,cam,valid,hash,_ini_time,_end_time,ini_time,end_time,labels,rect,ini,len
0,,1,2,True,3697019e90ec3c36979d8f5d97bd87e12b09cb83640554...,1028.28,1030.08,1026.363079,1032.543362,"{'vad': None, 'label': None, 'vad_seg': None}",,,
1,,1,3,True,bbbb2623b09bcb1896967141eedf5c56aec51379bfdc1e...,1028.28,1030.08,1026.363079,1032.543362,"{'vad': None, 'label': None, 'vad_seg': None}",,,
2,,1,4,True,24db998367045b41ad6990ee79cb650dd2329b3c899939...,1028.28,1030.08,1026.363079,1032.543362,"{'vad': None, 'label': None, 'vad_seg': None}",,,
3,,1,2,True,533467606958d0cf5ebdd394172b5f975b14d3fa3121b6...,1049.98,1051.3,1047.638924,1054.518364,"{'vad': None, 'label': None, 'vad_seg': None}",,,
4,,1,3,True,a5c3063d3e9a55b9426fd67c84ac0bc82cb605d8c70e82...,1049.98,1051.3,1047.638924,1054.518364,"{'vad': None, 'label': None, 'vad_seg': None}",,,


In [5]:
# Map each laughter example hash to its 'ini'/'end' times, expressed as
# offsets from the row's `ini_time` column (presumably the start of the
# extracted clip - TODO confirm). A repeated hash keeps its last row.
laughter_examples = {}
for _, example_row in laughter_examples_df.iterrows():
    clip_start = example_row['ini_time']
    laughter_examples[example_row['hash']] = {
        'ini': example_row['_ini_time'] - clip_start,
        'end': example_row['_end_time'] - clip_start,
    }

In [6]:
# Map each speech example hash to its 'ini'/'end' times, taken directly from
# the `_ini_time` / `_end_time` columns. A repeated hash keeps its last row.
speech_examples = {}
for _, example_row in speech_examples_df.iterrows():
    speech_examples[example_row['hash']] = {
        'ini': example_row['_ini_time'],
        'end': example_row['_end_time'],
    }

In [7]:
# Locate the folder with the raw results of the second pilot. Its entries are
# iterated below, one per HIT, with the entry name used as the HIT id.
results_path = os.path.join(pilot_path, 'results/second_results')
p = Path(results_path)

In [15]:
# Parse every HIT folder under `p` into two parallel lists (one entry per HIT):
#   all_results          - dict: example hash -> record combining ground truth,
#                          the rating JSON response and the recognition summary
#   all_continuous_annot - dict: example hash -> array of [media_time, data0]
#                          rows read from the recognition CSV
all_results = []
all_continuous_annot = []
# Path.iterdir() yields entries in arbitrary order; sorting keeps the order of
# HITs (and therefore of the rows saved later) reproducible across runs.
for hit_dir in sorted(p.iterdir()):
    if not hit_dir.is_dir():
        # Stray files in the results folder are not HITs.
        continue
    hit_id = hit_dir.name
    results_dict = {}
    continuous_annotations = {}

    num_segments = 0
    # --- rating responses (JSON) -------------------------------------------
    for json_file in sorted(hit_dir.glob('*.json')):
        # File name without anything after the first dot.
        fname = json_file.name.split('.')[0]
        if 'example' in fname or 'rating' not in fname:
            continue

        # {index}_{recognition|rating}_{hash}_{condition}_{block}_{1 if calibration else 0}'
        # 14_rating_47d49b6e15d4befe8e7a9508068046a7d5478da30f1cc77074951c94cdef9439_video_1_0_0
        parts = fname.split('_')

        # index_in_hit and block are parsed (a malformed file name fails here)
        # but are currently not stored in the record.
        index_in_hit = int(parts[0])
        example_hash = parts[2]
        condition = parts[3]
        block = int(parts[4])
        calibration = (parts[5] == '1')

        # Read the json response; the context manager closes the file handle.
        with open(json_file) as json_fp:
            json_res = json.load(json_fp)['response']

        # Get the ground truth: onset/offset are only known for laughter examples.
        if example_hash in laughter_examples:
            gt_ini = laughter_examples[example_hash]['ini']
            gt_end = laughter_examples[example_hash]['end']
            gt_laughter = True
        elif example_hash in speech_examples:
            gt_ini = None
            gt_end = None
            gt_laughter = False
        else:
            raise Exception(f'example hash {example_hash} not found anywhere')

        results_dict[example_hash] = {
            'person': None,
            'hit_id': hit_id,
            'condition': condition,
            'calibration': calibration,
            'hash': example_hash,
            'gt_ini': gt_ini,
            'gt_end': gt_end,
            'gt_laughter': gt_laughter,

            'is_laughter': json_res['laughter'],
            'confidence': json_res['confidence'],
            'intensity': json_res['intensity']
        }
        num_segments += 1

    # --- continuous recognition annotations (CSV) --------------------------
    # Files are processed in sorted name order; when several recognition files
    # share a hash, the last one processed overwrites the earlier ones.
    for csv_file in sorted(hit_dir.glob('*.csv')):
        fname = csv_file.name.split('.')[0]
        if 'example' in fname or 'recognition' not in fname:
            continue
        parts = fname.split('_')
        attempt = int(parts[-1])
        example_hash = parts[2]

        if example_hash not in results_dict:
            # Same exception type as the plain dict lookup, but with context.
            raise KeyError(
                f'recognition file {csv_file.name} has no matching rating '
                f'response in HIT {hit_id}'
            )

        cont_data = pd.read_csv(csv_file, index_col=0, header=0)
        pressed_key = cont_data['data0'].any()
        # Rows where the key was held down; computed once for onset and offset.
        pressed_rows = cont_data[cont_data['data0'] == 1]

        results_dict[example_hash] = {
            **results_dict[example_hash],
            'attempt': attempt,
            'pressed_key': pressed_key,
            # media_time of the first / last sample with the key pressed
            'onset': pressed_rows.iloc[0]['media_time'] if pressed_key else None,
            'offset': pressed_rows.iloc[-1]['media_time'] if pressed_key else None
        }

        continuous_annotations[example_hash] = cont_data[['media_time', 'data0']].to_numpy()

    all_results.append(results_dict)
    all_continuous_annot.append(continuous_annotations)
    print(f'HIT {hit_id}, segments: {num_segments}')

HIT 9e4d29088fa0d286873e333da975c61f9bb08f695cccd0dab8784613c75eb6fc, segments: 90
HIT 6d7cdf8358d3e34a834163e7484e2bc095251687e7077292fa52cfb277c74778, segments: 90


In [16]:
# Flatten the per-HIT dictionaries into two parallel lists: the i-th record in
# `results` corresponds to the i-th array in `annotations`.
results = []
annotations = []
for hit_records, hit_annotations in zip(all_results, all_continuous_annot):
    for example_hash, record in hit_records.items():
        results.append(record)
        # Raises KeyError if a rated example has no continuous annotation.
        annotations.append(hit_annotations[example_hash])

In [17]:
# Turn the list of per-example records into a table (one row per record)
# and show how many rows it has.
results = pd.DataFrame(results)
len(results.index)

180

In [11]:
# Preview the first ten rows of the results table.
results.head(n=10)

Unnamed: 0,person,hit_id,condition,calibration,hash,gt_ini,gt_end,gt_laughter,is_laughter,confidence,intensity,attempt,pressed_key,onset,offset
0,,6d7cdf8358d3e34a834163e7484e2bc095251687e70772...,av,True,ced6e78fe7940c10fbc9d7c385273e68459ca399ccb668...,2.584895,8.284895,True,True,7,5,0,True,2.926123,8.898762
1,,6d7cdf8358d3e34a834163e7484e2bc095251687e70772...,video,False,b2fdcbca428d2c6bc6aa1fb270a6a70cc94f873156c881...,,,False,False,5,4,0,False,,
2,,6d7cdf8358d3e34a834163e7484e2bc095251687e70772...,audio,False,291f0ddfc94eba2ff89a9c26332eba8d937f947196f3ea...,,,False,True,6,1,0,True,7.98322,8.38322
3,,6d7cdf8358d3e34a834163e7484e2bc095251687e70772...,video,False,6a2f65359542870a20762006e1190c7726fdc5ddb7b07f...,2.956662,3.416662,True,True,4,2,0,True,5.405405,6.072739
4,,6d7cdf8358d3e34a834163e7484e2bc095251687e70772...,av,False,602d058b4dd0b006931f8b146abcabe45dfda143a6ad99...,2.886694,3.546694,True,True,7,4,0,True,3.326523,3.927124
5,,6d7cdf8358d3e34a834163e7484e2bc095251687e70772...,audio,False,6272e578b6176b13435403979686b7cc00c746a1b37ecb...,,,False,False,7,4,0,False,,
6,,6d7cdf8358d3e34a834163e7484e2bc095251687e70772...,av,False,3c34ff9e39fb478f32334428a378a6c95e351f9fd24ec3...,3.169746,3.619746,True,True,7,2,0,True,3.893757,4.994858
7,,6d7cdf8358d3e34a834163e7484e2bc095251687e70772...,av,False,4d1ab64882187a12626bf173eb1db4a7a6de65a482ad21...,2.509211,3.576211,True,True,3,3,0,True,3.893757,4.627825
8,,6d7cdf8358d3e34a834163e7484e2bc095251687e70772...,video,False,dbaabe4af355ea52813edd36b7ca92d971961e5671ebc8...,2.202106,2.682106,True,True,4,2,0,True,2.802803,5.271939
9,,6d7cdf8358d3e34a834163e7484e2bc095251687e70772...,av,False,8108b1733b1c13aef1276dd2e7e55be37a573c63788d5d...,,,False,False,7,4,0,False,,


In [18]:
# Save the results table as CSV inside the pilot folder.
results_csv_path = os.path.join(pilot_path, 'results/second_pilot_results.csv')
results.to_csv(results_csv_path)

In [19]:
# Save the continuous annotations (a list appended in the same order as the
# result records) as a pickle. The context manager guarantees the file is
# flushed and closed, which the previous inline open() did not.
continuous_pkl_path = os.path.join(pilot_path, 'results/second_pilot_continuous.pkl')
with open(continuous_pkl_path, 'wb') as pkl_file:
    pickle.dump(annotations, pkl_file)