In [1]:
import os
import pickle
from pathlib import Path
import json

import pandas as pd
import numpy as np

In [2]:
# Load the laughter and speech example tables of the pilot.
# Both CSVs are read with their first column as the index.
pilot_path = '/home/jose/drive/data/lared_laughter/pilot2'
laughter_csv = os.path.join(pilot_path, 'laughter_examples', 'examples.csv')
speech_csv = os.path.join(pilot_path, 'speech_examples', 'examples.csv')
laughter_examples_df = pd.read_csv(laughter_csv, index_col=0)
speech_examples_df = pd.read_csv(speech_csv, index_col=0)

In [3]:
# Preview the first five rows of the laughter examples table.
laughter_examples_df.head(n=5)

Unnamed: 0,id,pid,cam,valid,hash,_ini_time,_end_time,ini_time,end_time,labels,rect,ini,len
0,,1,2,True,3697019e90ec3c36979d8f5d97bd87e12b09cb83640554...,1028.28,1030.08,1026.363079,1032.543362,"{'vad': None, 'label': None, 'vad_seg': None}",,,
1,,1,3,True,bbbb2623b09bcb1896967141eedf5c56aec51379bfdc1e...,1028.28,1030.08,1026.363079,1032.543362,"{'vad': None, 'label': None, 'vad_seg': None}",,,
2,,1,4,True,24db998367045b41ad6990ee79cb650dd2329b3c899939...,1028.28,1030.08,1026.363079,1032.543362,"{'vad': None, 'label': None, 'vad_seg': None}",,,
3,,1,2,True,533467606958d0cf5ebdd394172b5f975b14d3fa3121b6...,1049.98,1051.3,1047.638924,1054.518364,"{'vad': None, 'label': None, 'vad_seg': None}",,,
4,,1,3,True,a5c3063d3e9a55b9426fd67c84ac0bc82cb605d8c70e82...,1049.98,1051.3,1047.638924,1054.518364,"{'vad': None, 'label': None, 'vad_seg': None}",,,


In [4]:
# Map each laughter example hash to its 'ini'/'end' times, computed as
# `_ini_time` / `_end_time` minus the row's `ini_time`
# (presumably making them relative to the start of the clip -- TODO confirm).
laughter_examples = {}
for _row_label, example in laughter_examples_df.iterrows():
    clip_start = example['ini_time']
    laughter_examples[example['hash']] = {
        'ini': example['_ini_time'] - clip_start,
        'end': example['_end_time'] - clip_start,
    }

In [5]:
# Map each speech example hash to its 'ini'/'end' times, stored exactly as
# read from `_ini_time` / `_end_time` (no offset is subtracted).
speech_examples = {}
for _row_label, example in speech_examples_df.iterrows():
    speech_examples[example['hash']] = {
        'ini': example['_ini_time'],
        'end': example['_end_time'],
    }

In [6]:
# Load the pilot specification JSON. It is indexed later as
# pilot['hits'][hit_type]['tasks'][i]['media']['url'].
# A context manager is used so the file handle is closed deterministically.
with open('../first_pilot.covfee.json') as pilot_file:
    pilot = json.load(pilot_file)

In [7]:
# Locate the directory of downloaded results; the loops below iterate over
# its sub-directories (one per HIT id).
results_path = os.path.join(pilot_path, 'results', 'csv_download')
p = Path(results_path)


In [8]:
# HIT ids grouped by hit type; each hash is written once and both lookup
# structures are derived from this grouping.
_hits_by_type = {
    0: [
        '2aa4cfd7a395f69d3e67a074bf63d3bb8f1ca0576daad5c22245f66e5afbf0da',
        'ebad2c51dc19502f1c8a4bad375c4e07087449b34790de075cb9b75d0ff548a7',
    ],
    1: [
        'd4fbeffb2346462b59e7cb8ad392552c8ca2a44e0341892b489090d129b74e1d',
    ],
    2: [
        '3f48cb8025cd905c1a0f7eda7e70e93a3122a8310af6dfc1411d69c36ee5c83f',
        '2e1248ccfbcd0b52875f8632ef3c5c7a7016818f1f8d57bfa6d7a8f7a1975a2a',
        '3b378a0ce8c12b08324020d11809e6329fe0ae32c15e4bae86e26d9682aa70a9',
    ],
}

# HIT id -> hit type (used as an index into the pilot JSON's list of hits).
hit_to_hittype = {
    hit: hit_type
    for hit_type, hits in _hits_by_type.items()
    for hit in hits
}

# HITs flagged as "old": exactly the type-0 and type-1 HITs listed above.
old_hits = _hits_by_type[0] + _hits_by_type[1]

In [9]:
# Group the files of a results directory by the integer prefix of their name
# (the part before the first underscore).
# NOTE: the map is re-created for every directory, so once the loop finishes
# it only holds the files of the last directory visited.
filemap = {}
for hit_dir in p.iterdir():
    hit_id = os.path.basename(hit_dir)

    filemap = {}
    for entry in os.listdir(hit_dir):
        if entry.startswith('.'):
            # hidden files carry no index prefix; ignore them
            continue
        prefix = int(entry.split('_')[0])
        filemap.setdefault(prefix, []).append(entry)

In [10]:
# Display the index -> filenames map left over from the loop above.
filemap

{81: ['81_#35: Recognition_0.json', '81_#35: Recognition_0.csv'],
 116: ['116_#11: Recognition_0.json', '116_#11: Recognition_0.csv'],
 60: ['60_#24: Rating_0.json'],
 231: ['231_#25: Recognition_0.csv', '231_#25: Recognition_0.json'],
 12: ['12_#1: Rating_0.json'],
 238: ['238_#28: Rating_0.json'],
 102: ['102_#4: Recognition_0.csv', '102_#4: Recognition_0.json'],
 213: ['213_#17: Recognition_0.json',
  '213_#17: Recognition_1.csv',
  '213_#17: Recognition_0.csv',
  '213_#17: Recognition_1.json'],
 214: ['214_#17: Rating_0.json'],
 59: ['59_#24: Recognition_0.json', '59_#24: Recognition_0.csv'],
 94: ['94_Calibration: Recognition_0.csv',
  '94_Calibration: Recognition_1.json',
  '94_Calibration: Recognition_1.csv',
  '94_Calibration: Recognition_0.json'],
 139: ['139_#21: Rating_0.json'],
 143: ['143_#23: Rating_0.json'],
 171: ['171_#37: Rating_0.json'],
 146: ['146_#25: Recognition_0.json', '146_#25: Recognition_0.csv'],
 13: ['13_#2: Recognition_0.csv', '13_#2: Recognition_0.json']

In [16]:
# Walk every HIT directory and, for each "Rating" response, pair the rating
# JSON with the continuous "Recognition" CSV stored for the same example.
#
# Outputs (kept in lockstep, one entry per retained segment):
#   continuous_annotations -- list of 2-column arrays [media_time, data0]
#   results_dict           -- list of flat per-segment records (a list of
#                             dicts, despite the name)
#
# Segments with no continuous response file are reported and skipped.
# Previously the code fell through to `pd.read_csv(response_file)` with
# `response_file` either undefined or left over from the previous segment.
continuous_annotations = []
results_dict = []
for hit_dir in p.iterdir():
    hit_id = os.path.basename(hit_dir)
    hit_type = hit_to_hittype[hit_id]

    num_segments = 0
    for json_file in hit_dir.glob('*.json'):
        # Only "Rating" responses are processed; "Example" tasks are ignored.
        fname = os.path.basename(json_file).split('.')[0]
        if 'Rating' not in fname or 'Example' in fname:
            continue

        # Locate the task in the pilot JSON from the file's index prefix.
        file_index = int(fname.split('_')[0])
        index_in_hit = file_index
        if hit_id in old_hits:
            # Old HITs are offset by 7 relative to the pilot's task list
            # (reason not visible here -- TODO confirm).
            index_in_hit += 7
        example_url = pilot['hits'][hit_type]['tasks'][index_in_hit]['media']['url']
        example_hash = example_url.split('/')[-1].split('_')[1]
        condition = example_url.split('/')[-2]
        if condition == 'aiv':
            condition = 'audio'

        is_calibration = 'Calibration' in fname

        # Read the rating response; a missing/empty response becomes {}.
        with open(json_file) as rating_file:
            json_res = json.load(rating_file)['response']
        if not json_res:
            json_res = {}

        # The continuous data is looked up under the index immediately
        # before the rating's index, with the same chunk label.
        recognition_index = file_index - 1
        number_chunk = fname.split('_')[1].split(':')[0]
        second_response_file = os.path.join(
            hit_dir, f'{recognition_index}_{number_chunk}: Recognition_1.csv')
        first_response_file = os.path.join(
            hit_dir, f'{recognition_index}_{number_chunk}: Recognition_0.csv')

        # Prefer the second attempt when it exists.
        if os.path.exists(second_response_file):
            attempt = 1
            response_file = second_response_file
        elif os.path.exists(first_response_file):
            attempt = 0
            response_file = first_response_file
        else:
            print(f'no continous response {first_response_file} or {second_response_file}')
            # Nothing to read for this segment: skip it rather than reuse
            # a stale file from a previous iteration.
            continue

        cont_data = pd.read_csv(response_file, index_col=0, header=0)
        pressed_key = cont_data['data0'].any()
        # Rows where the key is held down; used for onset/offset below.
        pressed_samples = cont_data[cont_data['data0'] == 1]

        # Ground truth: laughter examples carry an interval, speech examples
        # are negatives with no interval.
        if example_hash in laughter_examples:
            gt_ini = laughter_examples[example_hash]['ini']
            gt_end = laughter_examples[example_hash]['end']
            gt_laughter = True
        elif example_hash in speech_examples:
            gt_ini = None
            gt_end = None
            gt_laughter = False
        else:
            raise Exception(f'example hash {example_hash} not found anywhere')

        continuous_annotations.append(cont_data[['media_time', 'data0']].to_numpy())

        results_dict.append({
            'person': None,
            'hit_id': hit_id,
            'calibration': is_calibration,
            'condition': condition,
            'hash': example_hash,
            'attempt': attempt,
            'gt_ini': gt_ini,
            'gt_end': gt_end,
            'gt_laughter': gt_laughter,
            'pressed_key': pressed_key,
            # media_time of the first / last sample with the key pressed
            'onset': pressed_samples.iloc[0]['media_time'] if pressed_key else None,
            'offset': pressed_samples.iloc[-1]['media_time'] if pressed_key else None,
            'is_laughter': json_res.get('laughter', None),
            'confidence': json_res.get('confidence', None),
            'intensity': json_res.get('intensity', None)
        })
        num_segments += 1
    print(f'HIT {hit_id}, segments: {num_segments}')

HIT 2e1248ccfbcd0b52875f8632ef3c5c7a7016818f1f8d57bfa6d7a8f7a1975a2a, segments: 126
HIT 3b378a0ce8c12b08324020d11809e6329fe0ae32c15e4bae86e26d9682aa70a9, segments: 17
HIT 2aa4cfd7a395f69d3e67a074bf63d3bb8f1ca0576daad5c22245f66e5afbf0da, segments: 126
HIT ebad2c51dc19502f1c8a4bad375c4e07087449b34790de075cb9b75d0ff548a7, segments: 43
HIT d4fbeffb2346462b59e7cb8ad392552c8ca2a44e0341892b489090d129b74e1d, segments: 126
HIT 3f48cb8025cd905c1a0f7eda7e70e93a3122a8310af6dfc1411d69c36ee5c83f, segments: 126


In [17]:
# Assemble the per-segment records into one table and show its row count.
results = pd.DataFrame(results_dict)
len(results.index)

564

In [18]:
# Preview the first 20 rows, hiding the long `hit_id` column for readability.
results.drop(columns=['hit_id']).head(n=20)

Unnamed: 0,person,calibration,condition,hash,attempt,gt_ini,gt_end,gt_laughter,pressed_key,onset,offset,is_laughter,confidence,intensity
0,,False,video,c04425157021aec61593af1ad0f98d3cb6bfdd1e722060...,0,1.919661,2.785661,True,True,3.43677,4.471138,True,3.0,1.0
1,,False,video,4c33f28fdfd04e773bcaffcd7cb3ae4d98f2455744bb21...,0,3.204874,6.564874,True,True,3.53687,6.006006,True,5.0,5.0
2,,False,av,4e94802c123eb9f6060854f9ed0121cb9c1e4ba44708b9...,0,1.653297,2.393297,True,True,2.292156,2.659189,True,6.0,2.0
3,,False,av,11866b5358caabe46c4528bfc49de7579363607700b1cc...,0,3.272997,5.272997,True,True,4.928125,5.762292,True,7.0,4.0
4,,False,audio,8e4201ed4e84ac8d937fb6bb92aec9b8feb53b51fb3622...,0,,,False,False,,,False,6.0,4.0
5,,False,audio,8f31104e99128232be56331eb2fad4486fc9a4d49f0048...,0,,,False,False,,,False,4.0,4.0
6,,False,audio,fcce19ded1b21bd5a7d511d6e0d8876b5bffe42d1ed24c...,0,,,False,False,,,False,5.0,4.0
7,,False,audio,dbaabe4af355ea52813edd36b7ca92d971961e5671ebc8...,0,2.202106,2.682106,True,False,,,False,5.0,4.0
8,,False,video,8108b1733b1c13aef1276dd2e7e55be37a573c63788d5d...,0,,,False,False,,,True,3.0,2.0
9,,False,av,6a86605aa53c2936f608d55ae654614dde87bacfbadcb8...,0,2.9286,5.9626,True,True,3.526724,6.39626,True,6.0,4.0


In [19]:
# Write the per-segment results table to CSV (row index included).
results_csv_path = os.path.join(pilot_path, 'results/first_pilot_results.csv')
results.to_csv(results_csv_path)

In [20]:

# Persist the list of continuous annotation arrays as a pickle.
# The context manager guarantees the file is flushed and closed once the
# dump completes, instead of relying on garbage collection of the handle.
with open(os.path.join(pilot_path, 'results/first_pilot_continuous.pkl'), 'wb') as pkl_file:
    pickle.dump(continuous_annotations, pkl_file)