In [36]:
import os
from pathlib import Path
import json
import pandas as pd
import numpy as np
from sklearn.metrics import cohen_kappa_score

In [7]:
# open examples
pilot_path = '/home/jose/drive/data/lared_laughter/pilot2'
laughter_examples_df = pd.read_csv(os.path.join(pilot_path, 'laughter_examples', 'examples.csv'), index_col=0)
laughter_examples_df = laughter_examples_df[laughter_examples_df['rect'].notnull()]
speech_examples_df = pd.read_csv(os.path.join(pilot_path, 'speech_examples', 'examples.csv'), index_col=0)
speech_examples_df = speech_examples_df[speech_examples_df['rect'].notnull()]

In [9]:
# open pilot json
pilot = json.load(open('../pilot.covfee.json'))

In [18]:
# open results
results_path = os.path.join(pilot_path, 'results/csv_download')
p = Path(results_path)


In [23]:
hit_to_hittype = {
    '2aa4cfd7a395f69d3e67a074bf63d3bb8f1ca0576daad5c22245f66e5afbf0da': 0,
    'ebad2c51dc19502f1c8a4bad375c4e07087449b34790de075cb9b75d0ff548a7': 0,
    'd4fbeffb2346462b59e7cb8ad392552c8ca2a44e0341892b489090d129b74e1d': 1,
    '3f48cb8025cd905c1a0f7eda7e70e93a3122a8310af6dfc1411d69c36ee5c83f': 2
}
old_hits = [
    '2aa4cfd7a395f69d3e67a074bf63d3bb8f1ca0576daad5c22245f66e5afbf0da',
    'ebad2c51dc19502f1c8a4bad375c4e07087449b34790de075cb9b75d0ff548a7',
    'd4fbeffb2346462b59e7cb8ad392552c8ca2a44e0341892b489090d129b74e1d'
]

In [33]:
continuous_annotations = []
results_dict = []
for dir in p.iterdir():
    hit_id = os.path.basename(dir)
    hit_type = hit_to_hittype[hit_id]

    num_segments = 0
    for csv_file in dir.glob('*.json'):
        # get example index
        fname = os.path.basename(csv_file)
        if 'Rating' not in fname or 'Example' in fname or 'Calibration' in fname:
            continue
        index_in_hit = int(fname.split('_')[0])
        if hit_id in old_hits:
            index_in_hit += 7
        example_url = pilot['hits'][hit_type]['tasks'][index_in_hit]['media']['url']
        example_id = example_url.split('/')[-1].split('_')[1]

        json_res = json.load(open(csv_file))['response']


        results_dict.append({
            'person': None, 
            'hit_id': hit_id, 
            'example_id': example_id,
            # 'pressed_key': False,
            # 'onset',
            # 'offset',
            'is_laughter': json_res['laughter'],
            'confidence': json_res['confidence'],
            'intensity': json_res['intensity']
        })
        num_segments += 1
    print(f'HIT {hit_id}, segments: {num_segments}')

HIT 2aa4cfd7a395f69d3e67a074bf63d3bb8f1ca0576daad5c22245f66e5afbf0da, segments: 120
HIT ebad2c51dc19502f1c8a4bad375c4e07087449b34790de075cb9b75d0ff548a7, segments: 40
HIT d4fbeffb2346462b59e7cb8ad392552c8ca2a44e0341892b489090d129b74e1d, segments: 120
HIT 3f48cb8025cd905c1a0f7eda7e70e93a3122a8310af6dfc1411d69c36ee5c83f, segments: 120


In [34]:
results = pd.DataFrame.from_dict(results_dict)
len(results)

400

In [37]:
results.head()

Unnamed: 0,person,hit_id,example_id,is_laughter,confidence,intensity
0,,2aa4cfd7a395f69d3e67a074bf63d3bb8f1ca0576daad5...,6272e578b6176b13435403979686b7cc00c746a1b37ecb...,False,7,4
1,,2aa4cfd7a395f69d3e67a074bf63d3bb8f1ca0576daad5...,b37bcb0997537153db15add71abd5d6a50864ac9578d7c...,True,7,6
2,,2aa4cfd7a395f69d3e67a074bf63d3bb8f1ca0576daad5...,c9476c7699c4a5152313994d31ae63059d373d55a12948...,True,7,3
3,,2aa4cfd7a395f69d3e67a074bf63d3bb8f1ca0576daad5...,0657210cea7baa9d453f7bc16852ff5fdcdd6091a2bb2f...,True,6,3
4,,2aa4cfd7a395f69d3e67a074bf63d3bb8f1ca0576daad5...,fe70ff90d467f3dccf863b11c9c6b6abdb11efeb727e1f...,False,3,3


In [39]:
results['hit_id'].unique()

array(['2aa4cfd7a395f69d3e67a074bf63d3bb8f1ca0576daad5c22245f66e5afbf0da',
       'ebad2c51dc19502f1c8a4bad375c4e07087449b34790de075cb9b75d0ff548a7',
       'd4fbeffb2346462b59e7cb8ad392552c8ca2a44e0341892b489090d129b74e1d',
       '3f48cb8025cd905c1a0f7eda7e70e93a3122a8310af6dfc1411d69c36ee5c83f'],
      dtype=object)

In [40]:
def calc_kappas(var):
    a = results[results['hit_id'] == '2aa4cfd7a395f69d3e67a074bf63d3bb8f1ca0576daad5c22245f66e5afbf0da']
    b = results[results['hit_id'] == 'd4fbeffb2346462b59e7cb8ad392552c8ca2a44e0341892b489090d129b74e1d']

    a_examples = a['example_id'].to_list()
    b_examples = b['example_id'].to_list()
    joint = {id: [] for id in a_examples if id in b_examples}

In [42]:
calc_kappas()

<class 'pandas.core.series.Series'>
