In [72]:
import os
from glob import glob

import numpy as np
import pandas as pd

In [3]:
# Load the raw split definitions, keeping one string per line of the file.
with open('../scripts/preprocessing-stimulus/splits.txt') as split_file:
    splits = list(split_file)

In [5]:
def convert_to_seconds(hhmmss):
    """Convert an ``HH:MM:SS[.fff]`` timestamp string to seconds.

    Parameters
    ----------
    hhmmss : str
        Timestamp with exactly three colon-separated fields; each field is
        parsed with ``float`` so the seconds may carry a fractional part.

    Returns
    -------
    float
        Total number of seconds represented by the timestamp.

    Raises
    ------
    ValueError
        If the string does not have exactly three fields or a field is not
        a valid number.
    """
    hours, minutes, seconds = (float(field) for field in hhmmss.split(':'))
    return hours * 3600 + minutes * 60 + seconds

In [6]:
# Break every line into its whitespace-separated timestamps.
# NOTE(review): this rebinds `splits` (list of str -> list of lists), so the
# cell only works once after the file has been read.
splits = [line.split() for line in splits]

In [7]:
# Inspect the parsed [start, end] timestamp pairs, one pair per split.
splits

[['00:45:22.899', '00:55:00.299'],
 ['00:54:50', '01:02:47.133'],
 ['01:02:27', '01:11:01.599'],
 ['01:10:50', '01:20:47.699'],
 ['01:20:37', '01:33:39.533']]

In [8]:
# Spot-check the conversion on the first split's start timestamp.
convert_to_seconds(splits[0][0])

2722.899

In [9]:
# For each clip we need to compute:
# 1. The duration of the clip
# 2. The overlap between the end of the previous clip and the start of this clip

In [17]:
# Same layout as `splits` (one [start, end] row per split), but in seconds.
splits_s = np.array([list(map(convert_to_seconds, pair)) for pair in splits])

In [53]:
# Clip length = end - start along each row, rounded to whole seconds.
# The result keeps a trailing axis, i.e. one single-element row per clip.
# NOTE(review): rounding drops the sub-second part (e.g. 577.4 -> 577).
durations = np.diff(splits_s, n=1, axis=-1).round()

In [54]:
# Inspect the rounded per-clip durations.
durations

array([[577.],
       [477.],
       [515.],
       [598.],
       [783.]])

In [23]:
# Inspect the split boundaries in seconds; each row is [start, end].
splits_s

array([[2722.899, 3300.299],
       [3290.   , 3767.133],
       [3747.   , 4261.599],
       [4250.   , 4847.699],
       [4837.   , 5619.533]])

In [28]:
# Overlap between consecutive clips: end of clip i minus start of clip i + 1,
# rounded to whole seconds (one value per pair of neighbouring clips).
overlap = np.subtract(splits_s[:-1, 1], splits_s[1:, 0]).round()

In [32]:
# The first clip has no predecessor, so it gets an overlap of 0.
# NOTE(review): this rebinds `overlap` from an array to a list; re-running the
# cell without re-running the previous one fails (lists have no .tolist()).
overlap = [0, *overlap.tolist()]
overlap

[0, 10.0, 20.0, 12.0, 11.0]

In [85]:
# Build one events table per run. Every onset is shifted by `clip_start`.
# Runs that overlap with the previous run get two rows: the overlapping
# segment first, then the remainder of the clip labelled 'movie'.
clip_start = 10.  # 10 s buffer at the beginning
dfs = []
for run_duration, run_overlap in zip(durations.ravel(), overlap):
    if run_overlap != 0:
        rows = [
            {
                'onset': clip_start,
                'duration': run_overlap,
                'trial_type': 'overlap_with_previous_run',
            },
            {
                # the new material starts right after the overlapping segment
                'onset': run_overlap + clip_start,
                'duration': run_duration - run_overlap,
                'trial_type': 'movie',
            },
        ]
    else:
        rows = [
            {
                'onset': clip_start,
                'duration': run_duration,
                'trial_type': 'movie',
            },
        ]
    dfs.append(pd.DataFrame.from_records(rows))

In [86]:
# Display the per-clip durations again for reference.
durations

array([[577.],
       [477.],
       [515.],
       [598.],
       [783.]])

In [87]:
# Print each run's events table for visual inspection.
for run_events in dfs:
    print(run_events)

   onset  duration trial_type
0   10.0     577.0      movie
   onset  duration                 trial_type
0   10.0      10.0  overlap_with_previous_run
1   20.0     467.0                      movie
   onset  duration                 trial_type
0   10.0      20.0  overlap_with_previous_run
1   30.0     495.0                      movie
   onset  duration                 trial_type
0   10.0      12.0  overlap_with_previous_run
1   22.0     586.0                      movie
   onset  duration                 trial_type
0   10.0      11.0  overlap_with_previous_run
1   21.0     772.0                      movie


In [88]:
# presumably the number of TRs (volumes) in each run, in run order — TODO confirm source
n_trs_run = [598, 498, 535, 618, 803]

In [89]:
# Total length implied by each events table: first onset + all durations + 10.
# presumably the trailing 10 is an end-of-run buffer — TODO confirm.
# NOTE(review): the first two values (597, 497) are 1 short of the matching
# n_trs_run entries (598, 498); nothing here compares them automatically.
for run_events in dfs:
    run_length = run_events.onset[0] + np.sum(run_events.duration) + 10
    print(run_length)

597.0
497.0
535.0
618.0
803.0


In [97]:
# Subject IDs are the names of the entries matching ../data/sub-*.
# os.path.basename is used instead of splitting on '/' so the ID is still
# correct when glob returns paths with the platform's native separator
# (otherwise the events glob further down would silently match nothing).
subjects = sorted(os.path.basename(path) for path in glob('../data/sub-*'))

In [98]:
# Overwrite every subject's events files with the tables built above; the same
# tables are reused for all subjects. Files are paired with tables by sorted
# filename order (assumes that order matches run order — TODO confirm).
for s in subjects:
    print(s)
    events = sorted(glob(f"../data/{s}/func/*events.tsv"))
    # zip() silently truncates, so a subject with missing or extra events
    # files could get a table written into the wrong run's file. Fail loudly
    # before overwriting anything for this subject.
    if len(events) != len(dfs):
        raise ValueError(
            f"{s}: found {len(events)} events files but have {len(dfs)} events tables"
        )
    for ev, df in zip(events, dfs):
        # index=False keeps the row index out of the TSV
        df.to_csv(ev, sep='\t', index=False)

sub-sid000005
sub-sid000007
sub-sid000009
sub-sid000010
sub-sid000013
sub-sid000020
sub-sid000021
sub-sid000024
sub-sid000025
sub-sid000029
sub-sid000030
sub-sid000034
sub-sid000050
sub-sid000052
sub-sid000055
sub-sid000114
sub-sid000120
sub-sid000134
sub-sid000142
sub-sid000278
sub-sid000416
sub-sid000499
sub-sid000522
sub-sid000535
sub-sid000560
