In [1]:
# Move up one directory before anything else runs: relative paths used later
# in this notebook (e.g. 'data/affvids2018_mc_zrest_norest.tar') are resolved
# from the resulting working directory.
%cd ../
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

/home/eli/AnacondaProjects/HTFATorch


In [2]:
import collections
import logging
import htfa_torch.niidb as niidb
import htfa_torch.utils as utils
import glob
import os
import webdataset as wds
import torch

In [3]:
# Timestamp every log record (month/day/year hour:minute:second) and show
# INFO-level messages, which is the level the dataset-building code logs at.
logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %H:%M:%S',
                    level=logging.INFO)

In [4]:
# Directory holding the per-run NIfTI images; it is joined to other path
# pieces by plain string concatenation, so the trailing '/' is required.
# NOTE(review): absolute, machine-specific path -- consider making it configurable.
affvids_dir = '/home/eli/Documents/AffVids/motion_corrected/'

# Sub-directory of `affvids_dir` that contains the per-subject task log files.
task_log_csvs = 'editedlogfiles'

# Offset added to every non-rest task's start and end time before rounding
# (see TaskElement). Presumably a response-lag allowance -- TODO confirm units.
TASK_ONSET_DELAY = 3

# %-style template for an image file name, filled with (subject, run).
AFFVID_FILENAME_TEMPLATE = 'sub%02d_run%02d_mc_MNI_masked.nii.gz'

# Output archive, relative to the current working directory.
AFFVIDS_FILE = 'data/affvids2018_mc_zrest_norest.tar'

tar_file = AFFVIDS_FILE
# NOTE(review): the writer is created as soon as this cell runs. If TarWriter
# opens its target for writing on construction (TODO confirm), an existing
# archive is replaced here and the later `os.path.exists(AFFVIDS_FILE)` guard
# can never be False.
sink = wds.TarWriter(tar_file)


def affvid_filename(subject, run):
    """Return the path of the image file for ``subject`` and ``run``.

    The file name is built from ``AFFVID_FILENAME_TEMPLATE`` and appended
    directly to ``affvids_dir`` (no separator is inserted).
    """
    basename = AFFVID_FILENAME_TEMPLATE % (subject, run)
    return ''.join((affvids_dir, basename))

class TaskElement:
    """One timed element of a run: either a task or a 'rest' period.

    Attributes:
        task: Name of the task, or the string 'rest'.
        start_time: Rounded start time, or None when no start was given.
        end_time: Rounded end time, or None when no end was given.
        run: Run the element belongs to.
        fear_rating: Optional rating attached to the element (None by default).

    For every task other than 'rest', ``TASK_ONSET_DELAY`` is added to the
    start and end times before rounding; 'rest' times are only rounded.
    """

    def __init__(self, task, start, end, run, fear_rating=None):
        def _rounded(moment):
            # Missing times stay missing.
            if moment is None:
                return None
            shifted = moment if task == 'rest' else moment + TASK_ONSET_DELAY
            return round(shifted)

        self.task = task
        self.run = run
        self.fear_rating = fear_rating
        self.start_time = _rounded(start)
        self.end_time = _rounded(end)

def parse_task_lines(lines):
    """Parse task-log rows into ``TaskElement`` objects, one per non-blank line.

    Each line is split on a single space and the following 0-based columns
    are read: 0 = task name, 1 = run (int), 9 = start time (float),
    10 = end time (float), 11 = fear rating (float).

    Blank or whitespace-only lines (for example a trailing newline at the end
    of the log) are skipped instead of failing with an IndexError.

    Args:
        lines: Iterable of text lines, without the header line.

    Yields:
        TaskElement: One element per parsed line, in input order.

    Raises:
        IndexError: If a non-blank line has fewer than 12 columns.
        ValueError: If a numeric column cannot be converted.
    """
    for line in lines:
        if not line.strip():
            continue
        cols = line.split(' ')
        task = cols[0]
        run = int(cols[1])
        start_time = float(cols[9])
        end_time = float(cols[10])
        fear_rating = float(cols[11])
        yield TaskElement(task, start_time, end_time, run, fear_rating)

def rest_tasks(tasks):
    """Yield the 'rest' elements that fill the gaps around ``tasks``.

    ``tasks`` is a sequence of task elements in presentation order. The
    generated rests are:

    * one from time 0 up to one before the first task's start;
    * for each pair of consecutive tasks in the same run, one spanning from
      one after the earlier task's end to one before the later task's start;
    * at a run boundary, an open-ended rest (``end_time`` is None) closing
      the earlier run and a rest starting at 0 opening the later run;
    * a final open-ended rest after the last task.

    An empty ``tasks`` sequence yields nothing (previously it raised
    IndexError on ``tasks[0]``).

    Args:
        tasks: Sequence of objects with ``start_time``, ``end_time`` and
            ``run`` attributes.

    Yields:
        TaskElement: Rest elements, in the order described above.
    """
    if not tasks:
        return

    first, last = tasks[0], tasks[-1]
    yield TaskElement('rest', 0, first.start_time - 1, first.run)
    for previous, current in zip(tasks, tasks[1:]):
        rest_start = previous.end_time + 1
        rest_end = current.start_time - 1
        if current.run == previous.run:
            yield TaskElement('rest', rest_start, rest_end, current.run)
        else:
            # Run boundary: the gap belongs partly to each run, and the end
            # of the earlier run is not known here.
            yield TaskElement('rest', rest_start, None, previous.run)
            yield TaskElement('rest', 0, rest_end, current.run)
    yield TaskElement('rest', last.end_time + 1, None, last.run)

# Runs to drop for each subject, keyed by subject number. Looking up a
# subject that is not listed gives an empty list, i.e. nothing is dropped.
bad_runs = collections.defaultdict(list, {
    3: [1, 2, 3],
    7: [3],
    14: [3],
    22: [1, 2, 3],
    23: [1],
    24: [1, 2, 3],
    26: [3],
})

## Additional exclusions applied by read_tasks on top of `bad_runs`.
## Add entries here to build a smaller ("mini") version of the dataset.

# Subject numbers whose blocks are skipped entirely.
exclude_subjects = []
# Task names whose blocks are skipped; 'rest' keeps rest periods out of the
# written blocks (they are still used as per-run rest start/end times).
exclude_tasks = ['rest']

def read_tasks(task_csv):
    """Yield a ``niidb.FMriActivationBlock`` for every retained block of a task log.

    The first line of ``task_csv`` is a space-delimited header whose field 4
    (0-based) is the subject number. The remaining lines are parsed with
    ``parse_task_lines`` and supplemented with the rest periods produced by
    ``rest_tasks``.

    A block is skipped (and the reason logged) when its run is listed in
    ``bad_runs`` for the subject, when the subject is in
    ``exclude_subjects``, or when its task name is in ``exclude_tasks``.

    Every yielded block has ``subject``, ``task``, ``run``, ``block`` (index
    in start-time order, counting skipped blocks), ``start_time``,
    ``end_time``, ``individual_differences`` and the start/end times of the
    rest periods of its run that have both a start and an end. The rest-time
    lists are shared between blocks of the same run, so they must not be
    mutated in place.

    Args:
        task_csv: Path of the task log file to read.

    Yields:
        niidb.FMriActivationBlock: Blocks configured but not yet loaded.
    """
    def sentinel(f):
        # Sort/format helper: treat a missing (None) value as 0.0.
        return f if f is not None else 0.0

    # Read everything up front so the file is not held open while the caller
    # consumes the generator.
    with open(task_csv, 'r') as task_csv_file:
        header = task_csv_file.readline().split(' ')
        body_lines = task_csv_file.readlines()

    subject = int(header[4])
    logging.info('Subject %d', subject)
    task_lines = list(parse_task_lines(body_lines))
    task_lines += list(rest_tasks(task_lines))

    subject_bad_runs = bad_runs[subject]
    subject_excluded = subject in exclude_subjects

    # Per-run rest boundaries. Keyed on demand so run numbers outside 1-3 do
    # not raise KeyError; a run without usable rests maps to an empty list.
    rest_starts_dict = collections.defaultdict(list)
    rest_ends_dict = collections.defaultdict(list)
    if not subject_excluded:
        rest_lines = sorted((t for t in task_lines if t.task == 'rest'),
                            key=lambda t: sentinel(t.run))
        for rest in rest_lines:
            if rest.run in subject_bad_runs:
                continue
            # Open-ended rests (missing start or end) cannot be used.
            if rest.end_time is not None and rest.start_time is not None:
                rest_ends_dict[rest.run].append(rest.end_time)
                rest_starts_dict[rest.run].append(rest.start_time)

    task_lines = sorted(task_lines, key=lambda t: sentinel(t.start_time))
    for (i, task) in enumerate(task_lines):
        # Report the actual exclusion reason; previously every skipped block
        # was logged as excluded "for motion".
        if task.run in subject_bad_runs:
            logging.info('Excluding block %d %s of run %d for motion', i,
                         task.task, task.run)
            continue
        if subject_excluded:
            logging.info('Excluding block %d %s of run %d: subject %d is in exclude_subjects',
                         i, task.task, task.run, subject)
            continue
        if task.task in exclude_tasks:
            logging.info('Excluding block %d %s of run %d: task is in exclude_tasks',
                         i, task.task, task.run)
            continue

        logging.info('Block %d %s of run %d started at %f, ended at %f', i,
                     task.task, task.run, sentinel(task.start_time), sentinel(task.end_time))
        result = niidb.FMriActivationBlock(zscore=True, zscore_by_rest=True)
        result.subject = subject
        result.task = task.task
        result.run = task.run
        result.block = i
        result.start_time = task.start_time
        result.end_time = task.end_time
        result.rest_start_times = rest_starts_dict[result.run]
        result.rest_end_times = rest_ends_dict[result.run]
        result.individual_differences = {'fear_rating': task.fear_rating}
        yield result

In [5]:
# Set to True to rebuild the archive even if it already exists.
OVERRIDE = True

# NOTE(review): `sink` was created when the configuration cell ran. If
# TarWriter opens AFFVIDS_FILE for writing on construction (TODO confirm),
# the file always exists by now and the check below is always True, so
# OVERRIDE = False would not preserve an existing archive.
try:
    if not os.path.exists(AFFVIDS_FILE) or OVERRIDE:
        total_trs = 0
        metadata = {
            'blocks': []
        }
        block_id = 0
        last_block = None
        for task_csv in utils.sorted_glob(affvids_dir + task_log_csvs + '/*.txt'):
            for block in read_tasks(task_csv):
                block.filename = affvid_filename(block.subject, block.run)
                # The rest boundaries are stored as strings such as '[12, 40]'.
                block.rest_end_times = '[' + ', '.join(map(str, block.rest_end_times)) + ']'
                block.rest_start_times = '[' + ', '.join(map(str, block.rest_start_times)) + ']'
                # Replace the per-file index with one that is unique across
                # all subjects.
                block.block = block_id
                block_id += 1
                block.mask = '/home/eli/Documents/AffVids/wholebrain2.nii.gz'
                block.smooth = 6
                block.load()
                metadata['blocks'].append(block.wds_metadata())

                for vals in block.format_wds():
                    sink.write(vals)
                total_trs += block.end_time - block.start_time
                last_block = block

        # Fail clearly instead of raising NameError (fresh kernel) or
        # silently reusing a `block` left over from an earlier execution.
        if last_block is None:
            raise RuntimeError('No task blocks were found under %s; nothing to write to %s'
                               % (affvids_dir + task_log_csvs, tar_file))

        # Voxel locations are taken from the last block written.
        metadata['voxel_locations'] = last_block.locations
        metadata['num_times'] = total_trs
        torch.save(metadata, tar_file + '.meta')
        logging.info('Recorded metadata, including voxel locations')
finally:
    # Always release the archive, even if loading a block fails part-way.
    sink.close()

09/17/2021 15:34:58 Subject 10
09/17/2021 15:34:58 Excluding block 0 rest of run 1 for motion
09/17/2021 15:34:58 Excluding block 1 rest of run 2 for motion
09/17/2021 15:34:58 Excluding block 2 rest of run 3 for motion
09/17/2021 15:34:58 Block 3 spider_low_3.m4v of run 2 started at 23.000000, ended at 44.000000
09/17/2021 15:34:58 Loading Nifti image /home/eli/Documents/AffVids/motion_corrected/sub10_run02_mc_MNI_masked.nii.gz with mask /home/eli/Documents/AffVids/wholebrain2.nii.gz (zscore=True, smooth=6, zscore_by_rest=True)
09/17/2021 15:35:09 Block 4 heights_low_6.m4v of run 3 started at 23.000000, ended at 41.000000
09/17/2021 15:35:09 Loading Nifti image /home/eli/Documents/AffVids/motion_corrected/sub10_run03_mc_MNI_masked.nii.gz with mask /home/eli/Documents/AffVids/wholebrain2.nii.gz (zscore=True, smooth=6, zscore_by_rest=True)
09/17/2021 15:35:20 Block 5 spider_high_5.m4v of run 1 started at 25.000000, ended at 46.000000
09/17/2021 15:35:20 Loading Nifti image /home/eli/Doc

In [6]:
import htfa_torch.tardb as tardb

In [7]:
# Open the archive written above as an FmriTarDataset.
db = tardb.FmriTarDataset(AFFVIDS_FILE)

In [8]:
# Both calls are made with save=True. Presumably they compute a mean block and
# per-block activation normalizers and persist them with the dataset --
# TODO confirm against htfa_torch.tardb. The value of the last call is what
# the cell displays.
db.mean_block(save=True)
db.normalize_activations(save=True)

([tensor(7.1691),
  tensor(7.9563),
  tensor(7.1454),
  tensor(7.9563),
  tensor(7.1691),
  tensor(7.1454),
  tensor(7.9563),
  tensor(7.1691),
  tensor(7.1454),
  tensor(7.9563),
  tensor(7.1454),
  tensor(7.1691),
  tensor(7.9563),
  tensor(7.1454),
  tensor(7.1691),
  tensor(7.9563),
  tensor(7.1454),
  tensor(7.1691),
  tensor(7.9563),
  tensor(7.1454),
  tensor(7.1691),
  tensor(7.9563),
  tensor(7.1454),
  tensor(7.1691),
  tensor(7.9563),
  tensor(7.1454),
  tensor(7.1691),
  tensor(7.9563),
  tensor(7.1454),
  tensor(7.1691),
  tensor(7.9563),
  tensor(7.1454),
  tensor(7.1691),
  tensor(7.9563),
  tensor(7.1691),
  tensor(7.1454),
  tensor(9.5725),
  tensor(10.3772),
  tensor(13.1758),
  tensor(9.5725),
  tensor(10.3772),
  tensor(13.1758),
  tensor(9.5725),
  tensor(10.3772),
  tensor(13.1758),
  tensor(9.5725),
  tensor(13.1758),
  tensor(10.3772),
  tensor(9.5725),
  tensor(10.3772),
  tensor(13.1758),
  tensor(9.5725),
  tensor(10.3772),
  tensor(13.1758),
  tensor(9.5725)

In [10]:
# Completion marker; reports the source directory the dataset was built from.
logging.info('Finished building TarDb out of AffVids dataset in %s', affvids_dir)

09/17/2021 15:56:28 Finished building TarDb out of AffVids dataset in /home/eli/Documents/AffVids/motion_corrected/
