In [1]:
# Step up to the parent directory; presumably so that the relative paths used
# later (e.g. the 'data/...' database file) resolve from there -- confirm.
%cd ../
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

/home/eli/AnacondaProjects/HTFATorch


In [2]:
import collections
import logging
import htfa_torch.niidb as niidb
import htfa_torch.utils as utils
import glob
import os

import numpy as np
from sklearn.model_selection import LeaveOneOut

In [3]:
# Timestamped, INFO-level logging so per-block progress shows up in cell output.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%m/%d/%Y %H:%M:%S',
    format='%(asctime)s %(message)s',
)

In [4]:
# --- Input locations ---------------------------------------------------------
# Root directory of the Haxby data. It must end with '/' because the paths in
# this notebook are built from it by plain string concatenation.
haxby_dir = '/home/eli/Documents/Haxby_Final/'
# Sub-directory of ``haxby_dir`` that holds the per-run events (.tsv) files.
task_log_csvs = 'events_files'

# --- File-name patterns, filled in with (subject, run) -----------------------
TASK_FILE_TEMPLATE = 'sub-%d_task-objectviewing_run-%02d_events.tsv'
HAXBY_FILENAME_TEMPLATE = 'sub%d_run%02d_mc_MNI_masked.nii.gz'

# --- Output ------------------------------------------------------------------
# Database file written by the build cell (relative to the working directory).
HAXBY_FILE = 'data/haxby_mc_zrest_cv.db'

# Offset added to non-rest event times before they are converted to indices.
TASK_ONSET_DELAY = 3
def haxby_filename(subject, run):
    """Return the path of the image file for one subject and run.

    The file name is ``HAXBY_FILENAME_TEMPLATE`` filled in with ``subject`` and
    ``run``; it is appended to ``haxby_dir`` by plain string concatenation.
    """
    basename = HAXBY_FILENAME_TEMPLATE % (subject, run)
    return haxby_dir + basename


class TaskElement:
    """A labelled time span (task or rest) belonging to one run.

    Attributes set by the constructor: ``task``, ``run``, ``start_time`` and
    ``end_time``.

    Args:
        task: Label of the span; the value ``'rest'`` is treated specially
            during time conversion.
        start: Start of the span (may be ``None``).
        end: End of the span (may be ``None``).
        run: Run identifier stored unchanged.
        t_seconds: When true, ``start``/``end`` are converted with
            ``_to_index``; when false they are stored exactly as given.
    """

    def __init__(self, task, start, end,run,t_seconds=True):
        self.task = task
        self.run = run
        if not t_seconds:
            # Caller already supplies the final values; keep them verbatim.
            self.start_time, self.end_time = start, end
            return
        self.start_time = self._to_index(task, start)
        self.end_time = self._to_index(task, end)

    @staticmethod
    def _to_index(task, seconds):
        """Round a time to an integer; ``None`` passes through untouched.

        Rest times are simply rounded. Every other task is shifted by
        ``TASK_ONSET_DELAY`` and divided by 2.5 before rounding
        (2.5 is presumably the scan repetition time in seconds -- confirm).
        """
        if seconds is None:
            return None
        if task == 'rest':
            return round(seconds)
        return round((seconds + TASK_ONSET_DELAY) / 2.5)

def parse_task_lines(lines,run):
    """Parse tab-separated event rows into ``TaskElement`` objects.

    Each row is read as: column 0 = onset, column 1 = duration, column 2 =
    task label. The end time is ``onset + duration``. Blank rows (for example
    a trailing empty line at the end of a file) are skipped instead of
    failing with ``IndexError``.

    Args:
        lines: Iterable of raw text rows (without the header row).
        run: Run identifier attached to every yielded element.

    Yields:
        One ``TaskElement`` per non-blank row; the times are passed on with
        the default ``t_seconds=True`` so ``TaskElement`` converts them.

    Raises:
        ValueError: If the onset or duration column is not a number.
        IndexError: If a non-blank row has fewer than three columns.
    """
    for line in lines:
        if not line.strip():
            continue
        cols = line.strip('\n').split('\t')
        task = cols[2]
        onset = float(cols[0])
        duration = float(cols[1])
        yield TaskElement(task, onset, onset + duration, run)

def make_task_blocks(tasks):
    """Merge consecutive elements that share a task label into single blocks.

    A block starts at the ``start_time`` of its first element and ends at the
    ``end_time`` of its last element. The end time is tracked for every
    element, so a block made of a single element gets that element's own end
    time (rather than a stale value from the previous block, or an
    ``UnboundLocalError`` when no earlier value exists).

    Args:
        tasks: Sequence of ``TaskElement`` objects in chronological order.
            All blocks are tagged with the run of the first element.

    Yields:
        One ``TaskElement`` per run of equal task labels, created with
        ``t_seconds=False`` so the already-converted times are kept as is.
        Nothing is yielded for an empty sequence.
    """
    if not tasks:
        return

    run = tasks[0].run
    current_task = tasks[0].task
    current_start = tasks[0].start_time
    current_end = tasks[0].end_time

    for element in tasks[1:]:
        if element.task != current_task:
            # Label changed: close the running block and open a new one.
            yield TaskElement(current_task, current_start, current_end, run, t_seconds=False)
            current_task = element.task
            current_start = element.start_time
        # Always advance the end so one-element blocks are handled correctly.
        current_end = element.end_time

    yield TaskElement(current_task, current_start, current_end, run, t_seconds=False)

def rest_tasks(tasks):
    """Yield the 'rest' spans surrounding and separating the given blocks.

    Produces, in order:
      * a leading rest from 0 up to one before the first block's start,
      * one rest between every pair of neighbouring blocks (one after the
        earlier block's end up to one before the later block's start),
      * a trailing rest that starts one after the last block's end and has
        no end (``None``).

    Every rest carries the run of the first block and is created with
    ``t_seconds=False`` so the values are stored unchanged.
    """
    run = tasks[0].run

    yield TaskElement('rest', 0, tasks[0].start_time - 1, run, t_seconds=False)

    for earlier, later in zip(tasks, tasks[1:]):
        gap_start = earlier.end_time + 1
        gap_end = later.start_time - 1
        yield TaskElement('rest', gap_start, gap_end, run, t_seconds=False)

    yield TaskElement('rest', tasks[-1].end_time + 1, None, run, t_seconds=False)


# Runs to leave out, keyed by subject; subjects without an entry map to [].
bad_runs = collections.defaultdict(list)
bad_runs.update({5: [9], 1: [11]})
def read_tasks(task_csv):
    """Yield one ``niidb.FMriActivationBlock`` per task/rest block of an events file.

    The subject and run numbers are taken from the file name, which is
    expected to look like ``sub-<subject>_task-..._run-<run>_events.tsv``.
    The rows are parsed with ``parse_task_lines``, merged into blocks with
    ``make_task_blocks`` and complemented with the rest spans from
    ``rest_tasks``. Blocks are emitted in order of start time.

    Args:
        task_csv: Path of a tab-separated events file whose first row is a
            header.

    Yields:
        ``niidb.FMriActivationBlock`` objects (created with ``zscore=True,
        zscore_by_rest=True``) with ``subject``, ``task``, ``run``, ``block``,
        ``start_time``, ``end_time``, ``rest_start_times`` and
        ``rest_end_times`` filled in. ``block`` is the position in the
        start-time ordering and also counts blocks that were excluded.
        Blocks whose run is listed in ``bad_runs`` for the subject are logged
        and skipped. Nothing is yielded for a file without event rows.

    Raises:
        ValueError: If the subject or run cannot be parsed from the file name.
    """
    def sentinel(f):
        # The trailing rest span has no end time; treat None as 0.0 for
        # sorting and for the '%f' log fields.
        return f if f is not None else 0.0

    # Read everything up front so the file is closed before the first yield
    # instead of staying open for as long as the generator is alive.
    with open(task_csv, 'r') as task_csv_file:
        task_csv_file.readline()  # skip the header row
        raw_lines = task_csv_file.readlines()

    # Take the full number that follows 'sub-' / 'run-' rather than a fixed
    # number of characters, so multi-digit subject ids parse correctly.
    filename = os.path.basename(task_csv)
    name_parts = filename.split('-')
    subject = int(name_parts[1].split('_')[0])
    run = int(name_parts[-1].split('_')[0])
    logging.info('Subject %d', subject)

    events = list(parse_task_lines(raw_lines, run=run))
    if not events:
        logging.warning('No events found in %s', task_csv)
        return

    blocks = list(make_task_blocks(events))
    blocks += list(rest_tasks(blocks))

    # Every element carries the run parsed from the file name, so one pair of
    # lists is enough; all yielded blocks share these same list objects.
    rest_starts, rest_ends = [], []
    for rest in blocks:
        if rest.task != 'rest' or rest.run in bad_runs[subject]:
            continue
        if rest.end_time is not None and rest.start_time is not None:
            rest_ends.append(rest.end_time)
            rest_starts.append(rest.start_time)

    blocks = sorted(blocks, key=lambda t: sentinel(t.start_time))
    for (i, task) in enumerate(blocks):
        if task.run in bad_runs[subject]:
            logging.info('Excluding block %d %s of run %d for bad data', i,
                         task.task, task.run)
            continue
        logging.info('Block %d %s of run %d started at %f, ended at %f', i,
                     task.task, task.run, sentinel(task.start_time), sentinel(task.end_time))
        result = niidb.FMriActivationBlock(zscore=True, zscore_by_rest=True)
        result.subject = subject
        result.task = task.task
        result.run = task.run
        result.block = i
        result.start_time = task.start_time
        result.end_time = task.end_time
        result.rest_start_times = rest_starts
        result.rest_end_times = rest_ends
        yield result

In [5]:
# Collect every run number that appears in any events file.
RUNS = set()

for task_csv in utils.sorted_glob(haxby_dir + task_log_csvs + '/*.tsv'):
    for block in read_tasks(task_csv):
        RUNS.add(block.run)

09/30/2020 17:25:51 Subject 1
09/30/2020 17:25:51 Block 0 rest of run 1 started at 0.000000, ended at 5.000000
09/30/2020 17:25:51 Block 1 scissors of run 1 started at 6.000000, ended at 15.000000
09/30/2020 17:25:51 Block 2 rest of run 1 started at 16.000000, ended at 19.000000
09/30/2020 17:25:51 Block 3 face of run 1 started at 20.000000, ended at 29.000000
09/30/2020 17:25:51 Block 4 rest of run 1 started at 30.000000, ended at 34.000000
09/30/2020 17:25:51 Block 5 cat of run 1 started at 35.000000, ended at 44.000000
09/30/2020 17:25:51 Block 6 rest of run 1 started at 45.000000, ended at 48.000000
09/30/2020 17:25:51 Block 7 shoe of run 1 started at 49.000000, ended at 58.000000
09/30/2020 17:25:51 Block 8 rest of run 1 started at 59.000000, ended at 63.000000
09/30/2020 17:25:51 Block 9 house of run 1 started at 64.000000, ended at 73.000000
09/30/2020 17:25:51 Block 10 rest of run 1 started at 74.000000, ended at 77.000000
09/30/2020 17:25:51 Block 11 scrambledpix of run 1 star

In [6]:
# Leave-one-run-out split over the run numbers collected above. Sorting makes
# the order (and therefore which run is held out first) deterministic.
run_labels = np.array(sorted(RUNS))

cv = LeaveOneOut()
splits = list(cv.split(np.zeros(len(run_labels)), run_labels))

# LeaveOneOut.split yields positional indices into the samples, not the run
# numbers themselves. Map them back to run numbers, because the run sets are
# compared with ``block.run`` when the database is built.
train_idx, val_idx = splits[0]
TRAIN_RUNS, VAL_RUNS = run_labels[train_idx], run_labels[val_idx]

In [7]:
# Set to False to keep an existing database file instead of rebuilding it.
OVERRIDE = True

if OVERRIDE or not os.path.exists(HAXBY_FILE):
    haxby_db = niidb.FMriActivationsDb(
        HAXBY_FILE,
        mask='/home/eli/Documents/Haxby_Final/mask/haxby_mask.nii.gz',
        smooth=6,
    )

    # Store only the blocks that belong to the training runs.
    for task_csv in utils.sorted_glob(haxby_dir + task_log_csvs + '/*.tsv'):
        for block in read_tasks(task_csv):
            if block.run in TRAIN_RUNS:
                block.filename = haxby_filename(block.subject, block.run)
                haxby_db.upsert(block)

09/30/2020 17:25:52 Subject 1
09/30/2020 17:25:52 Block 0 rest of run 1 started at 0.000000, ended at 5.000000
09/30/2020 17:25:53 Block 1 scissors of run 1 started at 6.000000, ended at 15.000000
09/30/2020 17:25:53 Block 2 rest of run 1 started at 16.000000, ended at 19.000000
09/30/2020 17:25:53 Block 3 face of run 1 started at 20.000000, ended at 29.000000
09/30/2020 17:25:53 Block 4 rest of run 1 started at 30.000000, ended at 34.000000
09/30/2020 17:25:53 Block 5 cat of run 1 started at 35.000000, ended at 44.000000
09/30/2020 17:25:53 Block 6 rest of run 1 started at 45.000000, ended at 48.000000
09/30/2020 17:25:53 Block 7 shoe of run 1 started at 49.000000, ended at 58.000000
09/30/2020 17:25:53 Block 8 rest of run 1 started at 59.000000, ended at 63.000000
09/30/2020 17:25:53 Block 9 house of run 1 started at 64.000000, ended at 73.000000
09/30/2020 17:25:53 Block 10 rest of run 1 started at 74.000000, ended at 77.000000
09/30/2020 17:25:53 Block 11 scrambledpix of run 1 star

In [8]:
# Completion message; names the Haxby dataset that this notebook processes.
logging.info('Finished building NiiDb out of Haxby dataset in %s', haxby_dir)

09/30/2020 17:27:20 Finished building NiiDb out of AffVids dataset in /home/eli/Documents/Haxby_Final/
