## This starter notebook uses the mosaic-dataset Python package to:
 1. download MOSAIC hdf5 files containing the fMRI beta responses from the AWS bucket (https://mosaicfmri.s3.amazonaws.com/index.html)
 2. visualize single trial beta values on an inflated brain
 3. download brain optimized model weights and load the model
 4. run inference on a model
 5. visualize model predictions on an inflated brain
 6. download stimulus and participant information per dataset

In [None]:
!pip install mosaic-dataset --upgrade

### 1. Download MOSAIC hdf5 files containing the fMRI beta responses

In [None]:
import mosaic

# mosaic.load downloads the requested hdf5 file(s) into `folder` unless they are
# already present locally. The returned dataset organises the responses by ROI
# (MMP1.0 parcellation) and, where applicable, concatenates multiple subjects.
requested_subjects = {"GOD": [1]}
dataset = mosaic.load(names_and_subjects=requested_subjects, folder="./mosaic-dataset")

# Show which keys the first entry of the dataset provides.
print(dataset[0].keys())

### 2. Visualize single trial beta values on an inflated brain

In [None]:
from mosaic.utils import visualize

# ROIs to draw; pass rois=None instead to visualize all of the rois.
rois_to_plot = ["L_FFC", "R_FFC", "L_V1", "R_V1"]

# Available modes: 'white', 'midthickness', 'pial', 'inflated',
# 'very_inflated', 'flat', 'sphere'.
visualize(
    betas=dataset[0]["betas"],
    rois=rois_to_plot,
    mode="inflated",
    save_as="plot.html",
)

### 3. Download brain optimized model weights and load the model

In [None]:
import mosaic

# Settings selecting which brain-optimized model to fetch and load.
pretrained_settings = dict(
    backbone_name="CNN8",
    framework="singlehead",
    subjects="sub-01_NSD",
    vertices="visual",
)

model = mosaic.from_pretrained(**pretrained_settings)

### 4. Run inference on the brain-optimized model

In [None]:
# Download the example image used for inference below and save it as face.jpg (-O sets the output filename).
!wget -O face.jpg https://images.unsplash.com/photo-1542909168-82c3e7fdca5c

In [None]:
# Visualize the image: open face.jpg, convert it to RGB, and show it as the cell output.
from PIL import Image
im = Image.open("face.jpg").convert("RGB")
im

In [None]:
from mosaic.utils.inference import MosaicInference

# Wrap the model loaded in section 3 for batched inference on the CPU.
inference = MosaicInference(
    model=model,
    batch_size=32,
    device="cpu"
)

# Run the model on the face image for the requested datasets and subjects.
results = inference.run(
    images = [
        Image.open("face.jpg").convert("RGB"),
    ],
    names_and_subjects={"NSD": "all", "GOD": [1,2]}
)

# Inference returns vertex predictions for each of the subjects.
# The loop variable is deliberately `dataset_name` rather than `dataset`, so it
# does not overwrite the `dataset` object returned by mosaic.load in section 1;
# otherwise re-running the visualization cell in section 2 would fail.
for dataset_name in results.keys():
    for subject_id, prediction in results[dataset_name].items():
        print(f"{dataset_name} {subject_id} prediction shape: {prediction.shape}")

### 5. Visualize model predictions on an inflated brain

In [None]:
#note responses to the face are highest in the ventral stream
inference.plot(
    image=Image.open("face.jpg").convert("RGB"),
    save_as="predicted_voxel_responses.html",
    dataset_name="NSD",
    subject_id=1,
    mode="inflated"
)

### 6. Download stimulus and participant information per dataset

In [None]:
# Load the stimulus info tsv file that corresponds to a dataset
from mosaic.stiminfo import get_stiminfo

"""
The stimulus information .tsv files have columns:
-filename: str, filename of the stimulus referenced in the hdf5 files
-alias: str or NaN, alternate stimulus filename. Some datasets change or truncate a file's original name to something else more convenient for the fMRI dataset. Here we try to recover the mapping for improved data provenance.
-source: str, if known, what (usually computer vision) dataset was this stimulus first released in? Example is ImageNet, COCO, SUN, MomentsInTime, etc.
-test_train: str, 'test' or 'train' depending on whether or not this stimulus is part of the train or test set. MOSAIC preserves the original publications test/train split, if defined, so if the stimulus was originally test or train, it will be the same in MOSAIC.
-sub-XX_reps: int, how many times subject XX saw that stimulus throughout the experiment
"""

# dataset_name must be one of ['BOLD5000', 'deeprecon', 'GOD', 'NSD', 'THINGS', 'BMD', 'NOD', 'HAD']
stiminfo = get_stiminfo(dataset_name="HAD") # can optionally specify where to locally save the stim info tsv file. Default is ./mosaic_stiminfo
stiminfo.head(10) # view the first 10 rows

In [None]:
# Load the participant info tsv file that corresponds to a dataset
from mosaic.participantinfo import get_participantinfo

"""
The participant information .tsv files have columns following the BIDS convention (information here for each dataset is copied from the dataset's original release):
-participant_id: str, subject id like sub-XX
-age: str, age of participant at time of experiment
-sex: str, M or F sex of participant.
-handedness: optional str, left or right dominant hand of the participant.
-group: optional, for NOD it is multi-session (1-9) or single-session (10-30) corresponding to whether the participant participated in more than one or one session of the experiment.

The 'participants_shared.tsv' file displays the mapping of shared subjects between datasets.
"""

# dataset_name must be one of ["BOLD5000", "BOLDMomentsDataset", "deeprecon", "GenericObjectDecoding", "HumanActionsDataset", "NaturalObjectDataset", "NaturalScenesDataset", "THINGS_fmri"] or 'shared' for overlapping subjects
participantinfo = get_participantinfo(dataset_name="THINGS_fmri") # can optionally specify where to locally save the participant info tsv file. Default is ./mosaic_participantinfo
participantinfo.head(10) # view the first 10 rows