In [11]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.models import load_model
try:
    from utils import find_repo_root, calculate_vectorized_correlation, get_fmri, get_pca
except ImportError:
    # Fall back to the alternative helper module when `utils` (or one of its own imports)
    # cannot be imported. Only ImportError is caught so that unrelated failures and
    # KeyboardInterrupt are no longer silently swallowed.
    # NOTE(review): presumably `utils` needs optional motion-related dependencies — confirm.
    from utils_without_motion import find_repo_root, calculate_vectorized_correlation, get_fmri, get_pca

### Function Definitions

In [12]:
def save_dataframes(df1, df2, layer, data_mode="test"):
    """
    saves test results for testing / validation in csv files

    Both files are written (without the index) into the folder "test_scores" below the current
    working directory. The folder is created if it does not exist yet. Existing files with the
    same name are overwritten.

    :param df1: previously created df with one line per voxel, subject, ROI & layer
    :param df2: previously created df aggregated over voxels and subject (yields score per ROI & layer)
    :param layer: used stage of the feature extraction model (becomes part of the file names)
    :param data_mode: set to "val" if validation scores have been calculated; this appends "_val"
        to the file names. Any other value produces file names without suffix.
    """
    mode_str = "_val" if data_mode == "val" else ""

    test_scores_dir = os.path.join(os.getcwd(), "test_scores")
    # exist_ok avoids the check-then-create race of a separate os.path.exists() test
    os.makedirs(test_scores_dir, exist_ok=True)

    df1.to_csv(os.path.join(test_scores_dir, f"test_results_{layer}{mode_str}.csv"), index=False)
    df2.to_csv(os.path.join(test_scores_dir, f"test_results_aggregated_{layer}{mode_str}.csv"), index=False)

In [16]:
def test_model(layer, ROI, sub, X_test, y_test, df, mode="test"):
    """
    reads in model for a certain stage, ROI & subject
    tests the models, saves the predicted brain activations, and appends the test results to the results df
    :param layer: used stage of the feature extraction model
    :param ROI: region of interest
    :param sub: current subject
    :param X_test: test data. Use validation for mode "val" and test for mode "test"
    :param y_test: test labels. Use validation for mode "val" and test for mode "test"
    :param df: df containing previous testing results
    :param mode:  set to "val" if validation scores have been calculated
    """
    """
    
    :return: overview over correlation score values
    """
    # navigate to correct stored model
    model_dir = os.path.join(os.getcwd(), "models", layer, ROI, sub)
    model = load_model(model_dir + "/model.keras")
    
    # calculate predicted voxel activations
    prediction = model.predict(X_test)
    
    # calculate evaluation metric
    test_corr = calculate_vectorized_correlation(y_test, prediction)
    
    # add evaluation metric results to the results dataframe
    new_values = {'stage': layer, 'ROI': ROI, 'sub': sub, 'correlation_score': test_corr}
    new_values = pd.DataFrame(new_values)
    new_values['voxel'] = new_values.index + 1
    df = pd.concat([df, new_values], ignore_index=True)
    
    # save the predicted fmri's
    if mode == "test":
        predictions_dir = os.path.join(os.getcwd(), "predictions", layer, ROI, sub)
        if not os.path.exists(predictions_dir):
          os.makedirs(predictions_dir)
        np.save('prediction.npy', prediction)

    return df
    

### Model Evaluation

In [18]:
# Load only one main PCA file into the Ucloud session. The file that is found determines the layer.
layer_list = ["stage_1", "stage_2", "stage_3", "stage_4", "stage_5", "final"]
for candidate_layer in layer_list:
    if os.path.exists(f"{candidate_layer}_pca.npy"):
        layer = candidate_layer
        break
else:
    # without this guard `layer` would stay undefined and fail later with a NameError
    raise FileNotFoundError(
        f"No PCA file found in {os.getcwd()}; expected one of: "
        + ", ".join(f"{name}_pca.npy" for name in layer_list)
    )

# set to "test" for regular testing, and to "val" to get validation scores based on validation set
data_mode = "val"
if data_mode not in ("test", "val"):
    # any other value would silently skip all testing below
    raise ValueError(f'data_mode must be "test" or "val", got {data_mode!r}')

subs = ["sub01","sub02","sub03","sub04","sub05","sub06","sub07","sub08","sub09","sub10"]
ROIs = ["WB", "V1", "V2","V3", "V4", "LOC", "EBA", "FFA","STS", "PPA"]

# test results dataframe
column_names = ['voxel', 'stage', 'ROI', 'sub', 'correlation_score']
test_results = pd.DataFrame(columns=column_names)

# get test data (PCA features); in "val" mode only the validation part is used for scoring
if data_mode == "test":
    X_test = get_pca(layer, mode=data_mode)
elif data_mode == "val":
    X_train, X_val = get_pca(layer, mode=data_mode)

for sub in subs:
    for ROI in ROIs:
        # read in test data; the whole-brain ROI is read from a different track than the others
        if ROI == "WB":
            track="full_track"
        else:
            track="mini_track"
        try:
            if data_mode == "test":
                y_test = get_fmri(ROI, track, sub, mode=data_mode)
                test_results = test_model(layer, ROI, sub, X_test, y_test, test_results, data_mode)
            elif data_mode == "val":
                y_train, y_val = get_fmri(ROI, track, sub, mode=data_mode)
                test_results = test_model(layer, ROI, sub, X_val, y_val, test_results, data_mode)
        except OSError:
            # skip the remaining ROIs of this subject and continue with the next subject
            print(f"Execution for {sub} ended at {ROI}")
            break
        print(f"finished testing sub: {sub}, ROI: {ROI}")

# calculate aggregated scores

# aggregate per subject (mean over voxels)
# .mean() gives the same result as .agg(np.mean) without the pandas FutureWarning
test_results_aggregated = test_results.groupby(["ROI", "stage","sub"])["correlation_score"].mean().reset_index()
# aggregate over subjects
test_results_aggregated = test_results_aggregated.groupby(["ROI", "stage"])["correlation_score"].mean().reset_index()

# save the dataframes
save_dataframes(test_results, test_results_aggregated, layer, data_mode)

train_pca shape:  (800, 900)
val_pca shape:  (100, 900)
test_pca shape:  (100, 900)
ROI_train shape:  (800, 18222)
ROI_val shape:  (100, 18222)
ROI_test shape:  (100, 18222)


  df = pd.concat([df, new_values], ignore_index=True)
  test_results_aggregated = test_results.groupby(["ROI", "stage","sub"])["correlation_score"].agg(np.mean).reset_index()
  test_results_aggregated = test_results_aggregated.groupby(["ROI", "stage"])["correlation_score"].agg(np.mean).reset_index()


PermissionError: [Errno 13] Permission denied: 'C:\\Users\\julia\\OneDrive - CBS - Copenhagen Business School\\Documents\\Master\\Semester3\\AdvancedML\\Brainvision_Project\\test_scores\\test_results_aggregated.csv'