In [11]:
# Importing packages 
import os,sys
import numpy as np
import nilearn
import glob
import nibabel as nib
import pandas as pd 
from sklearn.model_selection import cross_val_score
from nilearn.input_data import NiftiMasker 
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.preprocessing import StandardScaler
import numpy as np
import warnings
import matplotlib
import multiprocessing
from multiprocessing import Pool
import time
from sklearn import preprocessing



# Tell NumPy to ignore divide-by-zero and invalid floating-point operations
# instead of warning about them.
np.seterr(divide='ignore', invalid='ignore')
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")
# Select matplotlib's 'Agg' backend.
# NOTE(review): presumably so figures can be saved on a machine without a
# display -- confirm.
matplotlib.use('Agg')

In [None]:


def set_dict():
    """Return a fresh results record with every field initialised to None.

    The record has the keys "Dataset", "Fit_Time", "CV_Time", "CV_Score"
    and "SVC", in that order. A new dict is created on every call.
    """
    fields = ("Dataset", "Fit_Time", "CV_Time", "CV_Score", "SVC")
    return dict.fromkeys(fields)

# Get the data - mask nifti, behavioral file, dataset nifti
def prepare_data(dataset, mask, stim):
    """Load the behavioral labels and the masked image data for decoding.

    Parameters
    ----------
    dataset : str
        Path of the concatenated NIfTI image. Its last "/"-separated
        component is stored under "Dataset" in the results record and the
        path is handed to ``NiftiMasker.fit_transform``.
    mask : str
        Mask image, passed to ``NiftiMasker`` as ``mask_img``.
    stim : str
        Path of a tab-separated behavioral file with a "Label" column.
        NOTE(review): rows are assumed to line up one-to-one with the
        rows produced by the masker -- confirm against the data.

    Returns
    -------
    list
        ``[X, y, _dict, masker]``: the masked data restricted to the
        'milkshake' / 'h20_receipt' rows, the matching labels, the results
        record from ``set_dict`` and the fitted masker.
    """
    print("Preparing data.......")
    _dict = set_dict()
    _dict["Dataset"] = dataset.split("/")[-1]

    # Load behavioral data into a pandas DataFrame.
    behavioral = pd.read_csv(stim, sep="\t")

    # Look at the original unique labels.
    print(">ORIGINAL behavioral list: ", behavioral["Label"].unique())

    # Collapse the four milkshake receipt conditions into one 'milkshake'
    # label. The replacement is done on the "Label" column only:
    # DataFrame.replace returns a whole DataFrame, which cannot be assigned
    # to a single column when the file has more than one column.
    milkshake_labels = ['HF_LS_receipt', 'LF_LS_receipt', 'LF_HS_receipt', 'HF_HS_receipt']
    behavioral["Label"] = behavioral["Label"].replace(milkshake_labels, 'milkshake')

    y = behavioral["Label"]
    # Make sure all the milkshake receipts have been replaced with "milkshake".
    print(">MODIFIED behavioral list ", y.unique())

    # Restrict the data to the two conditions of the target analysis.
    condition_mask = behavioral["Label"].isin(['milkshake', "h20_receipt"])
    y = y[condition_mask]

    # Confirm we have the number of conditions needed.
    print(">FINAL behavioral list ", y.unique())

    masker = NiftiMasker(mask_img=mask, standardize=True, memory="nilearn_cache", memory_level=5)
    X = masker.fit_transform(dataset)
    # Keep only the rows selected by the condition mask.
    X = X[condition_mask]

    # sys.getsizeof is shallow: it reports the size of the masker object
    # itself, not of everything it references.
    mask_size = sys.getsizeof(masker)
    print("MASK SIZE: ", mask_size)
    return [X, y, _dict, masker]

def set_pipeline(data):
    """Build the ANOVA + linear-SVC pipeline and fit it on the prepared data.

    Parameters
    ----------
    data : list
        Sequence whose first three items are the feature matrix, the
        labels and the results record (as returned by ``prepare_data``).

    Returns
    -------
    list
        ``[pipeline, record, svc, feature_selection]``: the fitted
        pipeline, the results record with "Fit_Time" filled in, and the
        two pipeline steps.
    """
    print("Generating pipeline....")
    # Prediction function: linear-kernel SVC capped at 1000 iterations.
    from sklearn.svm import SVC
    classifier = SVC(kernel='linear', max_iter=1000)
    print(classifier)

    # Feature selection: keep the 500 best features according to f_classif.
    selector = SelectKBest(f_classif, k=500)
    model = Pipeline([('anova', selector), ('svc', classifier)])

    # The timer spans the fit, the prediction on the training data and the
    # diagnostic prints in between.
    started = time.time()
    print("fitting model......")
    features, labels = data[0], data[1]
    # sys.getsizeof is shallow; for an array view it may not include the data.
    print("X SIZE: ", sys.getsizeof(features))
    record = data[2]

    model.fit(features, labels)
    predictions = model.predict(features)
    print(">>Initial Prediction: ", predictions)
    record["Fit_Time"] = time.time() - started

    return [model, record, classifier, selector]

def make_img(dataset, svc, masker, feature_selection, output_dir=None):
    """Save the SVC weight map as a NIfTI image and as a stat-map figure.

    Parameters
    ----------
    dataset : str
        Image (path) whose mean over time is used as the plot background.
    svc : fitted estimator exposing ``coef_``
        The classifier whose weights are mapped back to image space.
    masker : fitted masker
        Used to project the weights back into an image via
        ``inverse_transform``.
    feature_selection : fitted selector
        Used to undo the feature selection via ``inverse_transform``.
    output_dir : str, optional
        Directory receiving ``all_mean_nimask.nii``, ``all_SVM_nimask.png``
        and ``all_SVM_nimask.nii``. Defaults to the project's
        ``milkshake_vs_h2O/images/inclusive`` directory, so existing
        callers write to the same files as before.
    """
    from nilearn import image
    from nilearn.plotting import plot_stat_map

    if output_dir is None:
        output_dir = '/projects/niblab/bids_projects/Experiments/ChocoData/derivatives/code/decoding/milkshake_vs_h2O/images/inclusive'

    print("Making Image........")
    coef = svc.coef_
    # Reverse the feature selection, then reverse the masking.
    coef = feature_selection.inverse_transform(coef)
    weight_img = masker.inverse_transform(coef)

    # Use the mean image as a background to avoid relying on anatomical data.
    mean_img = image.mean_img(dataset)
    mean_img.to_filename(os.path.join(output_dir, 'all_mean_nimask.nii'))

    # Create the figure, save it, then release it so that repeated calls do
    # not accumulate open matplotlib figures.
    display = plot_stat_map(weight_img, mean_img, title='Milkshake vs. H2O')
    display.savefig(os.path.join(output_dir, 'all_SVM_nimask.png'))
    display.close()

    # Saving the results as a NIfTI file may also be important.
    weight_img.to_filename(os.path.join(output_dir, 'all_SVM_nimask.nii'))

    
def run_nested_cv(data_list, pipe_list, k_range):
    """Score the pipeline with nested cross-validation over ``anova__k``.

    An inner 5-fold ``GridSearchCV`` searches ``k_range`` for the 'anova'
    step; an outer 5-fold ``cross_val_score`` evaluates that search.

    Parameters
    ----------
    data_list : list
        Sequence whose first two items are the feature matrix and labels.
    pipe_list : list
        Sequence whose first two items are the pipeline and the results
        record.
    k_range : list
        Candidate values for the ``anova__k`` parameter.

    Returns
    -------
    dict
        The results record from ``pipe_list``, updated in place with
        "CV_Score" (mean outer score) and "CV_Time" (elapsed seconds).
    """
    print("K_range: ", k_range)
    from sklearn.model_selection import GridSearchCV

    features, labels = data_list[0], data_list[1]
    started = time.time()
    print("running nested CV......")
    estimator, record = pipe_list[0], pipe_list[1]

    # Inner loop: pick k by grid search.
    search = GridSearchCV(
        estimator,
        param_grid={'anova__k': k_range},
        verbose=1,
        cv=5,
        n_jobs=3,
    )
    # Outer loop: score the whole search procedure.
    fold_scores = cross_val_score(search, features, labels, cv=5, n_jobs=3)
    average = np.mean(fold_scores)
    print("Nested CV score: %.4f" % average)

    elapsed = time.time() - started
    record["CV_Score"] = average
    record["CV_Time"] = elapsed
    return record
        


    
def main():
    """Run the milkshake-vs-water decoding analysis end to end.

    Prepares the data, fits the pipeline, runs the nested cross-validation,
    writes the weight images and prints the collected results.
    """
    # Cluster locations of the inputs: binary mask, behavioral file and
    # concatenated dataset image. (Local test copies were previously kept
    # under /Users/nikkibytes/Documents/lab/test_imgs/.)
    mask = '/projects/niblab/bids_projects/Experiments/ChocoData/images/bin_mask.nii.gz'
    stim = '/projects/niblab/bids_projects/Experiments/ChocoData/behavorial_data/milkshake_all.csv'
    dataset = '/projects/niblab/bids_projects/Experiments/ChocoData/images/milkshake_all.nii.gz'

    data_list = prepare_data(dataset, mask, stim)
    masker = data_list[3]

    pipe_list = set_pipeline(data_list)
    svc, feature_selection = pipe_list[2], pipe_list[3]

    # Only k=15 is searched for now; other values that were tried here:
    # 30, 50, 100, 150, 300, 500, 1000, 3000, 1500, 5000.
    k_range = [15]
    results = run_nested_cv(data_list, pipe_list, k_range)

    # The timer starts after the nested CV, so the printed time covers only
    # storing the classifier and generating the images.
    start = time.time()
    results["SVC"] = svc
    make_img(dataset, svc, masker, feature_selection)
    print("Time: ", (time.time() - start))

    print(">>>RESULTS: ", results)

# Run the analysis only when this file is executed directly (script or
# notebook cell), not when it is imported by another module.
if __name__ == "__main__":
    main()

---