## Decoding object category from the neural data

This code decodes the object category from the neural data separately in each sliding time window, so that decoding performance can be followed over time.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.linear_model import LogisticRegression 
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from scipy.stats import zscore, norm
from sklearn import preprocessing
from sklearn.model_selection import KFold
print("Packages loaded.")

Packages loaded.


In [None]:
def decode(features,labels,nrfolds=2,seed=0):
    """Decode class labels from features with K-fold cross-validation.

    A one-vs-rest logistic-regression classifier is trained on the training
    part of every fold and used to predict class probabilities for the
    held-out images, so each image receives exactly one out-of-fold
    prediction.

    Parameters
    ----------
    features : ndarray, shape (n_features, n_images)
        One column per image; the array is transposed before being handed
        to scikit-learn.
    labels : array-like, shape (n_images,)
        Class label of each image.
    nrfolds : int, default 2
        Number of cross-validation folds.
    seed : int, default 0
        Random state used to shuffle the images into folds.

    Returns
    -------
    prob : ndarray, shape (n_images, n_classes)
        Out-of-fold class probabilities. Columns follow the sorted order of
        ``np.unique(labels)``. A class that was absent from a fold's
        training set gets probability 0 for that fold's test images.
    accuracies_test : list of float
        Mean accuracy on the held-out images, one entry per fold.
    accuracies_train : list of float
        Mean accuracy on the training images, one entry per fold.
    """
    # Fancy indexing with the fold indices below needs an ndarray.
    labels = np.asarray(labels)

    classes = np.unique(labels)  # sorted unique labels; defines the column order of `prob`
    num_classes = len(classes)
    nrImages = features.shape[1]

    # NOTE(review): axis=0 standardizes every image (column) across the
    # features, not every feature across images. Confirm this is intended.
    features = zscore(features, axis=0)

    # NaN marks images that have not been predicted yet.
    prob = np.full((nrImages, num_classes), np.nan)

    accuracies_test = []
    accuracies_train = []

    # Plain (non-stratified) K-fold: the labels passed to split() do not
    # influence how the images are distributed over the folds.
    kfold = KFold(n_splits=nrfolds, shuffle=True, random_state=seed)

    for train_index, test_index in kfold.split(features.T, labels):

        X_train_fold, X_test_fold = features[:, train_index].T, features[:, test_index].T
        y_train_fold, y_test_fold = labels[train_index], labels[test_index]

        # NOTE(review): 10e4 equals 1e5, so C is 5e5 (very weak
        # regularization). Confirm that 5e4 was not the intended value.
        clf = OneVsRestClassifier(
            LogisticRegression(penalty='l2', C=5*10e4, max_iter=1000, class_weight='balanced')
        ).fit(X_train_fold, y_train_fold)

        pred = clf.predict_proba(X_test_fold)

        # The classifier only emits columns for classes present in this
        # fold's training set. Place them by class identity rather than by
        # position, so a fold that misses a class neither crashes nor shifts
        # probabilities into the wrong column.
        seen_columns = np.searchsorted(classes, clf.classes_)
        fold_prob = np.zeros((len(test_index), num_classes))
        fold_prob[:, seen_columns] = pred
        prob[test_index] = fold_prob

        accuracies_test.append(clf.score(X_test_fold, y_test_fold))
        accuracies_train.append(clf.score(X_train_fold, y_train_fold))

    return prob, accuracies_test, accuracies_train

def get_probability_correct(prob, labels, class_order):
    """Compare the probability of the true class against every other class.

    For image ``i`` with true class ``t`` and any other class ``j`` the
    result holds ``prob[i, t] / (prob[i, j] + prob[i, t])``, i.e. how
    strongly the true label is favoured over label ``j``.

    Parameters
    ----------
    prob : ndarray, shape (n_images, n_classes)
        Predicted class probabilities, columns in the sorted order of
        ``np.unique(labels)``.
    labels : array-like
        True label of each image.
    class_order : ignored
        Kept for interface compatibility only; the class order is always
        recomputed as ``np.unique(labels)``.

    Returns
    -------
    ndarray, shape (n_images, n_classes)
        Pairwise ratios as described above. The entry in the column of the
        true class is NaN so it can be skipped by NaN-aware reductions.
    """
    n_images = prob.shape[0]

    # Sorted unique classes and, for every label, the column of its class.
    classes, true_col = np.unique(labels, return_inverse=True)
    true_col = true_col[:n_images]  # only the first n_images labels are consulted
    rows = np.arange(n_images)

    # Probability assigned to the true class, as a column vector so that it
    # broadcasts against every class column of the same image.
    p_true = prob[rows, true_col][:, np.newaxis]

    pc = np.full((n_images, len(classes)), np.nan)
    pc[:, :] = np.divide(p_true, prob + p_true)

    # Blank out the true-class column of every image.
    pc[rows, true_col] = np.nan

    return pc

In [None]:
def _sliding_windows(data, width):
    """Return all runs of `width` consecutive entries along axis 0, advancing one entry at a time."""
    n_windows = data.shape[0] - width + 1
    return [data[first:first + width] for first in range(n_windows)]


# Metadata table
meta = pd.read_csv('./data/meta_data.csv')

# Cleaned neural data: the averaged rates and the per-repetition rates
all_rates_avg = np.load('./data/neural_data/all_rates.npy')
all_rates = np.load('./data/neural_data/all_rates_repetitions.npy')

# Total number of images (axis 1 of the averaged rates)
nrImages = all_rates_avg.shape[1]

# Overlapping time bins: each bin spans `bin_size` consecutive entries along
# axis 0 and consecutive bins are shifted by one entry.
bin_size = 4  # bins of 40ms
bins = _sliding_windows(all_rates_avg, bin_size)
bins_with_reps = _sliding_windows(all_rates, bin_size)

# Result tables, filled column-wise while the time bins are decoded
i_1_df = pd.DataFrame()
bin_performance_summary_df = pd.DataFrame()

In [None]:
# Iterate through the time bins, get the object decoding accuracy for each time bin

# Number of repeated cross-validation runs per time bin; run j uses seed j,
# so every run shuffles the images into folds differently.
n_bootstraps = 20

# The labels do not depend on the time bin, so they are read once.
print("Getting the labels ...")
labels = meta['obj'].to_numpy().astype(str)  # Get the labels from metadata
unique_labels = np.array(list(dict.fromkeys(labels)))  # first-occurrence order

# Per-bin tables are collected here and concatenated once after the loop
# instead of re-copying the growing result tables on every iteration.
bin_frames = []
summary_frames = []

for idx, current_bin in enumerate(bins):

    start_time = idx * 10
    # NOTE(review): a bin spans 4 consecutive entries (bin_size), yet the end
    # is reported as start + 30. Confirm whether (idx + 4) * 10 was intended.
    end_time = (idx + 3) * 10
    print(f"Processing bin {idx}: Time frame from {start_time} to {end_time}")

    # Pre-process the bin's neural data: average over the entries in the bin
    current_bin = np.mean(current_bin, axis=0)  # [images, channels]
    neural_rates = current_bin.T  # decode() expects [channels, images]

    # One column per cross-validation run; NaN until that run has finished
    i_1 = np.full((nrImages, n_bootstraps), np.nan)

    # Per-fold accuracies of every run (collected, but not written to disk here)
    accuracies_test_list = []
    accuracies_train_list = []

    for j in range(n_bootstraps):

        print('Decoding nrBS: '+str(j))

        # Train and test the decoder for this run; p is [n_image, n_labels],
        # the out-of-fold probability of every label for every image.
        p, accuracies_test, accuracies_train = decode(neural_rates, labels, nrfolds=10, seed=j)

        accuracies_test_list.append(accuracies_test)
        accuracies_train_list.append(accuracies_train)

        # pc is [n_image, n_labels]: how strongly the correct label is
        # favoured over each other label (NaN in the correct label's column).
        pc = get_probability_correct(p, labels, unique_labels)

        # Average over the other labels, ignoring the NaN of the correct one
        i_1[:, j] = np.nanmean(pc, axis=1)

    # Per-image performance, averaged over the repeated runs
    bin_mean = np.mean(i_1, axis=1)

    plt.hist(bin_mean)
    plt.title(f'{start_time} - {end_time} ms')
    plt.savefig(f'./results/neural_decoding_performance_{start_time}.png')
    plt.show()
    print("Saved the figure ...")
    plt.clf()

    bin_frames.append(pd.DataFrame(i_1, columns=[f'{start_time}_{i}' for i in range(n_bootstraps)]))
    print("Updated results dataframe ...")

    summary_frames.append(pd.DataFrame(bin_mean, columns=[f'{start_time}']))
    print("Updated summary dataframe ...")

# Append all per-bin columns to the result tables in a single concatenation
i_1_df = pd.concat([i_1_df, *bin_frames], axis=1)
bin_performance_summary_df = pd.concat([bin_performance_summary_df, *summary_frames], axis=1)

i_1_df.to_csv('./results/neural_decoding_results.csv', index = False)
bin_performance_summary_df.to_csv('./results/neural_decoding_results_summary.csv', index = False)