## Predicting image properties from features extracted from vision models

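This notebook predicts the size of the object in each image from the layer-wise features extracted from various vision models, using cross-validated regression, and reports the correlation between predicted and actual size for every layer.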

In [None]:
import pickle
import re

import numpy as np
import pandas as pd
import pingouin as pg
from scipy import stats
from sklearn.cross_decomposition import PLSRegression
from sklearn.linear_model import Ridge
from sklearn.model_selection import KFold
from sklearn.random_projection import SparseRandomProjection

In [None]:
def predict_size_from_features(size=None, features=None, n_comp=10, nrBS=20, k_parm=1000, nrFolds=10, regress_type='pls'):
    """
    Predict object size from model features using repeated k-fold cross-validation.

    For each of ``nrBS`` repetitions the images are split into ``nrFolds``
    shuffled folds (seeded with the repetition index, so the splits are
    reproducible). A regression model is fit on the training folds and used to
    predict the held-out fold, so every image receives exactly one
    out-of-sample prediction per repetition.

    Parameters:
        size (np.array): Size variable, Mx1 array (size value for M images).
        features (np.array): Neural/model responses, MxN array (M images, N units).
        n_comp (int): Number of PLS components (only used when regress_type='pls').
        nrBS (int): Number of repetitions of the cross-validation ("bootstraps").
        k_parm (int): Ridge regularisation strength, passed as ``alpha``
            (only used when regress_type='ridge').
        nrFolds (int): Number of cross-validation folds.
        regress_type (str): Type of regression to use ('pls' or 'ridge').

    Returns:
        predictions (np.array): Predicted values, MxnrBS array.

    Raises:
        ValueError: If ``regress_type`` is not 'pls' or 'ridge', if ``size`` or
            ``features`` is missing, or if they disagree on the number of images.
    """
    # Validate up front so a typo fails immediately instead of after data prep.
    if regress_type not in ('pls', 'ridge'):
        raise ValueError("Invalid regression type. Use 'pls' or 'ridge'.")
    if size is None or features is None:
        raise ValueError("Both 'size' and 'features' must be provided.")

    size = np.asarray(size)
    features = np.asarray(features)

    nrImages = size.shape[0]
    if features.shape[0] != nrImages:
        raise ValueError(
            f"'features' has {features.shape[0]} rows but 'size' has {nrImages}; "
            "features must be an MxN array with one row per image."
        )

    predictions = np.full((nrImages, nrBS), np.nan)

    for j in range(nrBS):
        print(f' Bootstrap number: {j}')

        # A different (but reproducible) shuffle for every repetition
        kf = KFold(n_splits=nrFolds, shuffle=True, random_state=j)

        for train_index, test_index in kf.split(size):
            if regress_type == 'ridge':
                model = Ridge(alpha=k_parm)
            else:
                model = PLSRegression(n_components=n_comp)

            # Rows are images for both model types (features is MxN)
            model.fit(features[train_index, :], size[train_index])

            # predict() may return (n, 1) for a column-vector target; flatten it
            # so it fits the 1-D slice of the predictions matrix.
            predictions[test_index, j] = np.ravel(model.predict(features[test_index, :]))

    return predictions

In [None]:
# Load meta data
# NOTE(review): read_pickle can execute arbitrary code while loading; only use
# it on trusted, locally produced files.
meta_data_dict = pd.read_pickle('./meta_data/single_object_meta.pkl')
meta_data = pd.read_csv("./meta_data/single_object_images.csv")

# Column vector of sizes, one row per row of the CSV (no hard-coded row count)
size_array = meta_data['size'].to_numpy().reshape(-1, 1)

# Sub-set the data
# Here, I am subsetting it to single object images: every key that looks like
# 'im<number>.png' contributes its number; keys that do not match are ignored.
image_number_pattern = re.compile(r'im(\d+)\.png')
key_matches = (image_number_pattern.search(key) for key in meta_data_dict.keys())

# These are the indices of the images we are running the analyses on
# (presumably aligned with the rows of the CSV above — TODO confirm)
indices_to_keep = sorted(int(found.group(1)) for found in key_matches if found)

# Choose the models you want to decode.
# Each name is used to build the feature-file path and the results-file path,
# so every entry must appear exactly once and be its own string literal
# (a missing comma silently glues two adjacent names together).
models = [
    'resnet-50',
    'alexnet',
    'vgg-16',
    'resnet-101',
    'resnet-152',
    'densenet-121',
    'densenet-201',
    'densenet-169',
    'squeezenet-1_0',
    'squeezenet-1_1',
    'inception-v3',
    'resnext-wsl',
]


In [None]:
# Settings for the random-projection dimensionality reduction
N_ALL_IMAGES = 1600    # rows each layer's features are reshaped to (presumably the full image set before subsetting — TODO confirm)
N_COMPONENTS = 1000    # dimensionality after each random projection
N_PROJECTIONS = 50     # number of random projections averaged per layer

# Observed size per image; identical for every model and layer, so compute it once
observed_size = np.mean(size_array, axis=1)

# Iterate through the models
for current_model in models:

    features = pd.read_pickle(f'./results/model_features/{current_model}_multiple_layers_features.pkl')

    # Collect one result row per layer
    layer_results = []

    # Iterate through the layers
    for layer, value in features.items():

        print(f"Processing layer {layer} of model {current_model}.")

        # Preprocess the layer features: flatten to one row per image,
        # then keep only the images selected for this analysis
        feature = np.array(value).reshape(N_ALL_IMAGES, -1)
        feature = feature[indices_to_keep, :]

        # Dimensionality reduction: average several sparse random projections.
        # A running sum avoids holding all projections in memory at once, and
        # seeding each projection makes the results repeatable between runs.
        projection_sum = np.zeros((feature.shape[0], N_COMPONENTS))
        for seed in range(N_PROJECTIONS):
            reducer = SparseRandomProjection(n_components=N_COMPONENTS, random_state=seed)
            projection_sum += reducer.fit_transform(feature)

        reduced_dimensions_mean = projection_sum / N_PROJECTIONS

        # Get the size predictions of this layer (averaged over CV repetitions)
        predictions = predict_size_from_features(size_array, reduced_dimensions_mean, n_comp=20, regress_type='pls')
        preds = np.mean(predictions, axis=1)

        # Correlate predicted with observed size
        results = pg.corr(preds, observed_size, method='pearson')
        size_corr = results['r'].values[0]
        size_conf_int = tuple(results['CI95%'].values[0])

        # Append results for this layer
        layer_results.append({
            'Layer': layer,
            'Correlation_size': round(size_corr, 3),
            'Size_Conf_Int_Lower': size_conf_int[0],
            'Size_Conf_Int_Upper': size_conf_int[1],
        })

    # Convert layer results to a DataFrame after all layers have been processed
    results_df = pd.DataFrame(layer_results)

    # Set Layer as the index
    results_df = results_df.set_index('Layer')

    # Save to CSV for the current model
    results_df.to_csv(f'./results/{current_model}_predictions_results.csv')
