# PCA on High-Level Dimensions

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from IPython.display import display
import analysis_utils as au
import altair as alt
from plot import add_na_mask_from_flag

warnings.filterwarnings('ignore')
plt.style.use('seaborn-v0_8-whitegrid')
pd.set_option('display.max_columns', None)

In [3]:
try:
    df_raw = pd.read_csv('./data/super_experiment_design_space.csv')
    print(f"Successfully loaded raw dataset. Shape: {df_raw.shape}")
except FileNotFoundError:
    print("Error: './data/super_experiment_design_space.csv' not found.")

df_pca_features, numerical_cols, categorical_cols, df_processed = au.preprocess(df_raw)
print("\nPreprocessing complete.")
display(df_pca_features.head())
display(df_pca_features[df_pca_features.isnull().any(axis=1)])

Successfully loaded raw dataset. Shape: (349, 24)

Preprocessing complete.


Unnamed: 0,Task 2 Response Probability,Inter-task SOA,Distractor SOA,Task 1 CSI,Task 2 CSI,RSI,Switch Rate,Task 1 Difficulty,Task 2 Difficulty,Inter_task_SOA_is_NA,Distractor_SOA_is_NA,Task_2_CSI_is_NA,Task_2_Difficulty_is_NA,Stimulus_Stimulus_Congruency_Mapped,Stimulus_Response_Congruency_Mapped,Response_Set_Overlap_Mapped,RSI is Predictable,Task_1_Stimulus-Response_Mapping_Mapped,Task_1_Cue_Type_Mapped,Task_2_Stimulus-Response_Mapping_Mapped,Task_2_Cue_Type_Mapped,Trial_Transition_Type_Mapped
0,1,500.0,12.55814,0,0.0,1000.0,0.0,0.0,0.0,0,1,0,0,SS_NA,SR_NA,RSO_Identical,1,SRM_Arbitrary,TCT_Implicit,SRM2_Arbitrary,TCT2_Implicit,TTT_Pure
1,1,1000.0,12.55814,0,0.0,1000.0,0.0,0.0,0.0,0,1,0,0,SS_NA,SR_NA,RSO_Identical,1,SRM_Arbitrary,TCT_Implicit,SRM2_Arbitrary,TCT2_Implicit,TTT_Pure
2,1,2000.0,12.55814,0,0.0,1000.0,0.0,0.0,0.0,0,1,0,0,SS_NA,SR_NA,RSO_Identical,1,SRM_Arbitrary,TCT_Implicit,SRM2_Arbitrary,TCT2_Implicit,TTT_Pure
3,1,4000.0,12.55814,0,0.0,1000.0,0.0,0.0,0.0,0,1,0,0,SS_NA,SR_NA,RSO_Identical,1,SRM_Arbitrary,TCT_Implicit,SRM2_Arbitrary,TCT2_Implicit,TTT_Pure
4,1,1000.0,12.55814,0,0.0,1000.0,0.0,0.5,0.5,0,1,0,0,SS_NA,SR_NA,RSO_Identical,1,SRM_Arbitrary,TCT_Implicit,SRM2_Arbitrary,TCT2_Implicit,TTT_Pure


Unnamed: 0,Task 2 Response Probability,Inter-task SOA,Distractor SOA,Task 1 CSI,Task 2 CSI,RSI,Switch Rate,Task 1 Difficulty,Task 2 Difficulty,Inter_task_SOA_is_NA,Distractor_SOA_is_NA,Task_2_CSI_is_NA,Task_2_Difficulty_is_NA,Stimulus_Stimulus_Congruency_Mapped,Stimulus_Response_Congruency_Mapped,Response_Set_Overlap_Mapped,RSI is Predictable,Task_1_Stimulus-Response_Mapping_Mapped,Task_1_Cue_Type_Mapped,Task_2_Stimulus-Response_Mapping_Mapped,Task_2_Cue_Type_Mapped,Trial_Transition_Type_Mapped


### Exploratory Data Analysis

In [20]:
# List of the original categorical columns we want to inspect
categorical_columns_to_explore = [
    'Response Set Overlap',
    'Stimulus-Stimulus Congruency',
    'Stimulus-Response Congruency',
    'Task 1 Stimulus-Response Mapping',
    'Task 1 Cue Type',
    'Task 2 Stimulus-Response Mapping',
    'Task 2 Cue Type'
]
numeric_columns_to_explore = [
    'Inter-task SOA',
    'Distractor SOA',
    'Task 1 CSI',
    'Task 2 CSI', 
    'Task 2 Difficulty'
]

# Loop through the columns and print their value counts
for col in categorical_columns_to_explore:
    print(f"\nValue counts for column: '{col}'")
    # Use dropna=False to explicitly see the count of any missing values (NaNs)
    print(df_processed[col].value_counts(dropna=False))
    print("-" * 30)

print("-" * 60)
print("Review the counts above. If a sub-category has very few examples (e.g., < 5-10),")
print("it is generally better to collapse it into a broader category for the PCA.")
print("This analysis should guide the modifications to the mapping functions in Stage 3.")

for col in numeric_columns_to_explore:
    print(col, au.check_skewness(df_processed[col]))

print("-" * 60)
print("Review the skewness of the numeric columns. If they are skewed it'll be better to")
print("impute with the median instead of the mean.")


Value counts for column: 'Response Set Overlap'
Response Set Overlap
Identical                              142
NaN                                    115
Disjoint - Modality                     32
Disjoint - Category (Same Modality)     23
Disjoint - Effector                     22
Disjoint - Modality (Standard)           8
Disjoint - Modality (Non-Standard)       7
Name: count, dtype: int64
------------------------------

Value counts for column: 'Stimulus-Stimulus Congruency'
Stimulus-Stimulus Congruency
NaN                             97
Neutral                         95
Incongruent                     94
Congruent                       57
Neutral (Feature Similar)        3
Neutral (Feature Dissimilar)     3
Name: count, dtype: int64
------------------------------

Value counts for column: 'Stimulus-Response Congruency'
Stimulus-Response Congruency
NaN            299
Congruent       23
Incongruent     23
Neutral          4
Name: count, dtype: int64
------------------------------


## PCA Fitting

In [21]:
pipeline = au.create_pca_pipeline(numerical_cols, categorical_cols)
pipeline.fit(df_pca_features)
pca_results = pipeline.transform(df_pca_features)
column_names = [f'PC{i+1}' for i in range(pca_results.shape[1])]

# 5. Create a Pandas DataFrame with the specified column names
pca_df = pd.DataFrame(pca_results, columns=column_names)

print("PCA pipeline fitted successfully.")

PCA pipeline fitted successfully.


In [22]:
pca = pipeline.named_steps['pca']
explained_variance = pca.explained_variance_ratio_
cumulative_variance = np.cumsum(explained_variance)

print("\n--- Explained Variance ---")
for i, (var, cum_var) in enumerate(zip(explained_variance, cumulative_variance)):
    if cum_var > 0.95: break
    print(f"PC{i+1}: Explained Variance = {var:.3f}, Cumulative Variance = {cum_var:.3f}")
    
loadings = au.get_component_loadings(pipeline, numerical_cols, categorical_cols)
print("\n--- Principal Component Loadings ---")
display(loadings.round(3))


--- Explained Variance ---
PC1: Explained Variance = 0.191, Cumulative Variance = 0.191
PC2: Explained Variance = 0.168, Cumulative Variance = 0.359
PC3: Explained Variance = 0.121, Cumulative Variance = 0.480
PC4: Explained Variance = 0.087, Cumulative Variance = 0.567
PC5: Explained Variance = 0.079, Cumulative Variance = 0.646
PC6: Explained Variance = 0.073, Cumulative Variance = 0.719
PC7: Explained Variance = 0.053, Cumulative Variance = 0.772
PC8: Explained Variance = 0.042, Cumulative Variance = 0.814
PC9: Explained Variance = 0.030, Cumulative Variance = 0.844
PC10: Explained Variance = 0.026, Cumulative Variance = 0.871
PC11: Explained Variance = 0.019, Cumulative Variance = 0.890
PC12: Explained Variance = 0.017, Cumulative Variance = 0.907
PC13: Explained Variance = 0.016, Cumulative Variance = 0.923
PC14: Explained Variance = 0.014, Cumulative Variance = 0.938
PC15: Explained Variance = 0.012, Cumulative Variance = 0.949

--- Principal Component Loadings ---


Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10,PC11,PC12,PC13,PC14,PC15,PC16,PC17,PC18,PC19,PC20,PC21,PC22,PC23,PC24,PC25,PC26,PC27,PC28,PC29,PC30,PC31,PC32,PC33,PC34,PC35,PC36,PC37,PC38,PC39,PC40
Task 2 Response Probability,-0.331,-0.226,0.404,-0.261,0.405,-0.035,0.099,0.076,-0.065,-0.049,0.062,-0.027,-0.034,0.081,0.018,-0.125,0.218,0.21,-0.066,-0.234,0.127,-0.22,0.025,0.161,0.043,-0.06,0.012,0.16,0.119,-0.325,-0.0,0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.0
Inter-task SOA,0.022,-0.01,-0.114,-0.321,0.079,0.875,-0.062,-0.275,-0.11,0.03,0.031,0.011,0.01,-0.035,0.069,-0.011,0.01,0.009,0.101,0.013,-0.015,0.037,-0.003,0.003,0.002,-0.001,-0.005,0.002,0.008,0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0
Distractor SOA,-0.021,0.014,-0.092,0.652,0.435,0.319,0.125,0.446,-0.042,0.138,-0.115,0.005,-0.074,-0.056,0.102,0.015,0.036,-0.025,0.033,0.01,-0.0,-0.004,0.064,0.018,0.016,-0.002,0.004,0.011,-0.003,0.0,-0.0,-0.0,0.0,0.0,-0.0,0.0,0.0,0.0,0.0,-0.0
Task 1 CSI,0.334,0.365,0.366,-0.146,0.109,0.012,-0.062,0.081,0.205,0.087,-0.032,-0.077,-0.387,0.053,0.293,0.301,-0.356,0.223,0.05,-0.09,0.03,-0.023,0.03,-0.019,0.031,0.008,0.014,0.005,0.004,0.0,-0.0,0.0,0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0
Task 2 CSI,0.348,0.347,0.341,-0.078,0.129,0.024,-0.337,0.132,0.062,0.048,-0.088,0.023,0.118,-0.006,-0.206,-0.32,0.447,-0.299,0.046,0.113,-0.075,0.043,-0.038,0.028,-0.025,-0.007,-0.022,-0.008,0.001,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,0.0
RSI,0.025,0.387,0.164,-0.058,-0.2,0.093,0.829,0.071,-0.134,-0.163,0.044,0.001,0.049,0.022,-0.058,-0.036,0.102,-0.09,0.006,0.049,-0.026,-0.023,-0.048,-0.012,-0.001,-0.015,-0.003,-0.011,-0.005,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,0.0
Switch Rate,0.429,-0.266,0.112,0.19,-0.346,0.066,0.06,-0.104,-0.315,0.34,-0.044,-0.006,-0.216,0.005,-0.001,-0.233,0.139,0.185,-0.272,-0.238,0.142,-0.177,0.02,-0.021,-0.008,-0.011,0.04,-0.013,-0.03,-0.0,-0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0,0.0
Task 1 Difficulty,-0.331,0.05,0.34,0.04,-0.423,0.171,-0.323,0.398,-0.427,-0.136,0.074,0.01,0.022,0.02,0.048,0.135,-0.169,-0.109,-0.117,0.116,-0.022,-0.039,0.008,0.002,-0.001,0.034,0.008,0.021,0.011,0.0,0.0,0.0,0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.0,0.0
Task 2 Difficulty,-0.307,-0.04,0.401,0.382,-0.224,0.174,-0.005,-0.366,0.443,-0.115,-0.198,-0.057,-0.196,-0.167,-0.196,-0.104,-0.009,0.026,0.022,-0.034,-0.055,0.084,-0.022,0.016,-0.003,-0.018,-0.042,-0.031,0.01,0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0
Inter_task_SOA_is_NA_1,0.109,0.069,-0.138,0.076,-0.103,-0.0,-0.043,-0.034,-0.032,-0.078,0.03,-0.026,-0.075,0.015,-0.083,0.03,-0.061,-0.0,0.082,0.062,-0.07,-0.12,-0.031,0.851,0.249,-0.288,-0.059,0.123,0.01,0.0,-0.0,0.0,-0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,0.0


### Top weights for the first two components

In [23]:
display(loadings["PC1"].sort_values(key=abs, ascending=False).round(3)[:16])

Switch Rate                                        0.429
Task 2 CSI                                         0.348
Task 1 CSI                                         0.334
Task 1 Difficulty                                 -0.331
Task 2 Response Probability                       -0.331
Task 2 Difficulty                                 -0.307
Trial_Transition_Type_Mapped_TTT_Pure             -0.245
Response_Set_Overlap_Mapped_RSO_Identical          0.153
Task_2_Cue_Type_Mapped_TCT2_Arbitrary              0.136
Stimulus_Stimulus_Congruency_Mapped_SS_NA         -0.126
Trial_Transition_Type_Mapped_TTT_Switch            0.125
Task_2_Cue_Type_Mapped_TCT2_NA                    -0.121
Task_2_Stimulus-Response_Mapping_Mapped_SRM2_NA   -0.121
Task_2_CSI_is_NA_1                                -0.121
Task_2_Difficulty_is_NA_1                         -0.121
Trial_Transition_Type_Mapped_TTT_Repeat            0.121
Name: PC1, dtype: float64

In [24]:
display(loadings["PC2"].sort_values(key=abs, ascending=False).round(3)[:16])

RSI                                                       0.387
Task 1 CSI                                                0.365
Task 2 CSI                                                0.347
Switch Rate                                              -0.266
Task_2_Cue_Type_Mapped_TCT2_Implicit                     -0.258
Task_2_Cue_Type_Mapped_TCT2_NA                            0.237
Task_2_Stimulus-Response_Mapping_Mapped_SRM2_NA           0.237
Task_2_CSI_is_NA_1                                        0.237
Task_2_Difficulty_is_NA_1                                 0.237
Response_Set_Overlap_Mapped_RSO_NA                        0.234
Task 2 Response Probability                              -0.226
Task_2_Stimulus-Response_Mapping_Mapped_SRM2_Arbitrary   -0.206
Trial_Transition_Type_Mapped_TTT_Pure                     0.153
Response_Set_Overlap_Mapped_RSO_Identical                -0.136
Response_Set_Overlap_Mapped_RSO_Disjoint                 -0.098
Trial_Transition_Type_Mapped_TTT_Switch 

In [27]:
display(loadings["PC3"].sort_values(key=abs, ascending=False).round(3)[:16])

Task 2 Response Probability                                0.404
Task 2 Difficulty                                          0.401
Task 1 CSI                                                 0.366
Task 2 CSI                                                 0.341
Task 1 Difficulty                                          0.340
Response_Set_Overlap_Mapped_RSO_Disjoint                   0.195
RSI                                                        0.164
Task_2_CSI_is_NA_1                                        -0.150
Task_2_Stimulus-Response_Mapping_Mapped_SRM2_NA           -0.150
Task_2_Difficulty_is_NA_1                                 -0.150
Task_2_Cue_Type_Mapped_TCT2_NA                            -0.150
Response_Set_Overlap_Mapped_RSO_NA                        -0.149
Inter_task_SOA_is_NA_1                                    -0.138
Task_2_Stimulus-Response_Mapping_Mapped_SRM2_Compatible    0.118
Inter-task SOA                                            -0.114
Switch Rate              

In [28]:
display(loadings["PC4"].sort_values(key=abs, ascending=False).round(3)[:16])

Distractor SOA                                            0.652
Task 2 Difficulty                                         0.382
Inter-task SOA                                           -0.321
Task 2 Response Probability                              -0.261
Task_1_Cue_Type_Mapped_TCT_Implicit                      -0.192
Switch Rate                                               0.190
Task_1_Stimulus-Response_Mapping_Mapped_SRM_Compatible    0.164
Task_1_Stimulus-Response_Mapping_Mapped_SRM_Arbitrary    -0.160
Task_2_Cue_Type_Mapped_TCT2_Implicit                     -0.147
Task 1 CSI                                               -0.146
Task_2_Cue_Type_Mapped_TCT2_Arbitrary                     0.124
Response_Set_Overlap_Mapped_RSO_Identical                -0.104
Stimulus_Stimulus_Congruency_Mapped_SS_NA                -0.086
Response_Set_Overlap_Mapped_RSO_Disjoint                  0.081
Task 2 CSI                                               -0.078
Inter_task_SOA_is_NA_1                  

### Find Centroids of Paradigms and Interpolate Them

In [25]:
def hex_to_rgb(hex_color):
    """Converts a hex color string to an (R, G, B) tuple."""
    hex_color = hex_color.lstrip('#')
    return tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
# --- 1. Combine the cleaned DF with the DF in the PC space ---
df_processed_no_paradigm = df_processed.drop(columns=['Paradigm'])
plot_df = pd.concat([
    df_processed.reset_index(drop=True),
    pca_df.reset_index(drop=True)
], axis=1)
plot_df['Point Type'] = 'Empirical Data' # Label these as original points

# --- 2. Calculate and Prepare Centroids for Plotting ---

# First, get a list of just the Principal Component column names
pc_cols = [col for col in plot_df.columns if col.startswith('PC')]
# Find the centroids in PC space
paradigm_centroids_pc = au.find_centroids(plot_df[pc_cols + ['Paradigm']], paradigm_col='Paradigm')

# Convert the dictionary to a DataFrame for Altair
centroids_df = pd.DataFrame.from_dict(paradigm_centroids_pc, orient='index').reset_index()
centroids_df = centroids_df.rename(columns={'index': 'Paradigm'})
centroids_df['Point Type'] = 'Centroid'
print("\nCalculated Centroids in PC Space:")
display(centroids_df)

# --- 3. Interpolate Paradigms and Inverse-Transform Them Back To The Original Space ---
interpolated_points_list = []
interpolation_pairs = [
    ('Dual-Task_PRP', 'Task Switching'),
    ('Dual-Task_PRP', 'Interference'),
    ('Task Switching', 'Interference')
]

for p1_name, p2_name in interpolation_pairs:
    # Get the centroids for the pair
    centroid1 = paradigm_centroids_pc.get(p1_name)
    centroid2 = paradigm_centroids_pc.get(p2_name)
    
    if not centroid1 or not centroid2:
        print(f"Warning: Could not find centroids for pair ({p1_name}, {p2_name}). Skipping interpolation.")
        continue

    # Interpolate to find the midpoint in PC space
    # We need to get just the PC values for interpolation
    pc_cols = [f'PC{i+1}' for i in range(pca_results.shape[1])]
    interpolated_pc_coords = au.interpolate_centroids(
        {k: v for k, v in centroid1.items() if k in pc_cols},
        {k: v for k, v in centroid2.items() if k in pc_cols},
        alpha=0.5
    )
    
    # Inverse transform the point back to the original high-level dimension space
    original_space_params = au.inverse_transform_point(interpolated_pc_coords, pipeline)
    
    # Create a dictionary for this new point, including its PC coordinates
    new_point = original_space_params.to_dict()
    for i, pc_coord in enumerate(interpolated_pc_coords):
        new_point[f'PC{i+1}'] = pc_coord
        
    # Add metadata for this point
    new_point['Point Type'] = 'Interpolated'
    new_point['Experiment'] = f"Interpolation: {p1_name} <-> {p2_name}"
    new_point['Paradigm'] = 'Interpolated Point' # Assign a unique paradigm name for styling
    new_point['Parent1'] = p1_name
    new_point['Parent2'] = p2_name
    
    interpolated_points_list.append(new_point)

# Create a DataFrame from the list of interpolated points
interpolated_df = pd.DataFrame(interpolated_points_list)
interpolated_df = au.reverse_map_categories(interpolated_df)
interpolated_df = au.apply_conceptual_constraints(interpolated_df)
# Create placeholder columns in interpolated_df that exist in plot_df but not in interpolated_df
#for col in plot_df.columns:
#    if col not in interpolated_df.columns and col not in pc_cols:
#         interpolated_df[col] = 'Interpolated' # or np.nan
print("\nGenerated Interpolated Points (showing a few key derived dimensions):")
print(interpolated_df.columns)
display(interpolated_df[['Experiment', 'Task 2 Response Probability', 'Task_2_Difficulty_is_NA', 'Task 2 Difficulty', 'Task_2_Stimulus-Response_Mapping_Mapped', 'Task 2 Stimulus-Response Mapping', 'Stimulus-Stimulus Congruency', 'Inter-task SOA', 'Distractor_SOA_is_NA', 'Switch Rate']].round(2))

# --- 4. Combine all DataFrames into one for plotting ---
final_plot_df = pd.concat([plot_df, centroids_df, interpolated_df], ignore_index=True)
final_plot_df['Parent1'] = final_plot_df['Parent1'].fillna('N/A')
final_plot_df['Parent2'] = final_plot_df['Parent2'].fillna('N/A')

# Undo the imputation on columns that were originally 'N/A'
final_plot_df = add_na_mask_from_flag(final_plot_df)


Calculated Centroids in PC Space:


Unnamed: 0,Paradigm,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10,PC11,PC12,PC13,PC14,PC15,PC16,PC17,PC18,PC19,PC20,PC21,PC22,PC23,PC24,PC25,PC26,PC27,PC28,PC29,PC30,PC31,PC32,PC33,PC34,PC35,PC36,PC37,PC38,PC39,PC40,Point Type
0,Dual-Task_PRP,-1.955188,-1.175146,1.51436,-0.703609,0.994622,-0.07871,0.16252,0.099421,-0.060481,-0.039466,0.03631,-0.014696,-0.016778,0.036441,0.006461,-0.041107,0.063531,0.053478,-0.010984,-0.031208,0.016617,-0.018662,0.001284,0.005844,0.001396,-0.001376,0.000183,0.002069,0.00114,-7.669791e-16,7.586524000000001e-17,-2.775558e-17,3.700743e-17,-8.881784000000001e-17,-1.8966310000000002e-17,3.28441e-17,-7.586524000000001e-17,-6.753857000000001e-17,-2.132553e-16,3.359622e-17,Centroid
1,Interference,-0.97579,1.683156,-0.771405,0.12604,-0.229106,-0.022398,-0.157947,-0.0008,-0.102954,-0.12215,0.008708,-0.023443,-0.041771,-0.039534,0.081704,-0.093748,0.075009,0.077455,0.021575,-0.015357,0.0061,0.01504,-0.017098,-0.018082,-0.00219,0.013002,-0.006431,0.004335,0.000318,-2.828707e-16,-1.883836e-16,3.7794830000000006e-17,8.976271000000001e-17,-1.181088e-16,-1.417306e-16,-1.275575e-16,7.558965e-17,3.029492e-16,2.244068e-16,-4.3293750000000006e-17,Centroid
2,Single-Task,-0.942182,1.634362,-0.743601,-0.094726,0.143468,-0.145499,-0.09266,-0.339147,0.286848,0.889728,-0.154715,0.061264,-0.040232,0.164907,-0.276736,0.275524,-0.202058,-0.223519,-0.160474,0.013822,0.008077,-0.07402,0.068357,0.071998,0.008223,-0.05754,0.033864,-0.029947,0.008525,-2.039431e-16,-3.258263e-16,3.234128e-16,6.275174e-17,-4.827057e-16,-6.757879e-17,-6.516526000000001e-17,7.723291e-17,3.004843e-16,2.075634e-16,-2.0134150000000002e-17,Centroid
3,Task Switching,1.341313,-0.728478,-0.007248,0.189229,-0.240937,0.059154,0.042017,0.011107,0.039006,-0.038452,0.003263,0.009746,0.034061,-0.013158,-0.009901,0.028731,-0.036136,-0.031096,0.013499,0.017431,-0.01021,0.008188,-0.000245,-0.001784,-0.00039,0.001069,-0.001078,0.000914,-0.001711,3.701819e-16,-1.723428e-16,2.000983e-16,-1.05213e-16,1.9364360000000003e-17,6.454785e-19,2.7432840000000003e-17,-6.357963000000001e-17,2.097805e-17,-1.444258e-16,4.597795e-17,Centroid


Is single task: 0    False
1     True
2     True
dtype: bool



Generated Interpolated Points (showing a few key derived dimensions):
Index(['Task 2 Response Probability', 'Inter-task SOA', 'Distractor SOA',
       'Task 1 CSI', 'Task 2 CSI', 'RSI', 'Switch Rate', 'Task 1 Difficulty',
       'Task 2 Difficulty', 'Inter_task_SOA_is_NA', 'Distractor_SOA_is_NA',
       'Task_2_CSI_is_NA', 'Task_2_Difficulty_is_NA',
       'Stimulus_Stimulus_Congruency_Mapped',
       'Stimulus_Response_Congruency_Mapped', 'Response_Set_Overlap_Mapped',
       'RSI is Predictable', 'Task_1_Stimulus-Response_Mapping_Mapped',
       'Task_1_Cue_Type_Mapped', 'Task_2_Stimulus-Response_Mapping_Mapped',
       'Task_2_Cue_Type_Mapped', 'Trial_Transition_Type_Mapped', 'PC1', 'PC2',
       'PC3', 'PC4', 'PC5', 'PC6', 'PC7', 'PC8', 'PC9', 'PC10', 'PC11', 'PC12',
       'PC13', 'PC14', 'PC15', 'PC16', 'PC17', 'PC18', 'PC19', 'PC20', 'PC21',
       'PC22', 'PC23', 'PC24', 'PC25', 'PC26', 'PC27', 'PC28', 'PC29', 'PC30',
       'PC31', 'PC32', 'PC33', 'PC34', 'PC35', 'PC36', 'PC3

Unnamed: 0,Experiment,Task 2 Response Probability,Task_2_Difficulty_is_NA,Task 2 Difficulty,Task_2_Stimulus-Response_Mapping_Mapped,Task 2 Stimulus-Response Mapping,Stimulus-Stimulus Congruency,Inter-task SOA,Distractor_SOA_is_NA,Switch Rate
0,Interpolation: Dual-Task_PRP <-> Task Switching,0.5,1,2.676357,SRM2_Arbitrary,Arbitrary,,1279.17,1,23.35
1,Interpolation: Dual-Task_PRP <-> Interference,0.5,1,,SRM2_NA,,,1279.17,1,0.42
2,Interpolation: Task Switching <-> Interference,0.0,1,,SRM2_NA,,Neutral,1279.17,1,22.94


### Plot Experimental Conditions

In [26]:
display(final_plot_df.head())
chart_layers = []

# Define the color scheme explicitly so we can reuse it
paradigm_colors = alt.Scale(domain=['Dual-Task_PRP', 'Interference', 'Task Switching', 'Single-Task', 'Other'], 
                            range=['#440154', '#34CBAF', '#CB3450', '#FFA500', '#cccccc'])

# --- 5. Define Tooltip Columns ---
tooltip_cols = [
    alt.Tooltip('Experiment:N', title='Experiment'),
    alt.Tooltip('Paradigm:N', title='Paradigm Class'),
    alt.Tooltip('Task 2 Response Probability:Q', title='T2 Response Probability', format='.2f'),
    alt.Tooltip('Inter-task SOA:N', title='Inter-task SOA'),
    alt.Tooltip('Distractor SOA:N', title='Distractor SOA'),
    alt.Tooltip('Task 1 CSI:Q', title='T1 CSI', format='.0f'),
    alt.Tooltip('Task 2 CSI:Q', title='T2 CSI', format='.0f'),
    alt.Tooltip('RSI:Q', title='RSI', format='.0f'),
    alt.Tooltip('RSI is Predictable:N', title='RSI Predictable'),
    alt.Tooltip('Switch Rate:Q', title='Switch Rate (%)', format='.1f'),
    alt.Tooltip('Trial Transition Type:N', title='Transition Type'),
    alt.Tooltip('Stimulus-Stimulus Congruency:N', title='Stimulus-Stimulus Congruency'),
    alt.Tooltip('Stimulus-Response Congruency:N', title='Stimulus-Response Congruency'),
    alt.Tooltip('Response Set Overlap:N', title='Response Set Overlap'),
    alt.Tooltip('Task 1 Stimulus-Response Mapping:N', title='T1 Stimulus-Response Mapping'),
    alt.Tooltip('Task 2 Stimulus-Response Mapping:N', title='T2 Stimulus-Response Mapping'),
    alt.Tooltip('Task 1 Difficulty:Q', title='T1 Difficulty', format='.1f'),
    alt.Tooltip('Task 2 Difficulty:Q', title='T2 Difficulty', format='.1f'),
    alt.Tooltip('Task_2_CSI_is_NA:N', title='T2 CSI is N/A'),
    alt.Tooltip('Task_2_Difficulty_is_NA:N', title='T2 Difficulty is N/A'),
    alt.Tooltip('Inter_task_SOA_is_NA:N', title='Inter-task SOA is N/A'),
    alt.Tooltip('Distractor_SOA_is_NA:N', title='Distractor SOA is N/A')
]

# --- 6. Create the Layered Altair Chart ---

# Layer 1: Centroids (large, transparent circles)
# Zip the domain and range to loop through them together
for category, hex_color in zip(paradigm_colors.domain, paradigm_colors.range):

    # 1. Convert the hex color to its RGB components
    r, g, b = hex_to_rgb(hex_color)

    # 2. Create the RGBA string for the gradient
    transparent_rgba = f'rgba({r}, {g}, {b}, 0)'
    # 3. Create the chart layer for this category
    layer = alt.Chart(final_plot_df).mark_circle(size=20000).encode(
        x='PC1:Q',
        y='PC2:Q',
        color=alt.value({
            "gradient": "radial",
            "stops": [
                {"offset": 0, "color": hex_color},          # The solid hex color at the center
                {"offset": 1, "color": transparent_rgba}    # The transparent version at the edge
            ]
        })
    ).transform_filter(
        (alt.datum.Paradigm == category) & (alt.datum['Point Type'] == 'Centroid') # Filter for the current category
    )

    chart_layers.append(layer)

# Layer 2: Empirical Data Points (standard circles)
empirical_chart = alt.Chart(final_plot_df).mark_circle(
    size=100,
    opacity=0.8
).encode(
    x=alt.X('PC1:Q', title='Principal Component 1 (Task-Set Dynamics: Sequential Switching vs. Concurrent Coordination)'),
    y=alt.Y('PC2:Q', title='Principal Component 2 (Task Set Size '),
    color=alt.Color('Paradigm:N', title='Paradigm Class', scale=paradigm_colors),
    tooltip=tooltip_cols
).transform_filter(
    alt.datum['Point Type'] == 'Empirical Data'
)
chart_layers.append(empirical_chart)

# Layer 3: Interpolated Points (large, black diamonds to stand out)
interpolated_chart = alt.Chart(final_plot_df).mark_point(
    size=400,
    shape='M0,.5L.6,.8L.5,.1L1,-.3L.3,-.4L0,-1L-.3,-.4L-1,-.3L-.5,.1L-.6,.8L0,.5Z',
    filled=True,
    strokeWidth=4 # Make the stroke thicker to be more visible
).encode(
    x=alt.X('PC1:Q'),
    y=alt.Y('PC2:Q'),
    # Use the 'Parent1' and 'Parent2' columns to drive the colors
    color=alt.Color('Parent1:N', title='Paradigm Class', scale=paradigm_colors, legend=None), # Fill color
    stroke=alt.Color('Parent2:N', title='Paradigm Class', scale=paradigm_colors, legend=None), # Stroke color
    tooltip=tooltip_cols
).transform_filter(
    alt.datum['Point Type'] == 'Interpolated'
)
chart_layers.append(interpolated_chart)

# --- 7. Add Zero Lines and Combine Layers ---
zero_line_h = alt.Chart(pd.DataFrame({'y': [0]})).mark_rule(strokeDash=[5,5], color='grey').encode(y='y')
zero_line_v = alt.Chart(pd.DataFrame({'x': [0]})).mark_rule(strokeDash=[5,5], color='grey').encode(x='x')
chart_layers.append(zero_line_h)
chart_layers.append(zero_line_v)

# Layer all the charts together. The order is important: bottom layers are drawn first.
final_layered_chart = alt.layer(
    #centroids_chart,
    *chart_layers
).properties(
    title='PCA of Cognitive Control Paradigms with Centroids and Interpolated Points',
    width=800,
    height=800
).interactive()

final_layered_chart = final_layered_chart.resolve_scale(
    color='independent',
    stroke='independent'
).resolve_legend(
    color='shared'
)
final_layered_chart.save('chart.json')
# Display the final chart
final_layered_chart

Unnamed: 0,Experiment,Number of Tasks,Task 2 Response Probability,Inter-task SOA,Distractor SOA,Task 1 CSI,Task 2 CSI,Switch Rate,Trial Transition Type,Stimulus-Stimulus Congruency,Stimulus-Response Congruency,Response Set Overlap,Task 1 Stimulus-Response Mapping,Task 2 Stimulus-Response Mapping,Task 1 Cue Type,Task 2 Cue Type,RSI is Predictable,RSI,Task 1 Difficulty,Task 2 Difficulty,Task 1 Type,Task 2 Type,Notes,Super_Experiment_Mapping_Notes,Task 1 Difficulty Norm,Task 2 Difficulty Norm,Paradigm,Inter_task_SOA_is_NA,Distractor_SOA_is_NA,Task_2_CSI_is_NA,Task_2_Difficulty_is_NA,Stimulus_Stimulus_Congruency_Mapped,Stimulus_Response_Congruency_Mapped,Response_Set_Overlap_Mapped,Task_1_Stimulus-Response_Mapping_Mapped,Task_1_Cue_Type_Mapped,Task_2_Stimulus-Response_Mapping_Mapped,Task_2_Cue_Type_Mapped,Trial_Transition_Type_Mapped,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10,PC11,PC12,PC13,PC14,PC15,PC16,PC17,PC18,PC19,PC20,PC21,PC22,PC23,PC24,PC25,PC26,PC27,PC28,PC29,PC30,PC31,PC32,PC33,PC34,PC35,PC36,PC37,PC38,PC39,PC40,Point Type,Parent1,Parent2
0,Telford 1931 Auditory RT (500ms SOA),2.0,1.0,500.0,,0.0,0.0,0.0,Pure,,,Identical,Arbitrary,Arbitrary,None/Implicit,None/Implicit,1,1000.0,1.0,1.0,Auditory RT,Auditory RT,Both tasks were of the same task (i.e. two aud...,"{""param_overrides"": {""t1_stim_duration"":300, ""...",0.0,0.0,Dual-Task_PRP,0.0,1.0,0.0,0.0,SS_NA,SR_NA,RSO_Identical,SRM_Arbitrary,TCT_Implicit,SRM2_Arbitrary,TCT2_Implicit,TTT_Pure,0.222804,-1.318361,-1.247083,-2.369483,2.966922,-1.60601,1.467164,1.096874,0.581893,0.902546,0.173534,0.13491,0.640939,0.726127,0.109072,-0.049259,0.585845,0.226441,-0.473893,-0.401314,0.403732,-0.403326,0.077554,-0.185149,-0.026617,-0.017196,0.159421,0.012742,-0.125997,-3.413936e-15,-5.5511150000000004e-17,1.110223e-16,-1.110223e-16,0.0,2.220446e-16,4.440892e-16,-1.110223e-16,1.387779e-16,-2.220446e-16,1.1170200000000001e-17,Empirical Data,,
1,Telford 1931 Auditory RT (1000ms SOA),2.0,1.0,1000.0,,0.0,0.0,0.0,Pure,,,Identical,Arbitrary,Arbitrary,None/Implicit,None/Implicit,1,1000.0,1.0,1.0,Auditory RT,Auditory RT,Both tasks were of the same task (i.e. two aud...,"{""param_overrides"": {""t1_stim_duration"":300, ""...",0.0,0.0,Dual-Task_PRP,0.0,1.0,0.0,0.0,SS_NA,SR_NA,RSO_Identical,SRM_Arbitrary,TCT_Implicit,SRM2_Arbitrary,TCT2_Implicit,TTT_Pure,0.233284,-1.323408,-1.302024,-2.524447,3.00501,-1.18299,1.4374,0.963831,0.528838,0.916923,0.188722,0.140411,0.645936,0.709433,0.142333,-0.05459,0.590608,0.230663,-0.424886,-0.39504,0.396448,-0.385197,0.076232,-0.183926,-0.025841,-0.017798,0.156822,0.013945,-0.122281,-3.080869e-15,-5.5511150000000004e-17,-1.110223e-16,-1.110223e-16,0.0,1.110223e-16,3.885781e-16,-1.665335e-16,1.387779e-16,-2.220446e-16,8.599392e-18,Empirical Data,,
2,Telford 1931 Auditory RT (2000ms SOA),2.0,1.0,2000.0,,0.0,0.0,0.0,Pure,,,Identical,Arbitrary,Arbitrary,None/Implicit,None/Implicit,1,1000.0,1.0,1.0,Auditory RT,Auditory RT,Both tasks were of the same task (i.e. two aud...,"{""param_overrides"": {""t1_stim_duration"":300, ""...",0.0,0.0,Dual-Task_PRP,0.0,1.0,0.0,0.0,SS_NA,SR_NA,RSO_Identical,SRM_Arbitrary,TCT_Implicit,SRM2_Arbitrary,TCT2_Implicit,TTT_Pure,0.254245,-1.333503,-1.411908,-2.834375,3.081188,-0.336949,1.377873,0.697746,0.422729,0.945678,0.219097,0.151414,0.65593,0.676046,0.208854,-0.065251,0.600134,0.239106,-0.326873,-0.38249,0.38188,-0.348941,0.073589,-0.181482,-0.024291,-0.019002,0.151623,0.01635,-0.11485,-2.303713e-15,-5.5511150000000004e-17,-1.110223e-16,-3.330669e-16,0.0,0.0,3.330669e-16,-2.220446e-16,1.94289e-16,-1.94289e-16,3.457783e-18,Empirical Data,,
3,Telford 1931 Auditory RT (4000ms SOA),2.0,1.0,4000.0,,0.0,0.0,0.0,Pure,,,Identical,Arbitrary,Arbitrary,None/Implicit,None/Implicit,1,1000.0,1.0,1.0,Auditory RT,Auditory RT,Both tasks were of the same task (i.e. two aud...,"{""param_overrides"": {""t1_stim_duration"":300, ""...",0.0,0.0,Dual-Task_PRP,0.0,1.0,0.0,0.0,SS_NA,SR_NA,RSO_Identical,SRM_Arbitrary,TCT_Implicit,SRM2_Arbitrary,TCT2_Implicit,TTT_Pure,0.296165,-1.353693,-1.631674,-3.454232,3.233544,1.355132,1.258818,0.165576,0.210511,1.003187,0.279847,0.173418,0.675917,0.60927,0.341897,-0.086574,0.619186,0.255993,-0.130848,-0.35739,0.352744,-0.276427,0.068301,-0.176593,-0.021189,-0.021409,0.141225,0.021161,-0.099986,-9.714451e-16,-5.5511150000000004e-17,-5.551115e-16,-3.330669e-16,-2.220446e-16,-2.220446e-16,2.220446e-16,-3.330669e-16,2.498002e-16,-1.665335e-16,-6.825436e-18,Empirical Data,,
4,Telford 1931 Line Length (1000ms SOA),2.0,1.0,1000.0,,0.0,0.0,0.0,Pure,,,Identical,Arbitrary,Arbitrary,None/Implicit,None/Implicit,1,1000.0,3.0,3.0,Line Length Comparison,Line Length Comparison,Both tasks were of the same task (i.e. two len...,,0.5,0.5,Dual-Task_PRP,0.0,1.0,0.0,0.0,SS_NA,SR_NA,RSO_Identical,SRM_Arbitrary,TCT_Implicit,SRM2_Arbitrary,TCT2_Implicit,TTT_Pure,-2.191585,-1.287051,1.517026,-0.913239,0.550787,0.12747,0.199759,1.065361,0.614898,-0.037466,-0.287686,-0.040984,-0.022006,0.14491,-0.425805,0.056478,-0.081907,-0.079577,-0.782689,-0.088472,0.10213,-0.21033,0.025155,-0.112688,-0.040802,0.042676,0.027239,-0.025335,-0.043053,-1.082467e-15,-1.665335e-16,2.220446e-16,1.110223e-16,-2.220446e-16,-1.110223e-16,3.330669e-16,1.110223e-16,1.387779e-16,-6.106227e-16,5.365763000000001e-17,Empirical Data,,


In [12]:
np.mean(au.get_loadings_sparseness(loadings[["PC1", "PC2", "PC3"]].to_numpy()))

np.float64(0.3668129217306521)