# Importing Required Libraries

In [3]:
import pandas as pd
import numpy as np
import os

In [10]:
def analyze_target_file(target_csv):
    """
    Print a summary of the label distribution in a targets CSV.

    The file at ``target_csv`` is read with pandas and must contain a
    ``labels`` column. Three things are written to stdout: the total number
    of rows, the number of rows for every distinct label (most frequent
    first), and the per-class counts for the two binary tasks
    (face vs house and face vs scissors). Nothing is returned.
    """
    label_column = pd.read_csv(target_csv)['labels']

    def rows_with(name):
        # Number of rows whose label is exactly `name`.
        return int((label_column == name).sum())

    # Overall size and per-label breakdown
    print(f"Total number of samples: {len(label_column)}")
    print("\nSamples per label:")
    for name, n_rows in label_column.value_counts().items():
        print(f"{name}: {n_rows}")

    print("\nBinary classification counts:")

    # Face vs House
    n_face = rows_with('face')
    n_house = rows_with('house')
    print("\nFace vs House:")
    print(f"Face: {n_face}")
    print(f"House: {n_house}")
    print(f"Total: {n_face + n_house}")

    # Face vs Scissors (the face count is the same rows as above)
    n_scissors = rows_with('scissors')
    print("\nFace vs Scissors:")
    print(f"Face: {n_face}")
    print(f"Scissors: {n_scissors}")
    print(f"Total: {n_face + n_scissors}")

# Print the label summary when this code is executed directly; the guard
# keeps the analysis from running if the code is imported as a module.
if __name__ == "__main__":
    # Relative path: resolved against the current working directory.
    TARGET_CSV = "targets.csv"
    analyze_target_file(TARGET_CSV)

Total number of samples: 1452

Samples per label:
rest: 588
scissors: 108
face: 108
cat: 108
shoe: 108
house: 108
scrambledpix: 108
bottle: 108
chair: 108

Binary classification counts:

Face vs House:
Face: 108
House: 108
Total: 216

Face vs Scissors:
Face: 108
Scissors: 108
Total: 216


## Preprocessing the data for binary classification


In [15]:
def create_binary_dataset(base_folder, region, target_csv, label1, label2):
    """
    Build and save a two-class dataset for one region.

    Reads the region's space-separated, header-less ``features.csv`` and the
    targets CSV (which must have a ``labels`` column), keeps only the rows
    labelled ``label1`` or ``label2``, and writes the selected feature rows
    followed by a 0/1 label column and the label name to
    ``{region}_{label1}_vs_{label2}.csv`` in the current working directory
    (no header, no index column).

    Parameters
    ----------
    base_folder : str
        Folder containing one sub-folder per region.
    region : str
        Region name. ``"vt"`` is read from ``<base_folder>/vt/features.csv``;
        any other region from ``<base_folder>/<region>/RL/features.csv``.
    target_csv : str
        Path to the targets CSV.
    label1, label2 : str
        Labels to keep; ``label1`` is encoded as 0 and ``label2`` as 1.

    Returns
    -------
    None
        Progress and per-class counts are printed to stdout.

    Notes
    -----
    Feature rows are matched to target rows by position: data row ``i`` of
    the targets file is paired with row ``i`` of the feature file. This
    assumes the feature file has one row per target row, in the same order
    -- TODO confirm against the data export. (An earlier version selected
    ``index - 1``, which paired every label with the previous sample's
    features.)
    """
    print(f"\nProcessing {label1} vs {label2} for region {region}")

    print(region)

    # "vt" keeps its features directly under the region folder; every other
    # region stores them one level deeper, in an "RL" sub-folder.
    if region == "vt":
        feature_file = os.path.join(base_folder, region, "features.csv")
    else:
        feature_file = os.path.join(base_folder, region, "RL", "features.csv")
        print(feature_file)

    # Read feature file (space-separated, no header)
    features = pd.read_csv(feature_file, header=None, sep=' ')

    # Read target labels
    targets = pd.read_csv(target_csv)

    # Filter for required labels
    mask = targets['labels'].isin([label1, label2])
    filtered_targets = targets[mask]

    # Select the matching feature rows by *position*. The targets header is
    # consumed by read_csv and the feature file has no header, so both frames
    # are zero-based over the same samples and no offset is needed. Using
    # positions (not index labels) also keeps this correct if `targets` ever
    # carries a non-default index.
    row_positions = np.flatnonzero(mask.to_numpy())
    selected_features = features.iloc[row_positions]

    # Create binary labels (0 for label1, 1 for label2)
    binary_labels = (filtered_targets['labels'] == label2).astype(int)

    # Create final dataframe: features, then numeric label, then label name
    final_df = selected_features.copy()
    final_df['label'] = binary_labels.values
    final_df['label_name'] = filtered_targets['labels'].values

    # Save to CSV without index and without numbering the feature columns
    output_file = f"{region}_{label1}_vs_{label2}.csv"
    final_df.to_csv(output_file, index=False, header=False)
    print(f"Created {output_file}")
    print(f"Number of {label1}: {int((binary_labels == 0).sum())}")
    print(f"Number of {label2}: {int((binary_labels == 1).sum())}")

def main():
    """
    Generate every region/label-pair dataset used for binary classification.

    For each configured region, in order, writes the face-vs-house dataset
    and then the face-vs-scissors dataset via ``create_binary_dataset``.
    """
    # Settings
    base_folder = "subj1"  # Change this to your base folder path
    target_csv = "targets.csv"
    regions = ("vt", "Inferior_frontal")
    label_pairs = (("face", "house"), ("face", "scissors"))

    # Regions form the outer loop so both datasets for one region are
    # produced before moving on to the next region.
    for region in regions:
        for first_label, second_label in label_pairs:
            create_binary_dataset(
                base_folder, region, target_csv, first_label, second_label
            )

# Entry-point guard: build all datasets only when this code is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()


Processing face vs house for region vt
vt
Created vt_face_vs_house.csv
Number of face: 108
Number of house: 108

Processing face vs scissors for region vt
vt


Created vt_face_vs_scissors.csv
Number of face: 108
Number of scissors: 108

Processing face vs house for region Inferior_frontal
Inferior_frontal
subj1/Inferior_frontal/RL/features.csv
Created Inferior_frontal_face_vs_house.csv
Number of face: 108
Number of house: 108

Processing face vs scissors for region Inferior_frontal
Inferior_frontal
subj1/Inferior_frontal/RL/features.csv
Created Inferior_frontal_face_vs_scissors.csv
Number of face: 108
Number of scissors: 108
