In [1]:
import os
import pandas as pd

# Top-level folder of the data set; every sub-folder is handled as one subject.
root_dir = r"C:\Users\psiok\Desktop\Justice Project\CN_DATA"

# Names of the entries directly under the root folder that are directories.
subject_dirs = list(
    filter(
        lambda entry: os.path.isdir(os.path.join(root_dir, entry)),
        os.listdir(root_dir),
    )
)

# Columns every raw CSV must provide: a label/condition pair for each of the
# four measures, plus the shared "finalVar" column.
selected_columns = [
    "label_shame",
    "shame_condition",
    "label_wrongness",
    "wrongness_condition",
    "label_devaluation",
    "devaluation_condition",
    "label_time",
    "time_condition",
    "finalVar",
]

# Define a function to process the raw data from a CSV file
def process_raw_data(file_path):
    """Load one raw CSV and lay its per-measure rows out side by side.

    The file is read with ``pd.read_csv``. If any column listed in the
    module-level ``selected_columns`` is absent, a warning is printed and
    ``None`` is returned. Otherwise the selected columns are kept, the first
    six rows are discarded, and for each measure (shame, wrongness,
    devaluation, time) the rows whose label is present are collected as a
    three-column frame (label, condition, ``finalVar``). Those frames are
    re-indexed from zero and concatenated column-wise, so the result contains
    ``finalVar`` once per measure.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The column-wise concatenated DataFrame, or ``None`` when required
        columns are missing.
    """
    raw = pd.read_csv(file_path)

    missing = set(selected_columns) - set(raw.columns)
    if missing:
        print(f"Warning: Missing columns in {file_path}. Skipping this file.")
        return None

    # NOTE(review): the first six rows are dropped unconditionally; presumably
    # they are not real trials -- confirm against the raw file layout.
    trimmed = raw[selected_columns].iloc[6:].reset_index(drop=True)

    per_measure_frames = []
    for label in ("shame", "wrongness", "devaluation", "time"):
        label_col = f'label_{label}'
        if label_col not in trimmed.columns:
            continue
        has_label = trimmed[label_col].notna()
        subset = trimmed.loc[has_label, [label_col, f'{label}_condition', 'finalVar']]
        # Restart the index so the frames line up row-by-row when joined.
        per_measure_frames.append(subset.reset_index(drop=True))

    return pd.concat(per_measure_frames, axis=1)

# For each measure: its label column, its condition column, and the finalVar
# column that belongs to it. The ".1"/".2"/".3" suffixes are the names pandas
# assigns to repeated "finalVar" headers when the merged CSV is read back.
condition_columns = dict(
    shame=["label_shame", "shame_condition", "finalVar"],
    wrongness=["label_wrongness", "wrongness_condition", "finalVar.1"],
    devaluation=["label_devaluation", "devaluation_condition", "finalVar.2"],
    time=["label_time", "time_condition", "finalVar.3"],
)

# Function to extract and sort data based on a given condition
def extract_and_sort(data, columns):
    """Return the given columns of ``data``, labelled rows only, sorted by label.

    Args:
        data: DataFrame to select from.
        columns: Column names to keep; the first one is treated as the label
            column used for both filtering and sorting.

    Returns:
        A DataFrame with only ``columns``, without the rows whose label is
        missing, ordered by the label column. Original index values are kept.
    """
    label_column = columns[0]
    return (
        data[columns]
        .dropna(subset=[label_column])
        .sort_values(by=label_column)
    )

# Function to merge the finalVar columns while preserving NaN values
def merge_finalVars(row):
    """Return the first entry of ``row`` that is not missing.

    Args:
        row: Iterable of values, e.g. one DataFrame row holding the
            per-measure finalVar cells.

    Returns:
        The first value for which ``pd.notna`` is true, or ``None`` when every
        value is missing (or the row is empty).
    """
    return next((value for value in row if pd.notna(value)), None)

# Process raw data, merge, and save the processed data.
#
# The outputs of this script ("merged_", "adjusted_" and "final_" CSVs) are
# written into the same subject folder as the raw files. They must be excluded
# from the raw-file scan: "merged_"/"adjusted_" files contain every required
# column, so on a second run they would otherwise be ingested as raw data and
# duplicate rows. The "processed_" prefix was already excluded by the original
# filter and is kept for compatibility.
generated_prefixes = ('processed_', 'merged_', 'adjusted_', 'final_')

for subject_dir in subject_dirs:
    subject_path = os.path.join(root_dir, subject_dir)

    # Raw inputs: every CSV in the subject folder that this script did not
    # generate itself (str.startswith accepts a tuple of prefixes).
    raw_csv_files = [
        f for f in os.listdir(subject_path)
        if f.endswith('.csv') and not f.startswith(generated_prefixes)
    ]

    # Process each raw CSV file; files with missing columns yield None and
    # are left out.
    processed_data_list = []
    for raw_csv_file in raw_csv_files:
        raw_csv_path = os.path.join(subject_path, raw_csv_file)
        processed_data = process_raw_data(raw_csv_path)
        if processed_data is not None:
            processed_data_list.append(processed_data)

    # Stack the per-file results and write them as merged_<subject>.csv.
    # Subjects with no usable raw file produce no merged file.
    if processed_data_list:
        merged_data = pd.concat(processed_data_list, axis=0, ignore_index=True)
        merged_file_name = f"merged_{subject_dir}.csv"
        merged_file_path = os.path.join(subject_path, merged_file_name)
        merged_data.to_csv(merged_file_path, index=False)

print("Processing and merging complete!")

# Re-shape each subject's merged file: pull out every measure's three columns,
# sort each by its label, stack the pieces vertically and save the result as
# adjusted_<subject>.csv.
for subject_dir in subject_dirs:
    subject_path = os.path.join(root_dir, subject_dir)
    merged_file_path = os.path.join(subject_path, f"merged_{subject_dir}.csv")

    # Subjects without a merged file are skipped.
    if not os.path.exists(merged_file_path):
        continue

    data = pd.read_csv(merged_file_path)
    adjusted_data_list = [
        extract_and_sort(data, measure_columns)
        for measure_columns in condition_columns.values()
    ]
    # Stacking frames with different column sets leaves the other measures'
    # columns empty in each row.
    adjusted_data = pd.concat(adjusted_data_list, axis=0).reset_index(drop=True)

    adjusted_file_path = os.path.join(subject_path, f"adjusted_{subject_dir}.csv")
    adjusted_data.to_csv(adjusted_file_path, index=False)

print("Adjustment complete!")

# Collapse each subject's adjusted file into three columns (label, condition,
# finalVar), sort by label and save the result as final_<subject>.csv.
for subject_dir in subject_dirs:
    subject_path = os.path.join(root_dir, subject_dir)
    adjusted_file_path = os.path.join(subject_path, f"adjusted_{subject_dir}.csv")

    # Subjects without an adjusted file are skipped.
    if not os.path.exists(adjusted_file_path):
        continue

    data = pd.read_csv(adjusted_file_path)

    # Missing cells are replaced with '' and the cells are then added across
    # each row.
    # NOTE(review): this relies on the cells being strings so that the sum
    # concatenates them -- confirm for the condition columns.
    label_frame = data[['label_shame', 'label_wrongness', 'label_devaluation', 'label_time']]
    labels = label_frame.fillna('').sum(axis=1)

    condition_frame = data[['shame_condition', 'wrongness_condition', 'devaluation_condition', 'time_condition']]
    conditions = condition_frame.fillna('').sum(axis=1)

    # Take the first non-missing finalVar cell in each row.
    final_var_frame = data[['finalVar', 'finalVar.1', 'finalVar.2', 'finalVar.3']]
    finalVars = final_var_frame.apply(merge_finalVars, axis=1)

    final_data = pd.DataFrame(
        {'label': labels, 'condition': conditions, 'finalVar': finalVars}
    ).sort_values(by='label')

    final_file_path = os.path.join(subject_path, f"final_{subject_dir}.csv")
    final_data.to_csv(final_file_path, index=False)

print("Final data processing complete!")


Processing and merging complete!
Adjustment complete!
Final data processing complete!
