In [1]:
import numpy as np
import pandas as pd

In [2]:
def values_creator_generator(dct):
    """
    Build a lookup function from a mapping.

    The returned function takes a single key and gives back the value stored
    for it in dct, or None when the key is not present in dct.
    It is meant for translating a column from one encoding to another, e.g.:
    did_win_map = {"reward": 1, "punishment": 0}
    df_dst["DidWin"] = df_src["award type"].apply(values_creator_generator(did_win_map))

    Note: this is equivalent to passing dct.get directly to .apply, since
    dict.get also returns None for a key that is not present.
    """
    def lookup(key):
        # dict.get falls back to None when the key is absent
        return dct.get(key)

    return lookup

In [13]:
# "award type" value -> DidWin flag (see reformat_file)
did_win_map = {
    "reward": 1,
    "punishment": 0,
}

# "vas_label" value -> VAS question number (see reformat_file)
vas_question_number_map = {
    "Not anxious,Very anxious": 1,
    "Not at all,Very much": 2,
    "Not at all tired,Very tired": 3,
    "Worst mood ever,Best mood ever": 4,
}

# VAS question number -> VAS_type label (see reformat_file)
vas_question_types_map = {
    1: "Anxiety",
    2: "Avoidance",
    3: "Tired",
    4: "Mood",
}

def generate_missing_values(df):
    """
    Fill in the values that the "PARTICIPANT_door" format does not carry:
    Subtrial and the TaskRunX section names.

    Rows are processed in their existing order. Consecutive rows that share
    the same "section" value form one run:
    - Every row of a "Practice" or "Door Game (playing main game)" run gets a
      Subtrial number that starts at 1 and increases by 1 within the run.
      All other rows get NaN in Subtrial.
    - Every "Door Game (playing main game)" run is renamed to "TaskRun<k>",
      where k counts those runs from 1 in order of appearance.

    The input frame is not modified; a modified copy is returned. Rows are
    handled by position, so the frame may carry any index (including one with
    repeated labels).

    :param df: DataFrame with a "section" column
    :return: copy of df with a float "Subtrial" column added and the
             "section" column rewritten as described above
    :raises KeyError: if df has no "section" column
    """
    main_game = "Door Game (playing main game)"
    # Where is simulation in the PARTICIPANTS format?
    numbered_sections = (main_game, "Practice")

    task_run_index = 0
    subtrial_index = 1
    previous_section = None

    subtrials = []
    renamed_sections = []

    for current_section in df["section"].tolist():
        # A change of section starts a new run. NaN never compares equal to
        # anything, so every NaN row counts as a run of its own.
        if current_section != previous_section:
            subtrial_index = 1
            if current_section == main_game:
                task_run_index += 1

        if current_section in numbered_sections:
            subtrials.append(subtrial_index)
            subtrial_index += 1
        else:
            subtrials.append(np.nan)

        if current_section == main_game:
            renamed_sections.append("TaskRun" + str(task_run_index))
        else:
            renamed_sections.append(current_section)

        # Compare against the original name, not the TaskRunX replacement
        previous_section = current_section

    result = df.copy()
    # Explicit float dtype keeps the column float64 even for an empty frame
    result["Subtrial"] = np.array(subtrials, dtype=float)
    result["section"] = renamed_sections
    return result
        

def reformat_file(src_path, dst_path):
    """
    Convert a csv file from the "PARTICIPANT_door" format to the "Doors" format.

    The file at src_path is read, its missing Subtrial / TaskRunX values are
    filled in by generate_missing_values, the columns are copied or derived
    under their "Doors" names, five placeholder "Question" rows are appended,
    and the result is written to dst_path as a csv file (to_csv defaults, so
    the row index is written as the first column).

    :param src_path: path of the "PARTICIPANT_door" csv file to read
    :param dst_path: path of the csv file to write
    :return: None
    """
    not_opened = "door not opened"

    source = generate_missing_values(pd.read_csv(src_path))
    award = source["award type"]

    doors = pd.DataFrame()

    # Columns that are carried over as-is under a new name.
    # "Section" may be subject to more processing later.
    straight_copies = (
        ("Subject", "sdan"),
        ("Subtrial", "Subtrial"),
        ("Section", "section"),
        ("Reward_magnitude", "door(r)"),
        ("Punishment_magnitude", "door(p)"),
        ("DistanceFromDoor_SubTrial", "door_locked_level"),
        ("Distance_min", "distance_min"),
        ("Distance_max", "distance_max"),
    )
    for doors_name, source_name in straight_copies:
        doors[doors_name] = source[source_name]

    # TODO: ask about the factor 1000 (presumably seconds -> milliseconds; confirm)
    doors["DoorAction_RT"] = source["door duration (sec)"] * 1000

    # NOTE(review): any award type other than "door not opened" — including a
    # missing value — counts as opened here; confirm this is intended.
    doors["Door_opened"] = award.apply(lambda outcome: 0 if outcome == not_opened else 1)
    doors["Door_Status"] = doors["Door_opened"].apply(lambda flag: "closed" if not flag else "opened")
    doors["Door_outcome"] = award.apply(lambda outcome: None if outcome == not_opened else outcome)

    doors["DidWin"] = award.apply(values_creator_generator(did_win_map))

    doors["Total_coins"] = source["total_coins"]

    number_of = values_creator_generator(vas_question_number_map)
    type_of = values_creator_generator(vas_question_types_map)
    doors["VASQuestionNumber"] = source["vas_label"].apply(number_of)
    doors["VAS_type"] = doors["VASQuestionNumber"].apply(type_of)
    # Types are different, see if this is a problem
    doors["VAS_score"] = source["vas_response"]
    # The slider_2.rt values sit one row above where they are expected,
    # so they are moved down by one row to line up.
    doors["VAS_RT"] = source['slider_2.rt'].shift(1)

    # Manual fake data for the Q_X rows
    question_rows = pd.DataFrame({
        "Section": ["Question"] * 5,
        "Q_type": ["Won", "Lost", "Monster", "Coins", "Performance"],
        "Q_score": [100] * 5,
        "Q_RT": [100] * 5,
        "Subject": [doors["Subject"][0]] * 5,
    })
    doors = pd.concat([doors, question_rows], ignore_index=True)

    doors.to_csv(dst_path)



In [14]:
# Convert one participant export and write the result next to the notebook.
# NOTE(review): the source path starts at the filesystem root — confirm it is correct.
reformat_file(
    src_path=r"/PARTICIPANT_door_2023-10-11_20h25.23.421 (1).csv",
    dst_path=r"REFORMAT.csv",
)

In [None]:
"""
sdan -> Subject
section -> Section
X -> Subtrial
door(r) -> Reward_magnitude
door(p) -> Punishment_magnitude
door_locked_level -> DistanceFromDoor_SubTrial
distance_min -> Distance_min
distance_max -> Distance_max
door duration (sec) * 1000 -> DoorAction_RT
award type!="door not opened" -> Door_opened
award type!="door not opened" -> Door_Status
award type -> Door_outcome+DidWin
total_coins -> Total_coins
vas_label -> VASQuestionNumber+VAS_type
vas_response -> VAS_score
slider_2.rt (shifted down one row) -> VAS_RT
"""