In [24]:
from pandas import read_csv, DataFrame

# Number of study participants. get_raw_data() keeps 2 * NUM_PARTICIPANTS rows
# of the raw export, i.e. two questionnaire rows per participant.
NUM_PARTICIPANTS = 24

# Known pose-order codes. preprocess_data() looks up the tail of each condition
# code in this list and labels it "po<1-based position>".
POSE_ORDERS = [
    "ABDC",
    "BCAD",
    "CDBA",
    "DACB",
]

In [35]:
##########################################################################################
def get_raw_data(csv_path=None, num_participants=None):
    """Read the raw questionnaire export and return only the answer cells.

    Parameters
    ----------
    csv_path : str or path-like, optional
        Location of the raw ``main_form.csv`` export. When omitted, the
        original hard-coded location is used so existing calls keep working.
    num_participants : int, optional
        Number of participants; two rows are kept per participant. When
        omitted, the module-level ``NUM_PARTICIPANTS`` is used.

    Returns
    -------
    DataFrame
        The slice ``[2 : 2 + 2 * num_participants, 17 : -1]`` of the parsed
        file, i.e. data rows 2 onward and columns 17 up to (but excluding)
        the last one. The original index/column labels are kept.
    """
    if csv_path is None:
        csv_path = 'c:\\Users\\micha\\OneDrive\\My_GitHub_Repos\\robot-x-ar\\study2\\data\\questionnaire\\main_form.csv'
    if num_participants is None:
        num_participants = NUM_PARTICIPANTS

    big_df = read_csv(csv_path)

    # NOTE(review): the first two parsed rows and the first 17 columns are
    # skipped -- presumably survey-export header rows / metadata columns;
    # confirm against the raw file.
    first_row = 2
    last_row = first_row + num_participants * 2
    df = big_df.iloc[first_row:last_row, 17:-1]

    print("\n Finished reading raw csv file! \n")

    return df


##########################################################################################
def _clean_questionnaire(raw, pose_orders):
    """Build the cleaned questionnaire frame from the sliced raw frame.

    ``raw`` is read purely by column position: 0 = participant id,
    1 = condition code, 2 = single-scale difficulty rating, 3..8 = the six
    TLX items. ``pose_orders`` is the list of known pose-order codes.
    Returns a new DataFrame with a fresh 0..n-1 index, rows in the same order
    as ``raw``.
    """
    cond_codes = raw.iloc[:, 1].tolist()

    cleaned = DataFrame({
        'pid': [int(p) for p in raw.iloc[:, 0]],
        # the first character of the code marks the condition ("Y..." -> "yes")
        'condition': ["yes" if code[0] == "Y" else "no" for code in cond_codes],
    })

    # Order of {yes, no}: taken from the first row recorded for each pid, so
    # the label does not depend on a participant's rows being adjacent or on
    # the total number of rows being even.
    first_condition = cleaned.groupby('pid', sort=False)['condition'].transform('first')
    cleaned['cond_order'] = first_condition.map({"yes": "viz_first", "no": "none_first"})

    # the remainder of the code (after the first character) is the pose order;
    # an unknown code raises ValueError from list.index
    cleaned['pose_order'] = [
        "po" + str(pose_orders.index(code[1:]) + 1) for code in cond_codes
    ]

    # reverse-scored as 8 - x (presumably a 1-7 scale -- TODO confirm)
    cleaned['difficulty'] = (8.0 - raw.iloc[:, 2].astype(float)).tolist()

    tlx_columns = [
        'tlx_mental',
        'tlx_physical',
        'tlx_hurried',
        'tlx_successful',
        'tlx_hard',
        'tlx_insecure',
    ]
    for offset, name in enumerate(tlx_columns):
        scores = raw.iloc[:, 3 + offset].astype(float)
        if name == 'tlx_successful':
            # reverse-scored as 21 - x so it points the same way as the other
            # items (presumably a 1-20 scale -- TODO confirm)
            scores = 21.0 - scores
        # .tolist() drops raw's index so values are assigned by position
        cleaned[name] = scores.tolist()

    # unweighted mean of the six items; skipna=False keeps a missing item as
    # NaN in the average instead of silently averaging over fewer items
    cleaned['tlx_ave'] = cleaned[tlx_columns].mean(axis=1, skipna=False)

    return cleaned


def preprocess_data(raw_df=None, dest_path=None):
    """Clean the raw questionnaire data and write it to a CSV file.

    Parameters
    ----------
    raw_df : DataFrame, optional
        Already-sliced raw data laid out as returned by ``get_raw_data()``.
        When omitted, ``get_raw_data()`` is called.
    dest_path : str or path-like, optional
        Where to write the cleaned CSV. When omitted, the original hard-coded
        location is used so existing calls keep working.

    Returns
    -------
    None
        The result is written to ``dest_path`` (without the index), sorted by
        ``pid``; a status line is printed afterwards.

    Raises
    ------
    ValueError
        If a participant id / rating cannot be converted to a number or a
        pose-order code is not in ``POSE_ORDERS``.
    """
    if raw_df is None:
        raw_df = get_raw_data()
    if dest_path is None:
        dest_path = 'c:\\Users\\micha\\OneDrive\\My_GitHub_Repos\\robot-x-ar\\study2\\data\\questionnaire\\main_form_cleaned.csv'

    # stable sort: rows of the same participant keep their original order
    cleaned_df = _clean_questionnaire(raw_df, POSE_ORDERS).sort_values(by="pid", kind="stable")

    # write new dataframe to csv file
    cleaned_df.to_csv(dest_path, index=False)

    print(" Successfully written pre-processed data to csv file! \n")

In [36]:
# Run the whole pipeline: read the raw questionnaire CSV, clean it, and write
# the cleaned CSV (each stage prints a status line when it finishes).
preprocess_data()


 Finished reading raw csv file! 

 Successfully written pre-processed data to csv file! 

