In [1]:
from os import getcwd
from pathlib import Path

import matplotlib.pyplot as plt
from pandas import read_csv, concat, DataFrame
from scipy import stats
# from numpy import mean, std, array

# Number of participants; participant ids run from 1 to NUM_PARTICIPANTS
# inclusive when the per-participant files are processed.
NUM_PARTICIPANTS = 24

# Set to True to show a scatter plot with the fitted regression line for each
# participant; leave False to only compute and save the parameters.
PLOTTING = False

In [2]:
##########################################
def read_times(part_id):
    """Load one participant's timing data, restricted to the "low_auto" rows.

    The file is read from ``<cwd>/all_times/part<part_id>_all_times.csv``.
    The path is assembled with ``pathlib`` so that it resolves on every
    operating system, not only where the backslash is the path separator.

    Parameters
    ----------
    part_id : int or str
        Participant identifier; it is converted with ``str()`` and inserted
        into the file name.

    Returns
    -------
    pandas.DataFrame
        The rows whose ``auto_level`` column equals ``"low_auto"``, with the
        index reset to start at 0.

    Raises
    ------
    FileNotFoundError
        If the participant's CSV file does not exist (raised by ``read_csv``).
    KeyError
        If the file has no ``auto_level`` column.
    """
    file_name = "part" + str(part_id) + "_all_times.csv"
    file_path = Path.cwd() / "all_times" / file_name
    raw_df = read_csv(file_path)

    # Keep only the "low_auto" rows. NOTE(review): the earlier comment on this
    # filter called it the "no autonomy" condition -- confirm that "low_auto"
    # is the intended label for that condition.
    is_low_auto = raw_df["auto_level"] == "low_auto"
    return raw_df[is_low_auto].reset_index(drop=True)


##########################################
def reject_outliers(df, column="move_time", lower=0.01, upper=0.99):
    """Drop the rows whose value lies outside a central quantile band.

    With the default arguments this keeps the rows whose ``move_time`` is
    strictly between the 1st and the 99th percentile of that column.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to filter; it is not modified.
    column : str, optional
        Name of the numeric column the quantiles are computed on.
    lower, upper : float, optional
        Quantile levels in ``[0, 1]`` used as the lower and upper cut-offs.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` with ``q_low < df[column] < q_hi``. The original
        index labels are preserved.
    """
    values = df[column]
    q_low = values.quantile(lower)
    q_hi = values.quantile(upper)
    # Both comparisons are strict, so a row that sits exactly on a cut-off is
    # removed together with the rows beyond it.
    return df[(values > q_low) & (values < q_hi)]


##########################################
def summarize_linreg(slope, intercept, r, p, std_err):
    """Print a four-line report of one linear-regression fit.

    The report is a 20-character ``=`` rule, a completion message, and two
    lines listing the given values rounded to three decimal places.

    Parameters
    ----------
    slope, intercept : float
        Coefficients of the fitted line.
    r, p, std_err : float
        Remaining values reported for the fit, printed in this order.

    Returns
    -------
    None
    """
    print("="*20, "linear regression has finished!", sep="\n")

    coefficients_line = "(slope, intercept) = (%.3f, %.3f)" % (slope, intercept)
    print(coefficients_line)

    statistics_line = "(r, p, std_err) = (%.3f, %.3f, %.3f)" % (r, p, std_err)
    print(statistics_line)

In [3]:
############# REGRESSION, STORING RESULTS FOR EACH PARTICIPANT #############
# For every participant, regress move_time on fitts_id_num and collect the fit
# parameters. One entry is appended to each list per participant inside the
# loop, so the lists always stay aligned by position.
part_ids = []
slopes = []
intercepts = []
rs = []
ps = []
std_errs = []

# ring_id values that are filtered for outliers together, one entry per
# difficulty level: low (ring 1), medium (rings 2 and 3 pooled), high (ring 4).
# Rows with any other ring_id do not enter the regression.
RING_GROUPS = ([1], [2, 3], [4])


### loop through all participants
for part_id in range(1, NUM_PARTICIPANTS+1):

    raw_df = read_times(part_id)

    # remove outliers separately within each difficulty level
    filtered_df_list = [
        reject_outliers(raw_df[raw_df["ring_id"].isin(ring_ids)])
        for ring_ids in RING_GROUPS
    ]

    # join dataframe
    clean_df = concat(filtered_df_list, axis=0)

    # get data arrays
    id_list = clean_df['fitts_id_num']
    mt_list = clean_df['move_time']

    # linear regression and save parameters
    slope, intercept, r, p, std_err = stats.linregress(id_list, mt_list)
    # to print each fit while debugging:
    # summarize_linreg(slope, intercept, r, p, std_err)
    part_ids.append(part_id)
    slopes.append(slope)
    intercepts.append(intercept)
    rs.append(r)
    ps.append(p)
    std_errs.append(std_err)

    if PLOTTING:
        # fitted move time at every observed fitts_id_num (vectorized, so no
        # helper function has to be redefined on each iteration)
        mymodel = slope * id_list + intercept

        plt.scatter(id_list, mt_list)
        plt.plot(id_list, mymodel)
        plt.title("Participant %d with no autonomy" % part_id)
        plt.xlabel("Fitts ID")
        plt.ylabel("Move Time (s)")
        plt.show()
        

######################### GENERATE NEW DATAFRAME #########################
# Build one row per participant from the per-participant result lists; the
# lists must all have the same length and be aligned by position.
df_dict = {
    'pid': part_ids,
    'slope': slopes,
    'intercept': intercepts,
    'r': rs,
    'p': ps,
    'std_err': std_errs
}
processed_df = DataFrame(df_dict)

# write processed dataframe to csv file
# The path is joined with pathlib rather than a hard-coded backslash, so the
# file is created inside the current working directory on every operating
# system. It is converted back to str so dest_path keeps its previous type.
dest_path = str(Path.cwd() / "linreg_params.csv")
processed_df.to_csv(dest_path, index=False)
print("\n Successfully written linear regression results to csv file! \n")


 Successfully written linear regression results to csv file! 

