## Reanalysis of Previous Silcton Data

In [32]:
# Step 1 of the reanalysis: fit the regression models that map the buggy
# pointing coding onto the corrected coding.

#baseDir = '../ReanalysisData'

# Training_Data.csv contains, for each row, both the buggy coding and the
# corrected coding of the pointing scores; the regressions below are fit on it.
df_train_path = os.path.join(baseDir, 'Training_Data.csv')
df_train = pd.read_csv(df_train_path, sep=",", encoding='unicode_escape')

# Only rows flagged as 'New' are used for training.
df_train = df_train[df_train['New_or_Original'] == 'New']

# Predictors (buggy coding) and targets (corrected coding), kept as
# one-column DataFrames, separately for within and between pointing.
X_train_within = df_train[['bad_pointing_coding_within']]
y_train_within = df_train[['good_pointing_coding_within']]
X_train_between = df_train[['bad_pointing_coding_between']]
y_train_between = df_train[['good_pointing_coding_between']]

# One independent linear model per pointing type (fit() returns the estimator).
within_model = linear_model.LinearRegression().fit(X_train_within, y_train_within)
between_model = linear_model.LinearRegression().fit(X_train_between, y_train_between)

# Report intercept, slope and R^2 (on the training data) for each model.
print(
    f"Linear regression for within pointing:\n"
    f"intercept = {within_model.intercept_[0]:.2f}\n"
    f"beta = {within_model.coef_[0,0]:.2f}\n"
    f"R^2 = {within_model.score(X_train_within,y_train_within):.2f}\n"
)

print(
    f"Linear regression for between pointing:\n"
    f"intercept = {between_model.intercept_[0]:.2f}\n"
    f"beta = {between_model.coef_[0,0]:.2f}\n"
    f"R^2 = {between_model.score(X_train_between,y_train_between):.2f}\n"
)

Linear regression for within pointing:
intercept = -1.18
beta = 1.45
R^2 = 0.93

Linear regression for between pointing:
intercept = -0.09
beta = 1.28
R^2 = 0.93



In [None]:
def _correct_column(df, column, model):
    """Return a one-column copy of ``df[[column]]`` with non-NaN rows replaced by ``model.predict``.

    Rows that are NaN in ``column`` are left as NaN. The returned frame is a
    new float-typed DataFrame; ``df`` itself is not modified.
    """
    buggy = df[[column]]
    # astype returns a new frame; casting to float up front lets the float
    # predictions be written back without relying on implicit dtype upcasting.
    corrected = buggy.astype(float)

    valid = buggy[column].notna()
    if not valid.any():
        # Nothing to predict; do not hand the model an empty frame.
        return corrected

    features = buggy.loc[valid]
    # If the model recorded the column name it was fitted on, present the
    # data under that same name: `column` here is generally different from
    # the training column, and scikit-learn validates feature names.
    fitted_names = getattr(model, "feature_names_in_", None)
    if fitted_names is not None and len(fitted_names) == 1:
        features = features.rename(columns={column: fitted_names[0]})

    corrected.loc[valid] = model.predict(features)
    return corrected


def applyCorrection(df,within='Within_Pointing',between='Between_Pointing',within_model=within_model,between_model=between_model):
    """Apply the fitted correction models to the buggy pointing columns of ``df``.

    Parameters
    ----------
    df : DataFrame holding the buggy pointing scores.
    within, between : names of the within- and between-pointing columns in ``df``.
    within_model, between_model : fitted regressors exposing ``predict``;
        they default to the module-level models bound when this function
        is defined.

    Returns
    -------
    (within_corrected, between_corrected) : two one-column DataFrames, indexed
    like ``df``, in which every non-NaN score has been replaced by the model
    prediction and NaN rows are left untouched.
    """
    within_corrected = _correct_column(df, within, within_model)
    between_corrected = _correct_column(df, between, between_model)

    return within_corrected,between_corrected


def plotCorrection(within_buggy,between_buggy,within_corrected,between_corrected,savefig=savefig):
    """Scatter-plot buggy versus corrected pointing errors on one set of axes.

    Between-pointing error is drawn on the x axis and within-pointing error
    on the y axis; buggy data are red and corrected data are blue.

    Parameters
    ----------
    within_buggy, between_buggy : uncorrected within/between pointing errors.
    within_corrected, between_corrected : corrected within/between pointing errors.
    savefig : when truthy, the figure is written with ``plt.savefig(output)``
        before being shown. The default is the module-level ``savefig`` bound
        when this function is defined.

    Notes
    -----
    ``output`` is not a parameter: it is read from the module-level name
    ``output`` at call time, so that name must exist whenever ``savefig`` is
    truthy.
    """
    # Proxy handles so the legend shows fully opaque markers for each group.
    canonical_legend_old = [plt.Line2D([0], [0], marker='o',color='w',markerfacecolor='red', label='Old Data: Buggy'),
                   plt.Line2D([0], [0], marker='o',color='w',markerfacecolor='blue', label='Old Data: Corrected')]

    fig, ax = plt.subplots(figsize=(18,9))

    # The scatter calls below pass (between, within) as (x, y), so the axis
    # labels must name between on x and within on y.
    ax.set_xlabel("Between Pointing Error (Degrees)")
    ax.set_ylabel("Within Pointing Error (Degrees)")

    ax.scatter(between_corrected,within_corrected,
               c='blue',alpha=.5,s=120,label='Old Data: Corrected')
    ax.scatter(between_buggy,within_buggy,
               c='red',alpha=.5,s=120,label='Old Data: Buggy')

    ax.set_xlim([0,100])
    ax.set_ylim([0,90])

    ax.legend(handles=canonical_legend_old, loc='upper left')

    if savefig:
        # `output` is resolved from module globals (see Notes above).
        plt.savefig(output)

    plt.show()