# Pointing Task

In [1]:
import glob
import io
import math
import os
import re

import dexplot as dxp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
import seaborn as sns
from scipy import stats
from scipy.spatial import distance
from scipy.stats import pearsonr

ModuleNotFoundError: No module named 'dexplot'

## Loading data 

In [None]:
# Added cell to set Working Directory to your location
os.chdir('/Users/tracysanchezpacheco/Documents/')
os.getcwd()

In [None]:
# specify data folder
# with wildcard for every file with a certain name
path = "Final/*.json"

In [None]:
## Collect the unique subject IDs encoded in the names of the data files
# NOTE(review): the ID is read from characters 15-18 of every matched path
# here, while the loading cell reads characters 6-9 of the same paths --
# confirm which offset matches the actual file naming scheme.
subIDs = np.unique(
    [int(sub[15:19]) for sub in glob.glob(path) if sub[15].isdigit()]
)
print(subIDs)

In [None]:
# Read every subject file and stack them into one trial-level DataFrame.
# The per-file frames are collected in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copies all previously loaded rows on every iteration.
subject_frames = []

# read every file name in folder
for filename in glob.glob(path):
    with open(filename, 'r') as file:
        # make json files parsable: prepend "[" and replace the last two
        # characters of the file with "]"
        data = "[" + file.read()
        data = data[:len(data)-2] + "]"

    # read data per file; the text is wrapped in a buffer because passing a
    # literal JSON string to read_json is deprecated in recent pandas
    subjectdf = pd.read_json(io.StringIO(data))

    # insert participant id in every line
    # NOTE(review): takes the ID from characters 6-9 of the path, i.e. the
    # first four characters after "Final/" -- confirm against the file names
    subjectdf.insert(0, "SubjectID", [int(filename[6:10])] * subjectdf.shape[0], True)
    subject_frames.append(subjectdf)

# an empty data folder still yields an (empty) DataFrame, as before
df = pd.concat(subject_frames, ignore_index=True) if subject_frames else pd.DataFrame()
df

## Data Wrangling 

### Avatar Categoricals 

In this section the variable `'ImageName'` will be disaggregated into categorical variables that provide information about the presence and nature of avatars in the trial. 

  
  - `extraRow`: function that creates a new variable containing the matching data from a preexisting variable
  - `'AvatarPresence'` / `'AvatarPresenceCategory'`: variables that show whether the trial image contained an avatar (`'AvatarPresence'` is `True` when the image name contains `'No'`)
  - `'meaningfulBuilding'`: variable that shows whether the trial asked to point to a location that originally had an Action or a Standing avatar
  - `'avatarID'`: variable that shows the ID of the avatar placed at the location to which the participant has to point (built from the list `avatarIDsf`)
   

In [None]:
def extraRow(df,yourlist,variable,newVariable):
    '''Tag each row with the entry of `yourlist` that occurs in `variable`.

    Parameters
    -------------
    df : DataFrame
        Frame to annotate; it is modified in place.
    yourlist : list
        Candidate values for the partial (substring) match.
    variable : str
        Name of the column whose values are searched.
    newVariable : str
        Name of the column that receives the matched candidate.

    Returns
    ----------
    DataFrame
        The same `df` object. Rows without a match are left untouched (NaN
        when `newVariable` is created by this call); when no row matches at
        all, no column is added.

    Notes
    -----
    When several candidates occur in one value, the candidate that appears
    last in `yourlist` wins. The test is plain containment, so '1' is also
    found inside '12'.
    '''
    # Scanning the de-duplicated candidates from the back and stopping at the
    # first hit reproduces "the last match in yourlist wins" without testing
    # every (row, candidate) pair and without one .loc write per hit.
    candidates = list(dict.fromkeys(reversed(list(yourlist))))

    def _last_match(value):
        for candidate in candidates:
            if candidate in value:
                return candidate
        return None

    matches = df[variable].map(_last_match)
    hit = matches.notna().to_numpy()
    if hit.any():
        # plain arrays are used so that no index alignment takes place
        df.loc[hit, newVariable] = matches.to_numpy()[hit]
    return df
                

In [None]:
# Extract every run of digits (the avatar number) from 'ImageName';
# AvaIDs holds one list of matches per trial.
AvaIDs = [re.findall(r'\d+', image_name) for image_name in df['ImageName']]
# Flatten the per-trial lists into one unified list. This is done once, after
# all rows are processed, so the list also exists when df has no rows and is
# not rebuilt from scratch on every iteration.
avatarIDsf = [item for sublist in AvaIDs for item in sublist]


In [None]:
#Searching for a partial string contained in the lists "AvatarIDsf" in variable "ImageName" to creat a new variable "AvatarID"
extraRow(df,avatarIDsf,'ImageName','avatarID')
df.head()

In [None]:
df['AvatarPresence'] = df['ImageName'].str.contains('No')
df.loc[:, 'AvatarPresenceCategory'] = df['AvatarPresence'].replace({True: 'NoAvatar', False: 'Avatar'})
df.head()

In [None]:
#Checking that value counts for Avatar presence [Should be 50/50]
df.AvatarPresenceCategory.value_counts()

In [None]:
df['meaningful'] = df['ImageName'].str.contains('CmA')
df.loc[:, 'meaningfulBuilding'] = df['meaningful'].replace({True: 'Action', False: 'Standing'})
df.head()

In [None]:
#Checking that value counts for Action and Standing avatars [Should be 50/50]
df.meaningfulBuilding.value_counts()

### Unnesting variables 

In [None]:
# exclude all rows where the participant didn't submit in time
dfT = df[df.TimeOut == False].reset_index(drop=True)
dfT.info()

In [None]:
def unnest(dframe):
    '''Expand the nested x/y/z vector columns into separate scalar columns.

    Every nested column listed below holds one mapping with the keys 'x',
    'y' and 'z' per row. For each of them three new columns named
    "<prefix>_x", "<prefix>_y" and "<prefix>_z" are inserted next to each
    other, in that order, at a fixed column position. The nested source
    columns themselves are kept.

    Parameters
    -------------
    dframe : DataFrame
        Trial data containing the nested columns; modified in place.
        NOTE(review): the insert positions are hard-coded, so they presumably
        match the column order of the raw pointing-task export -- confirm.

    Returns
    ----------
    DataFrame
        The same `dframe` object with 24 additional columns.
    '''
    # (nested source column, prefix of the new columns, insert position).
    # The positions are processed from the highest to the lowest so that an
    # insertion never shifts a position that is still to be used.
    # 'TargetBuildingForward' is deliberately exposed as
    # 'TargetBuildingDirection_*'.
    layout = [
        ('TargetBuildingForward', 'TargetBuildingDirection', 25),
        ('TargetBuildingRotation', 'TargetBuildingRotation', 24),
        ('TargetBuildingPosition', 'TargetBuildingPosition', 23),
        ('PointerDirection', 'PointerDirection', 21),
        ('PointerRotation', 'PointerRotation', 19),
        ('PointerPosition', 'PointerPosition', 18),
        ('ParticipantRotation', 'ParticipantRotation', 17),
        # ParticipantPosition_x used to be inserted at position 1, away from
        # its _y/_z siblings; all three now share position 16.
        ('ParticipantPosition', 'ParticipantPosition', 16),
    ]
    for source, prefix, position in layout:
        # Reuse dframe's index so the values stay on their own rows even when
        # the frame does not carry a default 0..n-1 index.
        components = pd.DataFrame(dframe[source].tolist(), index=dframe.index)
        # Inserting z, y, x at the same position leaves them ordered x, y, z.
        for axis in ('z', 'y', 'x'):
            dframe.insert(position, prefix + "_" + axis, components[axis], True)

    return dframe

In [None]:
dfT = unnest(dfT)
dfT.head()

In [None]:
dfT.info()

In [None]:
dfT.to_csv('PointingTask.csv')

## Data exploration

### Avatar Factors

In [None]:
dfT.AvatarPresenceCategory.value_counts()

In [None]:
dxp.count('SubjectID', data=df, split='AvatarPresenceCategory')

In [None]:
dfT[[  "SubjectID", "AvatarPresenceCategory"]].groupby([ "SubjectID", "AvatarPresenceCategory"]).size()

In [None]:
dfT.meaningfulBuilding.value_counts()

In [None]:
dxp.count('SubjectID', data=df, split='meaningfulBuilding')

In [None]:
dfT[[ "SubjectID", "meaningfulBuilding"]].groupby([ "SubjectID", "meaningfulBuilding"]).size()

### Building counts

In [None]:
# Bar chart of how often each stimulus image occurs in dfT
# (note: the figure size is changed globally for all following plots)
plt.rcParams['figure.figsize'] = (20,14)
sns.countplot(data=dfT, x='ImageName')
plt.xticks(fontsize=9, rotation=90)
plt.xlabel('Task Stimuli', fontsize=18)
# the axis label was misspelled "Frenquecy"
plt.ylabel('Frequency', fontsize=16)
plt.yticks(fontsize=10)
plt.show()

In [None]:
print(dfT[[ 'ImageName']].value_counts().to_markdown())

In [None]:
print(dfT[['SubjectID', 'ImageName']].value_counts().to_markdown())

### Angles

In [None]:
dfT.Angle.describe()

In [None]:
dfT.groupby( "SubjectID")['Angle'].describe()

In [None]:
# Histogram of the angular error (2-degree bins)
plt.title("Binned frequencies for Angular error", fontsize=18)
sns.histplot(data=dfT, x="Angle", binwidth=2)
plt.xticks(fontsize=10, rotation=90)
plt.xlabel('Angular error in Degrees', fontsize=18)
# the axis label was misspelled "Frenquecy"
plt.ylabel('Frequency', fontsize=16)
plt.yticks(fontsize=10)
plt.show()

### Response Time 

In [None]:
dfT.RT.describe()

In [None]:
# Histogram of the response times (bins of width 2)
plt.title("Binned frequencies for Response Time", fontsize=18)
sns.histplot(data=dfT, x="RT", binwidth=2)
plt.xticks(fontsize=10, rotation=90)
plt.xlabel('Response time in seconds', fontsize=18)
# the axis label was misspelled "Frenquecy"
plt.ylabel('Frequency', fontsize=16)
plt.yticks(fontsize=10)
plt.show()

In [None]:
# Log-transform response times
dfT['log_time'] = np.log(dfT['RT']) 

In [None]:
# Histogram of the log-transformed response times (bins of width 0.1)
plt.title("Binned frequencies for Log Response Time", fontsize=18)
sns.histplot(data=dfT, x="log_time", binwidth=0.100)
plt.xticks(fontsize=10, rotation=90)
# log_time is np.log(RT), so the axis no longer shows seconds
plt.xlabel('Log response time (natural log of seconds)', fontsize=18)
# the axis label was misspelled "Frenquecy"
plt.ylabel('Frequency', fontsize=16)
plt.yticks(fontsize=10)
plt.show()

### Dealing with  angles 

In [None]:
# Recalculate angles from vectors in order to include sign
# Functions for calculating signed angles


def Normalize(v):
    """Return `v` scaled to Euclidean length 1, as a list of components.

    A vector of length zero raises ZeroDivisionError.
    """
    length = math.sqrt(sum(component ** 2 for component in v))
    unit = []
    for component in v:
        unit.append(component / length)
    return unit

# Calc the signed angle between two vectors


def signedAngle(v1, v2):
    """Return the signed angle in degrees that rotates `v1` onto `v2`.

    Parameters
    ----------
    v1, v2 : sequence of two numbers
        2-D vectors; element 0 is used as the x and element 1 as the y
        argument of atan2.

    Returns
    -------
    numpy.float64
        Difference of the two polar angles, wrapped into [-180, 180].
        Counter-clockwise rotations (in the atan2 sense) are positive.

    Notes
    -----
    atan2 does not depend on vector length, so the vectors are not
    normalised first (the previous normalised copies were never used).
    A zero vector contributes atan2(0, 0) == 0.
    """
    angle = np.rad2deg(math.atan2(v2[1], v2[0]) - math.atan2(v1[1], v1[0]))
    # The raw difference lies in (-360, 360); fold it back so that the
    # returned value really is a signed angle.
    if angle > 180:
        angle -= 360
    elif angle < -180:
        angle += 360
    return angle


# Insert corrected signed angles into the dataframe.
# Each angle is measured in the x/z plane between the pointer direction and
# the target-building direction of the same trial. The four columns are
# walked in lockstep, which does not depend on dfT carrying a 0..n-1 index
# and avoids re-copying the result list for every row.
signedAngles = [
    signedAngle([pointer_x, pointer_z], [target_x, target_z])
    for pointer_x, pointer_z, target_x, target_z in zip(
        dfT['PointerDirection_x'], dfT['PointerDirection_z'],
        dfT['TargetBuildingDirection_x'], dfT['TargetBuildingDirection_z'])
]

# Account for angle deviations > 180°
signedAngles = [
    angle - 360 if angle > 180 else angle + 360 if angle < -180 else angle
    for angle in signedAngles
]

dfT.insert(11, "SignedAngle", signedAngles, True)



In [None]:
# Order participants from best avg performance (left) to worst avg performance (right) for the heatmap

def orderParticipants(inputdf=None, orderBy=""):
    """Sort `inputdf` by `orderBy` and renumber the participants by rank.

    Parameters
    ----------
    inputdf : DataFrame, optional
        Frame with a 'SubjectID' column; it is not modified. Defaults to an
        empty frame.
    orderBy : str
        Column to sort by (ascending).

    Returns
    -------
    DataFrame
        Sorted copy in which 'SubjectID' holds 1 for the participant that
        appears first after sorting, 2 for the next one, and so on.

    Notes
    -----
    Only the 'SubjectID' column is rewritten, and all IDs are translated in
    one step. Replacing the IDs one after another across the whole frame
    altered equal values in other columns and merged participants whenever
    an ID was equal to a rank that had already been handed out.
    """
    if inputdf is None:
        inputdf = pd.DataFrame()
    ordered = inputdf.sort_values(orderBy)
    ranks = {
        subject: rank
        for rank, subject in enumerate(ordered["SubjectID"].unique(), start=1)
    }
    ordered["SubjectID"] = ordered["SubjectID"].map(ranks)
    return ordered



# Heatmap plots


def meanErrXTargetB(participantgroup):
    """Build the table behind the mean-error heatmap.

    Parameters
    ----------
    participantgroup : DataFrame
        Trial data with the columns 'ImageName', 'SubjectID' and 'Angle'.

    Returns
    -------
    DataFrame
        One row per (image, participant) pair that occurs in the data, with
        'MeanError' (average 'Angle' of that pair) and 'SubjectMean' (average
        'Angle' of the participant over all images), passed through
        orderParticipants so that it is sorted by 'SubjectMean' and the
        participants are renumbered by rank.
    """
    # Rows are gathered in a list and converted once: DataFrame.append was
    # removed in pandas 2.0. A participant's overall mean is computed on
    # first use and then reused for all further images.
    subject_means = {}
    rows = []
    for image in participantgroup.ImageName.unique():
        targets = participantgroup[participantgroup.ImageName == image]
        for subject in targets.SubjectID.unique():
            if subject not in subject_means:
                subject_means[subject] = np.average(
                    participantgroup[participantgroup.SubjectID == subject]['Angle'])
            rows.append({
                'ImageName': str(image),
                'SubjectID': str(subject),
                'MeanError': np.average(targets[targets.SubjectID == subject]['Angle']),
                'SubjectMean': subject_means[subject],
            })
    heatmapdf = pd.DataFrame(rows)

    return orderParticipants(heatmapdf, "SubjectMean")


In [None]:
# Heatmap of the mean angular error per target image (rows) and
# participant (columns) for all trials in dfT
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10,24))

plt.title("Absolute average divergence from Target (in °) per Participant and Target Building",  fontsize=18)
# index/columns/values are named explicitly: pandas 2.0 no longer accepts
# them as positional arguments of DataFrame.pivot
sns.heatmap(meanErrXTargetB(dfT).pivot(index="ImageName",
            columns="SubjectID", values="MeanError"), cmap="Spectral", center=40)

plt.xticks(fontsize=10, rotation=90)
plt.xlabel('Subject', fontsize=18)
plt.ylabel( 'Target', fontsize=18)
plt.yticks(fontsize=10)
plt.show()


In [None]:
def stdXTargetB(participantgroup):
    """Build the table behind the standard-deviation heatmap.

    Parameters
    ----------
    participantgroup : DataFrame
        Trial data with the columns 'ImageName', 'SubjectID' and 'Angle'.

    Returns
    -------
    DataFrame
        One row per (image, participant) pair that occurs in the data, with
        'SdevError' (np.std of 'Angle' for that pair) and 'SubjectStd'
        (np.std of 'Angle' of the participant over all images), passed
        through orderParticipants so that it is sorted by 'SubjectStd' and
        the participants are renumbered by rank.
    """
    # Rows are gathered in a list and converted once: DataFrame.append was
    # removed in pandas 2.0. A participant's overall spread is computed on
    # first use and then reused for all further images.
    subject_stds = {}
    rows = []
    for image in participantgroup.ImageName.unique():
        targets = participantgroup[participantgroup.ImageName == image]
        for subject in targets.SubjectID.unique():
            if subject not in subject_stds:
                subject_stds[subject] = np.std(
                    participantgroup[participantgroup.SubjectID == subject]['Angle'])
            rows.append({
                'ImageName': str(image),
                'SubjectID': str(subject),
                'SdevError': np.std(targets[targets.SubjectID == subject]['Angle']),
                'SubjectStd': subject_stds[subject],
            })
    heatmapdf = pd.DataFrame(rows)

    return orderParticipants(heatmapdf, "SubjectStd")

In [None]:
# Heatmap of the standard deviation of the angular error per target image
# (rows) and participant (columns)
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10,24))
plt.title("Absolute Standard of divergences from Target (in °) per Participant and Target Building",  fontsize=18)
# index/columns/values are named explicitly: pandas 2.0 no longer accepts
# them as positional arguments of DataFrame.pivot
sns.heatmap(stdXTargetB(dfT).pivot(index="ImageName",
            columns="SubjectID", values="SdevError"), cmap="Spectral", center=40)
plt.xticks(fontsize=10, rotation=90)
plt.xlabel('Subject', fontsize=18)
plt.ylabel( 'Target', fontsize=18)
plt.yticks(fontsize=10)
plt.show()

In [None]:
# Relate each target's mean angular error to how far the target lies from the
# mean target position (averaged over all trials, so frequently shown targets
# weigh more). Targets are told apart by their x coordinate.
middle = [np.average(dfT['TargetBuildingPosition_x']), np.average(
    dfT['TargetBuildingPosition_y']), np.average(dfT['TargetBuildingPosition_z'])]

# Rows are gathered in a list and converted once: DataFrame.append was
# removed in pandas 2.0.
centricity_rows = []
for x in dfT.TargetBuildingPosition_x.unique():
    at_target = dfT[dfT.TargetBuildingPosition_x == x]
    target_position = [x,
                       np.average(at_target['TargetBuildingPosition_y']),
                       np.average(at_target['TargetBuildingPosition_z'])]
    centricity_rows.append({
        'AvgAccuracy': np.average(at_target['Angle']),
        'Centricity': distance.euclidean(middle, target_position),
    })
distances = pd.DataFrame(centricity_rows)

# plotting the datapoints with a fitted regression line
sns.regplot(data=distances, x='Centricity', y='AvgAccuracy', color="tab:orange")
plt.xlabel("Target distance from center of the map", weight='bold', fontsize=18)
plt.ylabel("Mean Divergence per Target (in °)", weight='bold', fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.show()

In [None]:
# Mean angular error for every distinct participant-to-target distance
dist = list(dfT.DistanceToParticipant.unique())
abw = [
    np.average(dfT[dfT.DistanceToParticipant == target_distance]['Angle'])
    for target_distance in dist
]

# scatter of the per-distance means with a fitted regression line
sns.regplot( y=abw, x=dist, color="tab:blue", scatter_kws={'s': 20})

plt.xlabel("Distance to Target (Meters)", weight='bold', fontsize=18)
plt.ylabel("Divergence from Target (in °)", weight='bold', fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.show()


In [None]:
# One value per participant: the average 'Angle' over that participant's
# trials; all values share the single group label "dfT".
yvals = [
    np.average(dfT[dfT.SubjectID == subject]['Angle'])
    for subject in dfT.SubjectID.unique()
]
grp = ["dfT"] * len(yvals)

# %%
# box plot with the individual participants on top; the dashed line marks
# the mean of the participant averages
sns.boxplot(x=grp, y=yvals, palette='Blues')
sns.swarmplot(x=grp, y=yvals, palette='Reds')
plt.axhline(np.average(yvals), color='black', linestyle='--')
plt.ylabel("Mean Absolute Deviation from target (in °)", fontsize=18)
plt.xlabel('')
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.show()


In [None]:

# One polar scatter plot per participant with one point per avatar ID:
# the angular coordinate is the mean 'Angle', the radial coordinate the mean
# 'RT', and the colour the avatar ID.
# NOTE(review): the mean RT is passed through math.radians as well although
# it is used as the radius -- confirm that this scaling is intended.
for subject in dfT.SubjectID.unique():
    dev = dfT[dfT.SubjectID == subject]
    avatar_ids = dev.avatarID.unique()

    plt.axes(projection='polar')
    plt.title("SubjectID: " + str(subject) + " Accuracy per building", fontsize=18)

    rads = [
        math.radians(np.average(dev[dev.avatarID == avatar]['Angle']))
        for avatar in avatar_ids
    ]
    dur = [
        math.radians(np.average(dev[dev.avatarID == avatar]['RT']))
        for avatar in avatar_ids
    ]
    buildings = [int(avatar) for avatar in avatar_ids]

    # draw the points, then display the polar plot of this participant
    plt.scatter(rads, dur, c=buildings)
    plt.xticks(fontsize=10)
    plt.yticks(fontsize=10)
    plt.show()
# %%

In [None]:
# Polar scatter plot over all participants with one point per avatar ID:
# angular coordinate = mean 'Angle', radial coordinate = mean 'RT',
# colour = avatar ID.
# NOTE(review): the mean RT is passed through math.radians as well although
# it is used as the radius -- confirm that this scaling is intended.
avatar_ids = dfT.avatarID.unique()
rads = [
    math.radians(np.average(dfT[dfT.avatarID == avatar]['Angle']))
    for avatar in avatar_ids
]
dur = [
    math.radians(np.average(dfT[dfT.avatarID == avatar]['RT']))
    for avatar in avatar_ids
]
buildings = [int(avatar) for avatar in avatar_ids]

# draw the points on polar axes and display the plot
plt.axes(projection='polar')
plt.scatter(rads, dur, c=buildings)
plt.title( "Average angle error to target across all participants", fontsize=18)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.show()
# %%

## Data Analysis 

In [None]:
from statsmodels.formula.api import ols, glm
import statsmodels.api as sm

In [None]:
# Define model formula
formula = 'log_time ~ AvatarPresence'

# Define probability distribution for the response variable for 
# the linear (LM) and logistic (GLM) model
family_LM = sm.families.Gaussian()
family_GLM = sm.families.Binomial()

# Define and fit a linear regression model
model_LM = glm(formula = formula, data = dfT, family = family_LM).fit()
print(model_LM.summary())

In [None]:
dfT.groupby( ["SubjectID", "AvatarPresenceCategory"])['RT'].describe()

In [None]:
# Define model formula
formula = 'Angle ~ AvatarPresence'

# Define and fit a linear regression model
model_LM = glm(formula = formula, data = dfT, family = family_LM).fit()
print(model_LM.summary())

In [None]:
dfT.groupby( ["SubjectID", "AvatarPresenceCategory"])['Angle'].describe().sort_values(by='mean')

In [None]:
pd.set_option('display.max_rows',1000)
dfT.groupby( "ImageName")['Angle'].describe().sort_values(by='mean')



In [None]:


# %%
# calculate distance -- TODO: for all buildings
# NOTE(review): PTBcontrol and PTBbelt are not created anywhere in the
# visible part of this notebook -- confirm where they come from.
def _centricity_per_target(group):
    """Return one row per distinct target x-position of `group`.

    'AvgAccuracy' is the mean 'RecalculatedAngle' of the trials at that
    target, 'Centricity' the Euclidean distance between the target and the
    group's mean target position (averaged over all trials).
    """
    middle = [np.average(group['TargetBuildingPosition_x']),
              np.average(group['TargetBuildingPosition_y']),
              np.average(group['TargetBuildingPosition_z'])]
    # Rows are gathered in a list and converted once: DataFrame.append was
    # removed in pandas 2.0.
    rows = []
    for x in group.TargetBuildingPosition_x.unique():
        at_target = group[group.TargetBuildingPosition_x == x]
        target_position = [x,
                           np.average(at_target['TargetBuildingPosition_y']),
                           np.average(at_target['TargetBuildingPosition_z'])]
        rows.append({
            'AvgAccuracy': np.average(at_target['RecalculatedAngle']),
            'Centricity': distance.euclidean(middle, target_position),
        })
    return pd.DataFrame(rows)


# plotting the datapoints of the control group
distances = _centricity_per_target(PTBcontrol)
sns.regplot(data=distances, x='Centricity', y='AvgAccuracy',
            label="Control group", color="tab:blue")

# plotting the datapoints of the belt group on the same axes
distances = _centricity_per_target(PTBbelt)
sns.regplot(data=distances, x='Centricity', y='AvgAccuracy',
            label="Belt group", color="tab:orange")
plt.xlabel("Target distance from center of the map", weight='bold')
plt.ylabel("Mean Divergence per Target (in °)", weight='bold')
plt.legend()
plt.show()
# %%
# Mean divergence for every distinct participant-to-target distance, with
# one regression per group drawn on shared axes.
# NOTE(review): PTBcontrol and PTBbelt are not created anywhere in the
# visible part of this notebook -- confirm where they come from.
for group, group_label, group_color in (
    (PTBcontrol, "Control group", "tab:blue"),
    (PTBbelt, "Belt group", "tab:orange"),
):
    dist = list(group.DistanceToParticipant.unique())
    abw = [
        np.average(group[group.DistanceToParticipant == target_distance]
                   ['RecalculatedAngle'])
        for target_distance in dist
    ]

    # plotting the datapoints; x and y are passed by keyword because
    # seaborn >= 0.12 no longer accepts them as positional arguments
    sns.regplot(x=dist, y=abw, label=group_label,
                color=group_color, scatter_kws={'s': 20})

plt.xlabel("Distance to Target (Meters)", weight='bold')
plt.ylabel("Divergence from Target (in °)", weight='bold')
plt.legend()
plt.show()


# %% Plots beginnen hier
# for subject in cgroup.SubjectID.unique():
#     plt.axes(projection='polar')
#     plt.title("SubjectID: " + str(subject) + " Genauigkeit je Gebäude")
#     dev = cgroup[cgroup.SubjectID == subject]
#     rads = []
#     dur = []
#     buildings = []
#     for building in dev.ImageName.unique():
#         rads = rads + \
#             [math.radians(np.average(dev[dev.ImageName == building]['RecalculatedAngle']))]
#         dur = dur + \
#             [math.radians(np.average(
#                 dev[dev.ImageName == building]['TrialDuration']))]
#         buildings = buildings + [int(building[13:])]

#     # plotting the circle
#     plt.scatter(rads, dur, c=buildings)

#     # display the Polar plot
#     plt.show()
# %%

# deviation from building per participant: one point per participant with
# the mean 'SignedAngle' as angular and its standard deviation as radial
# coordinate
# NOTE(review): PTBcontrol and PTBbelt are not created anywhere in the
# visible part of this notebook -- confirm where they come from.
ax = plt.axes(projection='polar')
ax.set_theta_zero_location("N")
ax.set_xlabel("Divergence from Target", weight='bold')
ax.set_ylabel("Standard deviation of Divergence", labelpad=33, weight='bold')
ax.set_rlabel_position(-120.5)

for group, group_label, group_cmap in (
    (PTBcontrol, "Control participants", 'Blues'),
    (PTBbelt, "Belt participants", 'Reds'),
):
    avg = []
    subj = []
    for subject in group.SubjectID.unique():
        signed = group[group.SubjectID == subject]['SignedAngle']
        avg.append(math.radians(np.average(signed)))
        subj.append(np.std(signed))

    # plotting the datapoints; x and y are passed by keyword because
    # seaborn >= 0.12 no longer accepts them as positional arguments
    sns.scatterplot(x=avg, y=subj, cmap=group_cmap, label=group_label)

plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)

plt.show()

# %%
# deviation from building per participant: one point per participant with
# the mean 'RecalculatedAngle' as angular and its standard deviation as
# radial coordinate, drawn on a half circle (0 to 180 degrees)
# NOTE(review): PTBcontrol and PTBbelt are not created anywhere in the
# visible part of this notebook -- confirm where they come from.
ax = plt.axes(projection='polar')
ax.set_theta_zero_location("E")
ax.set_thetamin(0)  # set the limits
ax.set_thetamax(180)
ax.set_xticks(np.pi/180. * np.linspace(0, 180, 7))
ax.set_xlabel("Divergence from Target", labelpad=-45, weight='bold')
ax.set_ylabel("Standard deviation of Divergence", labelpad=33, weight='bold')

for group, group_label, group_cmap in (
    (PTBcontrol, "Control participants", 'Blues'),
    (PTBbelt, "Belt participants", 'Reds'),
):
    avg = []
    subj = []
    for subject in group.SubjectID.unique():
        divergence = group[group.SubjectID == subject]['RecalculatedAngle']
        avg.append(math.radians(np.average(divergence)))
        subj.append(np.std(divergence))

    # plotting the datapoints; x and y are passed by keyword because
    # seaborn >= 0.12 no longer accepts them as positional arguments
    sns.scatterplot(x=avg, y=subj, cmap=group_cmap, label=group_label)

plt.legend(loc='upper right', borderaxespad=0)

plt.show()

# %%

# Average 'RecalculatedAngle' per participant, labelled with the group the
# participant belongs to (control participants first, then belt participants)
yvals, grp = [], []
for group, group_label in ((PTBcontrol, "Control Group"), (PTBbelt, "Belt Group")):
    for subject in group.SubjectID.unique():
        yvals.append(np.average(
            group[group.SubjectID == subject]['RecalculatedAngle']))
        grp.append(group_label)

# box plot per group with the individual participants drawn on top
sns.set(rc={"figure.figsize": (5, 5)}, style="ticks")
sns.boxplot(x=grp, y=yvals, palette=['tab:blue', 'tab:orange'])
sns.swarmplot(x=grp, y=yvals, palette='rocket_r')
plt.ylabel("Angle of divergence", weight='bold')
plt.xlabel("Pointing to Building Task", weight='bold', labelpad=10)

# dashed line: mean over all participant averages of both groups
plt.axhline(np.average(yvals), color='black', linestyle='--')

plt.show()
# %%
# Histogram über alle trials
#plt.title("Binned frequencies for divergence from Target (Control group)")
f, axs = plt.subplots(1, 2,
                      figsize=(14, 7),
                      # sharey=True,
                      gridspec_kw={'width_ratios': [2, 2]})

sns.histplot(data=PTBcontrol, x="RecalculatedAngle", binwidth=5,
             stat="frequency", color="tab:blue", kde=True, ax=axs[0])
axs[0].set(ylim=(0, 60))
axs[0].set_xlabel("Angle (Bin = 5°)", weight='bold')
axs[0].axvline(x=PTBcontrol["RecalculatedAngle"].mean(),
               color='red', label="Mean")
axs[0].axvline(x=np.median(PTBcontrol["RecalculatedAngle"]),
               color='green', ls="--", label="Median")

#plt.title("Binned frequencies for divergence from Target (Test Group)")
sns.histplot(data=PTBbelt, x="RecalculatedAngle", binwidth=5,
             stat="frequency", color="tab:orange", kde=True, ax=axs[1])
axs[1].set(ylim=(0, 60))
axs[1].set_xlabel("Angle (Bin = 5°)", weight='bold')
axs[1].axvline(x=PTBbelt["RecalculatedAngle"].mean(),
               color='red', label="Mean")
axs[1].axvline(x=np.median(PTBbelt["RecalculatedAngle"]),
               color='green', ls="--", label="Median")
axs[0].legend()
axs[1].legend()
plt.show()


# %%
# sns.set resets the style or theme preferences to the seaborn standard if not specifically mentioned
# just mention N size instead of normalizing plots
f, axs = plt.subplots(1, 2,
                      figsize=(14, 7),
                      # sharey=True,
                      gridspec_kw={'width_ratios': [25, 13]})
#sns.set(rc={"figure.figsize": (12, 7)}, style="ticks")
sns.boxplot(data=PTBcontrol, x='SubjectID', y='RecalculatedAngle',
            color="tab:blue", ax=axs[0])
axs[0].set_xlabel("Control Participant", weight='bold')
axs[0].set_ylabel("Divergence from target (in °)", weight='bold')
axs[0].set_xticklabels(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12',
                       '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26'])


#sns.set(rc={"figure.figsize": (6, 7)}, style="ticks")
sns.boxplot(data=PTBbelt, x='SubjectID', y='RecalculatedAngle',
            color="tab:orange", ax=axs[1])
axs[1].set_xlabel("Belt Participant", weight='bold')
axs[1].set_ylabel("Divergence from target (in °)", weight='bold')
axs[1].set_xticklabels(['1', '2', '3', '4', '5', '6', '7',
                       '8', '9', '10', '11', '12', '13', '14'])

plt.show()

# %%
# Independent two-sample t-tests between the control and the belt group.
# NOTE(review): PTBcontroldescribe / PTBbeltdescribe are not built in the
# visible part of the notebook; presumably they hold one describe() row per
# participant -- confirm. The critical value below assumes exactly that.
# TODO: test for homogeneity of variances (e.g. stats.levene) beforehand
print("independent t-test for the averages of both groups")
# ttest_ind is a two-sided test that pools the variances by default, so the
# value to compare its statistic with is the 97.5th percentile of a t
# distribution with n1 + n2 - 2 degrees of freedom.
print("t-value:", stats.t.ppf(
    1-0.025,
    PTBcontrol['SubjectID'].nunique() + PTBbelt['SubjectID'].nunique() - 2))
stats.ttest_ind(PTBcontroldescribe[
                'mean'], PTBbeltdescribe['mean'])
# %%
print("independent t-test for the stds of both groups")
# same test, same critical value (two-sided, n1 + n2 - 2 degrees of freedom)
print("t-value:", stats.t.ppf(
    1-0.025,
    PTBcontrol['SubjectID'].nunique() + PTBbelt['SubjectID'].nunique() - 2))
stats.ttest_ind(PTBcontroldescribe[
                'std'], PTBbeltdescribe['std'])

# %%
beltP = PTBbelt[PTBbelt.SubjectID == 1041]
# %%
#plt.title("Participant 1041: Absolute Angles")
sns.histplot(data=beltP, x="RecalculatedAngle", binwidth=5,
             color="tab:orange", binrange=(0, 180))
plt.xlabel("Angle (Bin = 5°)", weight='bold')
plt.show()
#plt.title("Participant 1041: Signed Angles")
sns.histplot(data=beltP, x="SignedAngle", binwidth=10,
             color="tab:orange", binrange=(-180, 180))
plt.xlabel("Angle (Bin = 10°)", weight='bold')
plt.show()
# %%
beltP = PTBcontrol[PTBcontrol.SubjectID == 1023]
# %%
#plt.title("Participant 1023: Absolute Angles")
sns.histplot(data=beltP, x="RecalculatedAngle", binwidth=5,
             color="tab:blue", binrange=(0, 180))
plt.xlabel("Angle (Bin = 5°)", weight='bold')
plt.show()
#plt.title("Participant 1023: Signed Angles")
sns.histplot(data=beltP, x="SignedAngle", binwidth=10,
             color="tab:blue", binrange=(-180, 180))
plt.xlabel("Angle (Bin = 10°)", weight='bold')
plt.show()


# %%
acontrol = PTBcontrol.groupby("SubjectID")['SignedAngle'].describe()
atest = PTBbelt.groupby("SubjectID")['SignedAngle'].describe()

# %%
def _start_target_labels(group):
    """Return, per row, the starting position index joined to the image name."""
    starts = group['StartingPositionIndex'].tolist()
    images = group['ImageName'].tolist()
    return [str(start) + image for start, image in zip(starts, images)]


# Label every trial with its starting-position/target pairing
PTBbelt['violin'] = _start_target_labels(PTBbelt)
PTBcontrol['violin'] = _start_target_labels(PTBcontrol)

# %%
# pairing averages in scatter plot from belt on y and control on x:
# one row per starting-position/target pairing of the control group with the
# mean 'RecalculatedAngle' of each group for that pairing.
# The rows are built as a list and converted once: DataFrame.append was
# removed in pandas 2.0.
pairing_rows = [
    {
        'pairing': x,
        'controlperformance': np.average(
            PTBcontrol[PTBcontrol.violin == x]["RecalculatedAngle"]),
        'beltperformance': np.average(
            PTBbelt[PTBbelt.violin == x]["RecalculatedAngle"]),
    }
    for x in PTBcontrol.violin.unique()
]
dicts = pd.DataFrame(pairing_rows)
# %%
sns.set(rc={"figure.figsize": (6, 6)}, style='ticks')
ax = sns.scatterplot(data=dicts[dicts.controlperformance > dicts.beltperformance],
                     x='controlperformance', y='beltperformance', color="tab:orange")
ax = sns.scatterplot(data=dicts[dicts.controlperformance < dicts.beltperformance],
                     x='controlperformance', y='beltperformance', color="tab:blue")
# draw diagonal line
ax.plot([0, 1], [0, 1], transform=ax.transAxes, color='green')
plt.ylabel("Control participants were better", weight='bold')
plt.xlabel("Belt participants were better", weight='bold')
#plt.title("Average group accuracy per Starting location/Target pair")
ax.set_xticks(range(0, 100, 10))
ax.set_yticks(range(0, 100, 10))
plt.show()
# %%
TasksControl = pd.read_csv("controlTasks.csv")
TasksBelt = pd.read_csv("beltTasks.csv")

sns.set(rc={"figure.figsize": (7, 5)}, style='ticks')
ax = sns.regplot(data=TasksControl, x="mean", y="buildingmean", color='tab:blue')
ax = sns.regplot(data=TasksBelt, x="mean",
                 y="buildingmean", color='tab:orange')
# draw diagonal line
#ax.plot([0, 1], [0, 1], transform=ax.transAxes, color='green')
plt.ylabel("Divergence from Target Building (in °)", weight='bold')
plt.xlabel("Divergence from North (in °)", weight='bold')
#plt.title("Average Deviation from Target per Participant")
#ax.set_xticks(range(20, 180, 20))
ax.set_ylim(0, 130)
ax.set_xlim(0, 180)
#ax.set_yticks(range(20, 180, 20))
plt.show()
# %%
sns.set(rc={"figure.figsize": (5, 5)}, style='ticks')
ax = sns.regplot(data=TasksControl, x="std", y="buildingstd", color='tab:blue')
ax = sns.regplot(data=TasksBelt, x="std", y="buildingstd", color='tab:orange')
# draw diagonal line
#ax.plot([0, 1], [0, 1], transform=ax.transAxes, color='green')
plt.ylabel("Divergence from Target Building (in °)", weight='bold')
plt.xlabel("Divergence from North (in °)", weight='bold')
#plt.title("Standard Deviation of Divergence from Target per Participant")
#ax.set_xticks(range(20, 180, 20))
ax.set_ylim(0, 60)
ax.set_xlim(0, 60)
#ax.set_yticks(range(20, 180, 20))
plt.show()
# %%
print("Pearsonr Control\n" +
      str(pearsonr(TasksControl['mean'], TasksControl['buildingmean'])))
print("Pearsonr Belt\n" +
      str(pearsonr(TasksBelt['mean'], TasksBelt['buildingmean'])))
# %%
print("Pearsonr Control Sdev\n" +
      str(pearsonr(TasksControl['std'], TasksControl['buildingstd'])))
print("Pearsonr Belt Sdev\n" +
      str(pearsonr(TasksBelt['std'], TasksBelt['buildingstd'])))
# %%
print("independent t-test for the averages of both groups")
# ttest_ind is a two-sided test for the null hypothesis that 2 independent
# samples have identical average (expected) values, and it assumes identical
# population variances by default. The matching critical value is therefore
# the 97.5th percentile of a t distribution with n1 + n2 - 2 degrees of
# freedom (one degree is lost per estimated group mean).
# NOTE(review): PTBcontroldescribe / PTBbeltdescribe are not built in the
# visible part of the notebook; presumably they hold one describe() row per
# participant -- confirm.
print("t-value:", stats.t.ppf(1-0.025,
                              PTBcontrol['SubjectID'].nunique() + PTBbelt['SubjectID'].nunique() - 2))
stats.ttest_ind(PTBcontroldescribe[
                'mean'], PTBbeltdescribe['mean'])
# %%
print("independent t-test for the stds of both groups")
# same test, same critical value (two-sided, n1 + n2 - 2 degrees of freedom)
print("t-value:", stats.t.ppf(1-0.025,
                              PTBcontrol['SubjectID'].nunique() + PTBbelt['SubjectID'].nunique() - 2))
stats.ttest_ind(PTBcontroldescribe[
                'std'], PTBbeltdescribe['std'])

# %%
