# Map CSV together and remove elsewhere

This file will combine the participants csv together with the fixations and ROI csv.


In [1]:
import pandas as pd
import pandas as pd
import seaborn as sns
import cv2
from matplotlib import pyplot as plt
from random import choice
import numpy as np

The fixations that are outside the picture get removed since they do not contribute to the overal research.


In [None]:
pad = "../../data/csv/"
image_pad =  "../../data/Images/ROIs/"

participants = pd.read_csv(pad+"participants.csv")
fixations = pd.read_csv(pad+"fixations.csv")
ROI = pd.read_csv(pad + "ROIs.csv")

fix = fixations.merge(participants, how="left", on="Participant")
#remove fixations outside the image
fix = fix[(fix["X"] > 0) & (fix["X"] < 1024) & (fix["Y"] > 0) & (fix["Y"] < 768)]

condition_to_remove = (fix["Image"] == 1) & (fix["Scene"] == 1) & (fix["X"] > 731)
filtered_fix = fix.loc[~condition_to_remove]

After removing the fixations that are out of the image, there are still 2550 fixations that are elsewhere in the data.


In [3]:
len(filtered_fix[(filtered_fix["Name"] == "Elsewhere")])

1498

To reduce this amount, a function is created. This function recieves the old x and y, the image and scene for which we want a new ROI and the pixel offset. We take this x and y and offset is by the offset. We then look at the rgb value at this location and look in our ROI csv to pick the corresponding ROI. If we don't find it, we retry with a bigger offset. Finally the new ROI is returned.


In [4]:
def get_name_from_coordinates(X, Y, image, scene, offset=1):
    img = cv2.imread(image_pad + f'scene{scene}_image{image}.png')
    xnew, ynew = int(X+choice([-offset,offset])), int(Y+choice([-offset,offset]))
    if xnew >= len(img[1]):
        xnew = len(img[1]) - 5
    if ynew >= len(img):
        ynew = len(img) - 5
    [B,G,R] = img[ynew, xnew,:]
    #print(B, G, R)
    this_ROI = ROI[(ROI["C1"] == R) & (ROI["C2"] == G) & (ROI["C3"] == B) & (ROI["Image"] == image) &  (ROI["Scene"] == scene)]

    if len(this_ROI) > 0:
        this_name = this_ROI.iloc[0]["Name"]
        return this_name
    if not offset == 3:
        return get_name_from_coordinates(X, Y, image, scene, 3)#'Elsewhere'
    return 'Elsewhere'

The function above is only helpfull if we actually use it so that's what the code below does. The dataset gets filtered on elsewhere, the necessary columns are given to the function and the result get's saved in the Name column.


In [5]:
filtered_fix['Name'] = filtered_fix.apply(lambda row: get_name_from_coordinates(row['X'], row['Y'], row['Image'], row['Scene']) 
                      if row['Name'] == 'Elsewhere' else row['Name'], axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_fix['Name'] = filtered_fix.apply(lambda row: get_name_from_coordinates(row['X'], row['Y'], row['Image'], row['Scene'])


Here the same happens as above but we specifically look at fixations that have NaN in the name column.


In [6]:
filtered_fix['Name'] = filtered_fix.apply(lambda row: get_name_from_coordinates(row['X'], row['Y'], row['Image'], row['Scene']) 
                      if pd.isna(row['Name']) else row['Name'], axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_fix['Name'] = filtered_fix.apply(lambda row: get_name_from_coordinates(row['X'], row['Y'], row['Image'], row['Scene'])


The amount of fixations that are elsewhere have now succesfully been reduced from 2550 to 75.


In [7]:
len(filtered_fix[(filtered_fix["Name"] == "Elsewhere")]) 

49

As a final thing, the remaining fixations with elsewhere gets removed.


In [8]:
fix_filtered = filtered_fix[filtered_fix['Name'] != 'Elsewhere']

Lastly we save all of this to a new csv.


In [9]:
fix_filtered.to_csv(pad + "cleaned_fix2.csv")

Because the evidence, exitentry and movable columns are not in the fixations csv we add these from the ROI csv.


In [10]:
fix_evidence = fix_filtered.merge(ROI[['Name', 'Evidence', 'Movable', 'ExitEntry', 'Image', 'Scene']], how='left', on=['Name', 'Image', 'Scene'])

fix_evidence.head()

Unnamed: 0,Image,Scene,ROI,Participant,FixDur,FixNr,Start,End,X,Y,Name,NextROI,experience,Evidence,Movable,ExitEntry
0,1,1,0,2.0,103.0,1.0,7.0,109.0,510.9,381.0,Back_wall,Bottle_on_table,Control,No,No,No
1,1,1,22,2.0,77.0,2.0,236.0,312.0,450.8,307.0,Bottle_on_table,Floor,Control,Yes,Yes,No
2,1,1,9,2.0,300.0,3.0,347.0,646.0,414.5,447.9,Floor,Body,Control,No,No,No
3,1,1,0,2.0,276.0,6.0,1386.0,1661.0,394.4,414.5,Floor,Elsewhere,Control,No,No,No
4,1,1,0,2.0,296.0,7.0,1711.0,2006.0,655.3,416.2,Side_wall,Elsewhere,Control,No,No,No


In [11]:
fix_evidence.to_csv(pad + "fix_evidence2.csv")