In [3]:
import pandas as pd
import json
from datetime import datetime, timedelta
import os


#### Charge json and file

In [36]:
df = pd.read_csv('gaze_clean.csv')
df["current_time"] = pd.to_datetime(df["current_time"], format="%Y-%m-%dT%H:%M:%S.%fZ")

with open("times/sebastian_posts_times.json", "r") as file:
    json_data = json.load(file)

json_obj = json_data[0]
initial_date = datetime.strptime(json_obj["initialDate"], "%Y-%m-%dT%H:%M:%S.%fZ")
initial_date, df.iloc[0]["current_time"]

(datetime.datetime(2024, 6, 21, 4, 16, 23, 991000),
 Timestamp('2024-06-21 04:16:22.883000'))

In [33]:
# Step 1: Create a new DataFrame with the rows where `time_difference` is less than zero
last_time_seconds = df[df["current_time"] - initial_date < timedelta(seconds=0)]["time_seconds"].iloc[-1]

# Step 2: Remove all rows where `time_seconds` is less than 0
df["time_seconds"] = df["time_seconds"] - last_time_seconds
df = df[df["time_seconds"] >= 0]

# Reset the index
df.reset_index(drop=True, inplace=True)

# Initialize the postID column
df["postID"] = None

# Iterate through each JSON object to assign postID
for obj in json_data:
    post_start_time = obj["PostStartTime"]
    post_end_time = obj["PostEndTime"]
    post_id = obj["postID"]

    # Assign `postID` based on `PostStartTime` and `PostEndTime`
    df.loc[
        (df["time_seconds"] >= post_start_time) & (df["time_seconds"] <= post_end_time),
        "postID",
    ] = post_id

# Remove all rows where `postID` is None
df = df[df["postID"].notna()].reset_index(drop=True)
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["postID"] = None


Unnamed: 0,x,y,time_seconds,current_time,postID
13,994,602,0.216501,2024-06-21 04:16:24.200,3
14,991,609,0.233156,2024-06-21 04:16:24.217,3
15,989,609,0.249810,2024-06-21 04:16:24.233,3
16,991,588,0.266464,2024-06-21 04:16:24.251,3
17,978,595,0.283118,2024-06-21 04:16:24.266,3
...,...,...,...,...,...
3509,816,143,58.440704,2024-06-21 04:17:22.424,1
3510,818,145,58.457359,2024-06-21 04:17:22.441,1
3511,822,142,58.474013,2024-06-21 04:17:22.474,1
3512,820,140,58.490668,2024-06-21 04:17:22.476,1


In [46]:
# Ruta a la carpeta de capturas de pantalla
screenshots_folder = 'screenshots/'

# Obtener la lista de archivos de la carpeta de capturas de pantalla
screenshot_files = os.listdir(screenshots_folder)

# Filtrar solo los archivos PNG
screenshot_files = [file for file in screenshot_files if file.endswith('.png')]

# Lista para almacenar las asignaciones de postID
screenshot_assignments = []

# Iterar sobre cada archivo de captura de pantalla
for file in screenshot_files:
    # Extraer la marca de tiempo del nombre del archivo
    timestamp_str = file.replace('screenshot_', '').replace('.png', '').replace('_', ':')
    screenshot_time = datetime.strptime(timestamp_str, "%Y-%m-%dT%H:%M:%S")
    
    # Asignar el postID correcto basado en la marca de tiempo
    assigned_post_id = None
    for obj in json_data:
        initial_date = datetime.strptime(obj["initialDate"], "%Y-%m-%dT%H:%M:%S.%fZ")
        post_start_time = initial_date + timedelta(seconds=obj["PostStartTime"])
        post_end_time = initial_date + timedelta(seconds=obj["PostEndTime"])

        if post_start_time <= screenshot_time <= post_end_time:
            assigned_post_id = obj["postID"]
            break

    # Añadir la asignación a la lista
    screenshot_assignments.append({
        "filename": file,
        "screenshot_time": screenshot_time,
        "postID": assigned_post_id
    })

# Crear un DataFrame con las asignaciones
screenshot_df = pd.DataFrame(screenshot_assignments)
screenshot_df = screenshot_df[screenshot_df["postID"].notna()].reset_index(drop=True)
screenshot_df = screenshot_df.drop_duplicates(subset='postID', keep='first')
screenshot_df.sort_values(by="screenshot_time", inplace=True)

# Mostrar el DataFrame resultante
print(screenshot_df)


                             filename     screenshot_time  postID
2  screenshot_2024-06-21T04_16_28.png 2024-06-21 04:16:28     3.0
1  screenshot_2024-06-21T04_16_49.png 2024-06-21 04:16:49     6.0
5  screenshot_2024-06-21T04_16_55.png 2024-06-21 04:16:55     2.0
9  screenshot_2024-06-21T04_17_11.png 2024-06-21 04:17:11     5.0
0  screenshot_2024-06-21T04_17_21.png 2024-06-21 04:17:21     1.0


In [52]:
screenshot_df['postID'] = screenshot_df['postID'].astype(int)

# Crear un diccionario para mapear postID a filename de screenshot_df
postID_to_filename = screenshot_df.set_index('postID')['filename'].to_dict()

# Asignar el filename correspondiente a cada fila de df basado en postID
df['screenshot_filename'] = df['postID'].map(postID_to_filename)

# Mostrar el DataFrame resultante
print(df["screenshot_filename"])



0       screenshot_2024-06-21T04_16_28.png
1       screenshot_2024-06-21T04_16_28.png
2       screenshot_2024-06-21T04_16_28.png
3       screenshot_2024-06-21T04_16_28.png
4       screenshot_2024-06-21T04_16_28.png
                       ...                
2860    screenshot_2024-06-21T04_17_21.png
2861    screenshot_2024-06-21T04_17_21.png
2862    screenshot_2024-06-21T04_17_21.png
2863    screenshot_2024-06-21T04_17_21.png
2864    screenshot_2024-06-21T04_17_21.png
Name: screenshot_filename, Length: 2865, dtype: object


In [57]:
# Crear la carpeta gaze/ si no existe
output_folder = 'gaze_posts/'
os.makedirs(output_folder, exist_ok=True)

# Obtener la lista única de postID
unique_post_ids = df['postID'].unique()

# Iterar sobre cada postID y guardar el DataFrame filtrado en un archivo CSV
for post_id in unique_post_ids:
    # Filtrar el DataFrame por el postID actual
    df_filtered = df[df['postID'] == post_id]
    
    # Crear el nombre del archivo
    filename = f'sebastian_gaze_{post_id}.csv'
    
    # Guardar el DataFrame filtrado en un archivo CSV
    df_filtered.to_csv(os.path.join(output_folder, filename), index=False)

print(f'Archivos CSV creados en la carpeta {output_folder}')


Archivos CSV creados en la carpeta gaze_posts/


In [69]:
width = 1920
height = 1080

for post_id in unique_post_ids:
    df_file = f"gaze_posts/sebastian_gaze_{post_id}.csv"
    df_file = pd.read_csv(df_file)
    # print(df_file)
    print(f"PostID: {post_id}")
    input_file = f"gaze_posts/sebastian_gaze_{post_id}.csv"
    image_file = f"screenshots/{df_file["screenshot_filename"].loc[0]}"
    heatmap_file = f"heatmaps/sebastian_heatmap_{post_id}.png"
    print(f"Input file: {input_file},\nImage file: {image_file},\nheatmap_file : {heatmap_file}") 
    # break
    os.system(
        f"python ../../scripts/gazeheatplot.py {input_file} {width} {height} -b {image_file} -o {heatmap_file}")



PostID: 3
Input file: gaze_posts/sebastian_gaze_3.csv,
Image file: screenshots/screenshot_2024-06-21T04_16_28.png,
heatmap_file : heatmaps/sebastian_heatmap_3.png
PostID: 6
Input file: gaze_posts/sebastian_gaze_6.csv,
Image file: screenshots/screenshot_2024-06-21T04_16_49.png,
heatmap_file : heatmaps/sebastian_heatmap_6.png
PostID: 2
Input file: gaze_posts/sebastian_gaze_2.csv,
Image file: screenshots/screenshot_2024-06-21T04_16_55.png,
heatmap_file : heatmaps/sebastian_heatmap_2.png
PostID: 5
Input file: gaze_posts/sebastian_gaze_5.csv,
Image file: screenshots/screenshot_2024-06-21T04_17_11.png,
heatmap_file : heatmaps/sebastian_heatmap_5.png
PostID: 1
Input file: gaze_posts/sebastian_gaze_1.csv,
Image file: screenshots/screenshot_2024-06-21T04_17_21.png,
heatmap_file : heatmaps/sebastian_heatmap_1.png


In [71]:
import pandas as pd
import json
from datetime import datetime, timedelta
import os

def load_gaze_data(file_path):
    df = pd.read_csv(file_path)
    df["current_time"] = pd.to_datetime(df["current_time"], format="%Y-%m-%dT%H:%M:%S.%fZ")
    return df

def load_json_data(file_path):
    with open(file_path, "r") as file:
        json_data = json.load(file)
    return json_data

In [73]:
def process_gaze_data(df, json_data):
    #Step 1: initial date and first time
    initial_date = datetime.strptime(json_data[0]["initialDate"], "%Y-%m-%dT%H:%M:%S.%fZ")
    last_time_seconds = df[df["current_time"] - initial_date < timedelta(seconds=0)]["time_seconds"].iloc[-1]

    # Step 2: Remove all rows where `time_seconds` is less than 0
    df["time_seconds"] = df["time_seconds"] - last_time_seconds
    df = df[df["time_seconds"] >= 0].reset_index(drop=True)
    df["postID"] = None

    #Step 3: Assign postID based on PostStartTime and PostEndTime
    for obj in json_data:
        post_start_time = obj["PostStartTime"]
        post_end_time = obj["PostEndTime"]
        post_id = obj["postID"]

        df.loc[
            (df["time_seconds"] >= post_start_time) & (df["time_seconds"] <= post_end_time),
            "postID",
        ] = post_id

    #Step 4: Remove all rows where `postID` is None
    df = df[df["postID"].notna()].reset_index(drop=True)
    return df

def process_screenshots(screenshots_folder, json_data):

    #Step 1: Get the list of screenshot files
    screenshot_files = os.listdir(screenshots_folder)
    screenshot_files = [file for file in screenshot_files if file.endswith('.png')]
    screenshot_assignments = []

    #Step 2: Assign postID based on screenshot timestamp
    for file in screenshot_files:
        timestamp_str = file.replace('screenshot_', '').replace('.png', '').replace('_', ':')
        screenshot_time = datetime.strptime(timestamp_str, "%Y-%m-%dT%H:%M:%S")
        assigned_post_id = None
        for obj in json_data:
            initial_date = datetime.strptime(obj["initialDate"], "%Y-%m-%dT%H:%M:%S.%fZ")
            post_start_time = initial_date + timedelta(seconds=obj["PostStartTime"])
            post_end_time = initial_date + timedelta(seconds=obj["PostEndTime"])

            if post_start_time <= screenshot_time <= post_end_time:
                assigned_post_id = obj["postID"]
                break

        screenshot_assignments.append({
            "filename": file,
            "screenshot_time": screenshot_time,
            "postID": assigned_post_id
        })
    
    #Step 3: Create a DataFrame with the assignments
    screenshot_df = pd.DataFrame(screenshot_assignments)
    screenshot_df = screenshot_df[screenshot_df["postID"].notna()].reset_index(drop=True)
    screenshot_df = screenshot_df.drop_duplicates(subset='postID', keep='first')
    screenshot_df.sort_values(by="screenshot_time", inplace=True)
    return screenshot_df

def assign_screenshot_filenames(df, screenshot_df):
    screenshot_df['postID'] = screenshot_df['postID'].astype(int)
    postID_to_filename = screenshot_df.set_index('postID')['filename'].to_dict()
    df['screenshot_filename'] = df['postID'].map(postID_to_filename)
    return df

def save_split_files(df, output_folder, name):
    os.makedirs(output_folder, exist_ok=True)
    unique_post_ids = df['postID'].unique()

    for post_id in unique_post_ids:
        df_filtered = df[df['postID'] == post_id]
        filename = f'{name}_gaze_{post_id}.csv'
        df_filtered.to_csv(os.path.join(output_folder, filename), index=False)
    
    print(f'Archivos CSV creados en la carpeta {output_folder}')

def create_heatmaps(unique_post_ids, width, height, name):
    for post_id in unique_post_ids:
        df_file = f"gaze_posts/{name}_gaze_{post_id}.csv"
        df_file = pd.read_csv(df_file)
        input_file = f"gaze_posts/{name}_gaze_{post_id}.csv"
        image_file = f"screenshots/{df_file['screenshot_filename'].iloc[0]}"
        heatmap_file = f"heatmaps/{name}_heatmap_{post_id}.png"
        os.system(
            f"python ../../scripts/gazeheatplot.py {input_file} {width} {height} -b {image_file} -o {heatmap_file}")


In [None]:
def main():

    name = 'sebastian'
    input_file = 'gaze_clean.csv'
    json_file = f'times/{name}_posts_times.json'

    df = load_gaze_data(input_file)
    json_data = load_json_data(json_file)
    df = process_gaze_data(df, json_data)
    screenshot_df = process_screenshots('screenshots/', json_data)
    df = assign_screenshot_filenames(df, screenshot_df)
    save_split_files(df, 'gaze_posts/', name)
    unique_post_ids = df['postID'].unique()
    create_heatmaps(unique_post_ids, width=1920, height=1080)

if __name__ == "__main__":
    main()
