### Load packages

In [1]:
# Print the kernel's working directory: the relative DB_LOCATION defined below
# is resolved against it when the DuckDB connection is opened.
!pwd

/home/martin/Projects/ongoing/sphinx-ai/src/pupils_manager


In [2]:
import hashlib
import logging
from dataclasses import astuple
from datetime import datetime
from functools import lru_cache
from os import listdir
from os.path import isfile, join

import duckdb as ddb
import pandas as pd

from pupil_classes import User, Test


# Path of the DuckDB database file, relative to the notebook's working directory.
DB_LOCATION = "../../data/pupils/db.duckdb"

### Database session

In [3]:
# Open a writable session on the DuckDB file; the cells below run their
# INSERT statements through this connection.
con = ddb.connect(DB_LOCATION, read_only=False)

### Preprocess and store data in DB

In [9]:
# Folder holding the exported CSV files of ONE recording. Alternative
# recordings used previously are kept here for reference:
# PUPIL_FILES_PATH = "/home/martin/Projects/sphinx-ai/data/pupils/tests/Test Inicial Resistencia Ocular/Nico/"
# PUPIL_FILES_PATH = ("/home/martin/Projects/sphinx-ai/data/pupils/tests/Test Inicial Sacadicos/Pablo/")
# PUPIL_FILES_PATH = '/home/martin/Projects/sphinx-ai/data/pupils/tests/Test Inicial Vision Periferica/VP - Logical/Nico/'
# NOTE(review): absolute, machine-specific path — consider deriving it from a
# configurable data directory, as DB_LOCATION does.
PUPIL_FILES_PATH = "/home/martin/Projects/ongoing/sphinx-ai/data/pupils/tests/Test visual medico/estabilidad- fijacion/Estabilidad-Fijacion Pablo 29.3.2024/to_use/"

# Bookkeeping files that live next to the data CSVs but are not loaded as tables.
EXPORT_INFO_FILE = "export_info.csv"
RECORDING_INFO_FILE = "iMotions_info.csv"

# Every regular *.csv file in the folder. Sorted because `listdir` returns
# entries in arbitrary order and the ingest order should be reproducible.
csv_files = sorted(
    f
    for f in listdir(PUPIL_FILES_PATH)
    if isfile(join(PUPIL_FILES_PATH, f)) and f.endswith(".csv")
)

# The recording-info file provides the recording UUID and the clock offsets
# used further down; fail early with a clear message when it is absent.
if RECORDING_INFO_FILE not in csv_files:
    raise FileNotFoundError(
        f"{RECORDING_INFO_FILE} not found in {PUPIL_FILES_PATH}"
    )

# `join` (rather than string concatenation) so the path is valid whether or not
# PUPIL_FILES_PATH ends with a separator.
data_file_path = join(PUPIL_FILES_PATH, RECORDING_INFO_FILE)

# Data files to ingest: every CSV except the two bookkeeping files.
pupil_files = [
    f for f in csv_files if f not in (EXPORT_INFO_FILE, RECORDING_INFO_FILE)
]


def generate_id(input_string):
    """Derive a short, deterministic identifier from a string.

    The string is UTF-8 encoded and hashed with SHA-256; the first 8
    hexadecimal characters of the digest are returned.
    """
    digest = hashlib.sha256(input_string.encode("utf-8")).hexdigest()
    return digest[:8]


def get_test_player_data(data_file_path: str) -> dict:
    """Read selected recording metadata from a ``key,value`` text file.

    Args:
        data_file_path: Path of the recording-info file to read.

    Returns:
        A dict with the keys ``"Start Date"``, ``"Start Time (System)"``,
        ``"Start Time (Synced)"`` and ``"Recording UUID"``. Each value is the
        whitespace-stripped text found for that key, or ``""`` when the key
        does not occur in the file. If a key occurs several times, the last
        occurrence wins.

    Raises:
        OSError: If the file cannot be opened.
    """
    relevant_data = {
        "Start Date": "",
        "Start Time (System)": "",
        "Start Time (Synced)": "",
        "Recording UUID": "",
    }

    with open(data_file_path, "r") as file:
        for line in file:
            fields = line.split(",")
            # Only plain `key,value` rows are considered; rows with no comma
            # or with additional commas are skipped explicitly (instead of
            # relying on a bare `except` that would also hide real errors).
            if len(fields) != 2:
                continue

            key, value = fields
            if key in relevant_data:
                relevant_data[key] = value.strip()

    return relevant_data


def get_pupils_recording_id(data_file_path: str):
    """Return the ``"Recording UUID"`` entry of the recording-info file.

    The value is ``""`` when the file contains no such entry.
    """
    player_data = get_test_player_data(data_file_path)
    return player_data["Recording UUID"]


@lru_cache(maxsize=None)
def _get_clock_offset(data_file_path: str) -> float:
    """Return ``Start Time (System) - Start Time (Synced)`` for a recording.

    The result is cached per path so the info file is parsed once rather than
    once per converted timestamp. Call ``_get_clock_offset.cache_clear()`` if
    the file is modified on disk during the session.
    """
    player_data = get_test_player_data(data_file_path)
    return float(player_data["Start Time (System)"]) - float(
        player_data["Start Time (Synced)"]
    )


def transform_unix_to_datetime(unix_time: float, data_file_path: str):
    """Convert a recording timestamp to a wall-clock datetime string.

    Args:
        unix_time: Timestamp in seconds. The offset between the recording's
            "Start Time (System)" and "Start Time (Synced)" is added to it, so
            it is presumably expressed on the synced clock rather than being a
            true Unix time — TODO confirm.
        data_file_path: Path of the recording-info file holding both start
            times.

    Returns:
        The shifted timestamp formatted as ``"%Y-%m-%d %H:%M:%S.%f"``.
        ``datetime.fromtimestamp`` is called without a timezone, so the result
        is in the local timezone of the machine running the notebook.

    Raises:
        ValueError: If either start time is missing or not numeric in the
            info file.
    """
    wall_time = datetime.fromtimestamp(
        _get_clock_offset(data_file_path) + unix_time
    ).strftime("%Y-%m-%d %H:%M:%S.%f")
    return wall_time


def get_test_date(data_file_path: str):
    """Return the ``"Start Date"`` entry of the recording-info file.

    The value is ``""`` when the file contains no such entry.
    """
    player_data = get_test_player_data(data_file_path)
    return player_data["Start Date"]


# `logger` is used throughout this cell but is not defined anywhere else in the
# notebook, so it is created here to keep the cell runnable on a fresh kernel.
logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)


def _insert_row(connection, table: str, values: tuple) -> None:
    """Insert one row into `table`, passing the values as bound parameters.

    Binding the values (instead of pasting a Python tuple repr into the SQL
    text) keeps quotes, empty strings and ``None`` from corrupting the query.
    """
    placeholders = ", ".join("?" for _ in values)
    connection.execute(
        f"INSERT INTO {table} VALUES ({placeholders})", list(values)
    )


user_name = "Pablo"

# The user id is derived from the name; the test id is the recording's UUID.
user_id = generate_id(user_name)
test_id = get_pupils_recording_id(data_file_path)
# test_date = get_test_date(data_file_path)

user = User(
    name=user_name,
    sport="Tenis",
    position="",
    birth_date="1990-09-26",
    id=user_id,
)

test = Test(
    id=test_id,
    test_name="Test Medico",
    variation="Estabilidad Fijacion",
    user_id=user_id,
    # test_date=test_date,
)

logger.info(user_id)
logger.info(test_id)

# NOTE(review): re-running this cell inserts the same user and test again;
# confirm whether the tables enforce keys or whether an upsert is wanted.
_insert_row(con, "users", astuple(user))
_insert_row(con, "tests", astuple(test))

# (table_name, DataFrame) pairs of everything inserted, kept for inspection.
dfs = []

for file in pupil_files:
    logger.info(file)
    df = pd.read_csv(join(PUPIL_FILES_PATH, file), on_bad_lines="skip")

    # Rewrite every timestamp column as a wall-clock datetime string.
    timestamp_columns = [label for label in df.columns if "timestamp" in label]
    for col_label in timestamp_columns:
        logger.info(col_label)
        df[col_label] = df[col_label].apply(
            lambda x: transform_unix_to_datetime(x, data_file_path)
        )

    # The target table and the extra key columns depend on the kind of file.
    if "on_surface" in file:
        # e.g. "fixations_on_surface_Surface 1.csv" -> surface "1",
        # table "fixations_on_surface"
        surface = file.split(".")[0].split(" ")[-1]
        table_name = file.split("_Surface")[0]

        df.insert(0, "test_id", test_id)
        df.insert(1, "surface", surface)

    elif "pose_tracker" in file:
        # e.g. "head_pose_tracker_poses.csv" -> table "head_pose_tracker"
        table_name = file.split("_poses")[0]
        df.insert(0, "test_id", test_id)

    else:
        # Plain files map to a table named after the file; here test_id goes
        # in as the second column rather than the first.
        table_name = file.split(".")[0]
        df.insert(1, "test_id", test_id)

    dfs.append((table_name, df))

    # Expose the frame to DuckDB on the SAME connection that runs the INSERT,
    # select from that view explicitly, and drop the view afterwards.
    view_name = f"{table_name}_view"
    con.register(view_name, df)
    try:
        con.execute(f"INSERT INTO {table_name} SELECT * FROM {view_name}")
    finally:
        con.unregister(view_name)

c6fcb8c1
d8075042-08bb-42c0-9ba9-e36874efd61e
head_pose_tracker_poses.csv
timestamp
fixations_on_surface_Surface 1.csv
world_timestamp
start_timestamp
blinks.csv
start_timestamp
end_timestamp
gaze_positions.csv
gaze_timestamp
pupil_positions.csv
pupil_timestamp
gaze_positions_on_surface_Surface 1.csv
world_timestamp
gaze_timestamp
fixations.csv
start_timestamp


In [None]:
# Load one exported gaze file to inspect its timestamps.
# NOTE(review): this file sits under a different test folder than
# PUPIL_FILES_PATH — confirm that is intended.
data = pd.read_csv(
    filepath_or_buffer="/home/martin/Projects/ongoing/sphinx-ai/data/pupils/tests/Test Inicial Vision Periferica/VP - Logical/Pablo/gaze_positions.csv"
)

data

Unnamed: 0,gaze_timestamp,world_index,confidence,norm_pos_x,norm_pos_y,base_data,gaze_point_3d_x,gaze_point_3d_y,gaze_point_3d_z,eye_center0_3d_x,...,eye_center0_3d_z,gaze_normal0_x,gaze_normal0_y,gaze_normal0_z,eye_center1_3d_x,eye_center1_3d_y,eye_center1_3d_z,gaze_normal1_x,gaze_normal1_y,gaze_normal1_z
0,39845.912907,0,0.999061,0.252557,0.465484,39845.912891-0 39845.912923-1,-240.507199,-9.742289,571.533020,20.035771,...,-19.889607,-0.402882,-0.028070,0.914821,-39.632771,14.762284,-21.00494,-0.320748,-0.049327,0.945879
1,39845.916920,0,0.994061,0.269133,0.479069,39845.920917-0 39845.912923-1,-371.258476,-28.739596,957.166382,20.035771,...,-19.889607,-0.371482,-0.034279,0.927807,-39.632771,14.762284,-21.00494,-0.320748,-0.049327,0.945879
2,39845.920929,0,0.990000,0.272736,0.470157,39845.920917-0 39845.92094-1,-232.908871,-13.053561,611.717721,20.035771,...,-19.889607,-0.371482,-0.034279,0.927807,-39.632771,14.762284,-21.00494,-0.291843,-0.048932,0.955214
3,39845.924916,0,0.995000,0.276189,0.472033,39845.928892-0 39845.92094-1,-251.141208,-15.512309,671.407653,20.035771,...,-19.889607,-0.364898,-0.033907,0.930430,-39.632771,14.762284,-21.00494,-0.291843,-0.048932,0.955214
4,39845.928895,0,0.973512,0.276403,0.472222,39845.928892-0 39845.928898-1,-245.185311,-15.278639,656.208122,20.035771,...,-19.889607,-0.364898,-0.033907,0.930430,-39.632771,14.762284,-21.00494,-0.290135,-0.050161,0.955670
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14943,39906.199569,1799,0.433397,0.168382,0.439404,39906.199569000004-0,-265.355835,3.959775,434.831433,-98.652142,...,-36.213740,-0.333407,0.036129,0.942090,,,,,,
14944,39906.207539,1799,0.447398,0.172405,0.435739,39906.207539-0,-261.693630,5.630734,436.044651,-98.652142,...,-36.213740,-0.326083,0.039471,0.944517,,,,,,
14945,39906.215622,1800,0.468372,0.170167,0.437556,39906.215622-0,-263.728322,4.803092,435.371196,-98.652142,...,-36.213740,-0.330152,0.037816,0.943170,,,,,,
14946,39906.223503,1800,0.479279,0.167956,0.439370,39906.223503-0,-265.741262,3.975410,434.694252,-98.652142,...,-36.213740,-0.334178,0.036160,0.941816,,,,,,


In [None]:
# Convert every gaze timestamp to its wall-clock string, then show the rows
# whose converted value repeats an earlier one.
a = data["gaze_timestamp"].map(
    lambda ts: transform_unix_to_datetime(ts, data_file_path)
)

data.loc[a.duplicated()]


Unnamed: 0,gaze_timestamp,world_index,confidence,norm_pos_x,norm_pos_y,base_data,gaze_point_3d_x,gaze_point_3d_y,gaze_point_3d_z,eye_center0_3d_x,...,eye_center0_3d_z,gaze_normal0_x,gaze_normal0_y,gaze_normal0_z,eye_center1_3d_x,eye_center1_3d_y,eye_center1_3d_z,gaze_normal1_x,gaze_normal1_y,gaze_normal1_z
