## 将眼动数据按样本划分成单独tsv文件

划分前：airplane.tsv 1GB多。

划分后：airplane-1.tsv xMB 和 airplane-1_misc.tsv xMB。

将“关键列”和“混杂数据”分别提取出来，分两种文件单独存放。

进行预处理关键列数据：
- 填充空值。
- 增加is_valid列，表示某条数据是否可用，0表示不可用，1表示可用。
- 增加is_frame_terminal列，表示当前眼动数据是否为某视频帧对应的最后一条眼动数据，0表示不是，1表示是。

In [None]:
import os
import pandas as pd
from tqdm import tqdm

In [None]:
# LaSOT video dataset root; the split step lists <data_dir>/<class>/<sample>/img
# to count the frames of each video.
data_dir = '/data/xgxy/BeiJing/data/lasot'
# Raw eye-tracking exports, one "<class>.tsv" per class.
# Alternative locations used previously:
#   /data/xgxy/BeiJing/data/LaSOT-eye
#   /home/xgxy/LaSOT-eye
eye_dir = '/data/xgxy/BeiJing/data/lasot-eye'
# Destination of the per-sample TSV files produced by the split step.
preprocessed_data_dir = '/data/SunYang/works/BeiJing/data/lasot-eye-preprocessed'

In [None]:
# 1400 samples
sequence_list = ['bird-16',
                 'bird-6','bird-7','bird-18','bird-12','bird-11','bird-17','bird-3','bird-10','bird-5','bird-9','bird-19','bird-8','bird-20','bird-14','bird-2','bird-1','bird-15','bird-13','bird-4','pool-15','pool-10','pool-20','pool-4','pool-13','pool-2','pool-9','pool-11','pool-3','pool-7','pool-8','pool-5','pool-1','pool-6','pool-19','pool-12','pool-17','pool-14','pool-18','pool-16','rubicCube-6','rubicCube-2','rubicCube-12','rubicCube-15','rubicCube-13','rubicCube-3','rubicCube-5','rubicCube-9','rubicCube-16','rubicCube-7','rubicCube-1','rubicCube-17','rubicCube-4','rubicCube-20','rubicCube-19','rubicCube-10','rubicCube-14','rubicCube-11','rubicCube-8','rubicCube-18','crocodile-1','crocodile-17','crocodile-19','crocodile-12','crocodile-4','crocodile-15','crocodile-2','crocodile-20','crocodile-14','crocodile-13','crocodile-16','crocodile-6','crocodile-11','crocodile-5','crocodile-8','crocodile-10','crocodile-7','crocodile-18','crocodile-9','crocodile-3','bus-10','bus-15','bus-18','bus-9','bus-12','bus-5','bus-17','bus-4','bus-3','bus-11','bus-13','bus-7','bus-6','bus-2','bus-20','bus-1','bus-16','bus-19','bus-8','bus-14','mouse-4','mouse-7','mouse-15','mouse-19','mouse-18','mouse-9','mouse-12','mouse-1','mouse-20','mouse-5','mouse-2','mouse-14','mouse-17','mouse-16','mouse-10','mouse-8','mouse-11','mouse-13','mouse-6','mouse-3','motorcycle-4','motorcycle-9','motorcycle-12','motorcycle-20','motorcycle-16','motorcycle-6','motorcycle-1','motorcycle-17','motorcycle-10','motorcycle-14','motorcycle-7','motorcycle-11','motorcycle-18','motorcycle-15','motorcycle-19','motorcycle-5','motorcycle-3','motorcycle-2','motorcycle-13','motorcycle-8','spider-10','spider-13','spider-12','spider-19','spider-6','spider-1','spider-7','spider-17','spider-15','spider-11','spider-2','spider-8','spider-14','spider-16','spider-5','spider-4','spider-9','spider-18','spider-3','spider-20','kite-5','kite-17','kite-11','kite-12','kite-7','kite-19','kite-2','kite-13','kite-16','kite-18','kite-6'
,'kite-9','kite-4','kite-15','kite-20','kite-3','kite-10','kite-1','kite-8','kite-14','hippo-12','hippo-14','hippo-15','hippo-16','hippo-2','hippo-18','hippo-6','hippo-5','hippo-11','hippo-4','hippo-8','hippo-10','hippo-9','hippo-7','hippo-1','hippo-3','hippo-20','hippo-13','hippo-19','hippo-17','airplane-5','airplane-8','airplane-1','airplane-16','airplane-12','airplane-17','airplane-9','airplane-18','airplane-4','airplane-6','airplane-14','airplane-7','airplane-13','airplane-10','airplane-19','airplane-20','airplane-3','airplane-2','airplane-11','airplane-15','giraffe-12','giraffe-16','giraffe-4','giraffe-20','giraffe-9','giraffe-17','giraffe-7','giraffe-19','giraffe-11','giraffe-1','giraffe-18','giraffe-8','giraffe-3','giraffe-15','giraffe-14','giraffe-13','giraffe-5','giraffe-6','giraffe-2','giraffe-10','leopard-14','leopard-5','leopard-12','leopard-13','leopard-1','leopard-3','leopard-6','leopard-11','leopard-4','leopard-18','leopard-8','leopard-20','leopard-17','leopard-19','leopard-10','leopard-2','leopard-7','leopard-9','leopard-15','leopard-16','licenseplate-1','licenseplate-14','licenseplate-6','licenseplate-5','licenseplate-10','licenseplate-15','licenseplate-7','licenseplate-17','licenseplate-8','licenseplate-20','licenseplate-13','licenseplate-11','licenseplate-12','licenseplate-18','licenseplate-9','licenseplate-16','licenseplate-3','licenseplate-2','licenseplate-4','licenseplate-19','truck-8','truck-2','truck-4','truck-16','truck-10','truck-17','truck-12','truck-13','truck-5','truck-19','truck-11','truck-6','truck-15','truck-9','truck-3','truck-20','truck-7','truck-14','truck-18','truck-1','train-1','train-2','train-19','train-12','train-15','train-20','train-16','train-13','train-3','train-11','train-10','train-17','train-6','train-8','train-18','train-7','train-9','train-4','train-14','train-5','bear-10','bear-8','bear-7','bear-4','bear-12','bear-13','bear-14','bear-2','bear-17','bear-6','bear-5','bear-18','bear-20','bear-3','bear-11','bear-19','bea
r-15','bear-16','bear-9','bear-1','goldfish-16','goldfish-4','goldfish-1','goldfish-10','goldfish-13','goldfish-11','goldfish-3','goldfish-7','goldfish-2','goldfish-5','goldfish-8','goldfish-17','goldfish-15','goldfish-14','goldfish-6','goldfish-9','goldfish-20','goldfish-12','goldfish-19','goldfish-18','tank-4','tank-14','tank-13','tank-2','tank-9','tank-3','tank-19','tank-17','tank-8','tank-6','tank-12','tank-20','tank-7','tank-1','tank-18','tank-15','tank-11','tank-5','tank-16','tank-10','gecko-2','gecko-16','gecko-7','gecko-18','gecko-1','gecko-15','gecko-3','gecko-13','gecko-19','gecko-6','gecko-5','gecko-10','gecko-12','gecko-8','gecko-4','gecko-14','gecko-17','gecko-9','gecko-20','gecko-11','electricfan-11','electricfan-15','electricfan-18','electricfan-3','electricfan-9','electricfan-13','electricfan-12','electricfan-19','electricfan-1','electricfan-14','electricfan-4','electricfan-7','electricfan-17','electricfan-6','electricfan-2','electricfan-16','electricfan-20','electricfan-5','electricfan-10','electricfan-8','volleyball-17','volleyball-12','volleyball-19','volleyball-4','volleyball-11','volleyball-14','volleyball-15','volleyball-7','volleyball-18','volleyball-1','volleyball-20','volleyball-10','volleyball-6','volleyball-16','volleyball-5','volleyball-8','volleyball-13','volleyball-9','volleyball-3','volleyball-2','turtle-8','turtle-14','turtle-6','turtle-5','turtle-10','turtle-9','turtle-1','turtle-4','turtle-18','turtle-16','turtle-19','turtle-15','turtle-11','turtle-13','turtle-7','turtle-20','turtle-12','turtle-3','turtle-17','turtle-2','frog-1','frog-18','frog-10','frog-3','frog-5','frog-17','frog-6','frog-12','frog-8','frog-16','frog-14','frog-19','frog-11','frog-7','frog-20','frog-15','frog-9','frog-2','frog-13','frog-4','zebra-12','zebra-5','zebra-13','zebra-1','zebra-6','zebra-15','zebra-19','zebra-18','zebra-10','zebra-14','zebra-2','zebra-20','zebra-17','zebra-8','zebra-7','zebra-9','zebra-3','zebra-16','zebra-4','zebra-11','swing-15','swing-
3','swing-16','swing-9','swing-17','swing-5','swing-10','swing-1','swing-11','swing-20','swing-4','swing-13','swing-19','swing-12','swing-7','swing-18','swing-2','swing-14','swing-6','swing-8','coin-4','coin-16','coin-17','coin-11','coin-5','coin-12','coin-1','coin-18','coin-10','coin-7','coin-15','coin-14','coin-3','coin-8','coin-6','coin-2','coin-13','coin-19','coin-20','coin-9','racing-18','racing-14','racing-20','racing-2','racing-15','racing-1','racing-17','racing-16','racing-7','racing-19','racing-4','racing-12','racing-3','racing-8','racing-10','racing-13','racing-11','racing-6','racing-5','racing-9','horse-18','horse-14','horse-12','horse-6','horse-2','horse-11','horse-8','horse-15','horse-20','horse-17','horse-5','horse-4','horse-9','horse-16','horse-19','horse-7','horse-1','horse-3','horse-10','horse-13','flag-8','flag-14','flag-19','flag-4','flag-6','flag-13','flag-17','flag-9','flag-5','flag-3','flag-15','flag-10','flag-16','flag-11','flag-12','flag-1','flag-2','flag-7','flag-20','flag-18','chameleon-6','chameleon-4','chameleon-9','chameleon-15','chameleon-2','chameleon-17','chameleon-8','chameleon-10','chameleon-20','chameleon-13','chameleon-3','chameleon-7','chameleon-18','chameleon-5','chameleon-16','chameleon-14','chameleon-1','chameleon-11','chameleon-19','chameleon-12','gametarget-3','gametarget-14','gametarget-2','gametarget-16','gametarget-19','gametarget-20','gametarget-8','gametarget-12','gametarget-4','gametarget-10','gametarget-15','gametarget-5','gametarget-6','gametarget-18','gametarget-1','gametarget-9','gametarget-7','gametarget-17','gametarget-11','gametarget-13','person-10','person-12','person-6','person-3','person-15','person-2','person-18','person-19','person-7','person-11','person-4','person-14','person-16','person-20','person-1','person-5','person-13','person-17','person-8','person-9','lizard-17','lizard-2','lizard-9','lizard-12','lizard-11','lizard-5','lizard-10','lizard-15','lizard-1','lizard-16','lizard-14','lizard-8','lizard-18'
,'lizard-3','lizard-6','lizard-4','lizard-20','lizard-7','lizard-13','lizard-19','dog-19','dog-3','dog-5','dog-18','dog-15','dog-14','dog-9','dog-8','dog-20','dog-10','dog-2','dog-16','dog-11','dog-17','dog-4','dog-6','dog-12','dog-7','dog-13','dog-1','basketball-15','basketball-1','basketball-16','basketball-7','basketball-3','basketball-20','basketball-19','basketball-13','basketball-11','basketball-2','basketball-5','basketball-10','basketball-12','basketball-17','basketball-18','basketball-4','basketball-6','basketball-8','basketball-9','basketball-14','boat-3','boat-11','boat-12','boat-15','boat-4','boat-20','boat-5','boat-18','boat-17','boat-9','boat-2','boat-13','boat-14','boat-1','boat-19','boat-8','boat-6','boat-10','boat-16','boat-7','lion-11','lion-14','lion-20','lion-9','lion-7','lion-17','lion-10','lion-12','lion-19','lion-16','lion-1','lion-6','lion-18','lion-4','lion-8','lion-5','lion-3','lion-2','lion-13','lion-15','helmet-8','helmet-7','helmet-13','helmet-4','helmet-11','helmet-14','helmet-15','helmet-17','helmet-6','helmet-12','helmet-3','helmet-16','helmet-19','helmet-18','helmet-9','helmet-1','helmet-2','helmet-10','helmet-20','helmet-5','kangaroo-15','kangaroo-13','kangaroo-16','kangaroo-4','kangaroo-9','kangaroo-20','kangaroo-12','kangaroo-17','kangaroo-18','kangaroo-8','kangaroo-6','kangaroo-1','kangaroo-3','kangaroo-19','kangaroo-2','kangaroo-14','kangaroo-5','kangaroo-7','kangaroo-10','kangaroo-11','deer-8','deer-10','deer-14','deer-19','deer-13','deer-1','deer-16','deer-17','deer-20','deer-3','deer-6','deer-9','deer-2','deer-7','deer-15','deer-18','deer-12','deer-11','deer-4','deer-5','crab-5','crab-2','crab-8','crab-11','crab-16','crab-19','crab-10','crab-4','crab-14','crab-3','crab-1','crab-15','crab-12','crab-20','crab-17','crab-6','crab-18','crab-9','crab-13','crab-7','tiger-14','tiger-19','tiger-15','tiger-17','tiger-5','tiger-10','tiger-8','tiger-7','tiger-3','tiger-16','tiger-13','tiger-20','tiger-4','tiger-9','tiger-6','tiger-12','t
iger-18','tiger-2','tiger-1','tiger-11','rabbit-9','rabbit-4','rabbit-3','rabbit-16','rabbit-7','rabbit-17','rabbit-18','rabbit-13','rabbit-20','rabbit-8','rabbit-14','rabbit-1','rabbit-11','rabbit-15','rabbit-2','rabbit-12','rabbit-10','rabbit-6','rabbit-19','rabbit-5','cup-4','cup-6','cup-12','cup-20','cup-18','cup-14','cup-10','cup-11','cup-17','cup-13','cup-7','cup-3','cup-8','cup-15','cup-9','cup-1','cup-5','cup-19','cup-2','cup-16','elephant-2','elephant-20','elephant-12','elephant-11','elephant-5','elephant-9','elephant-18','elephant-3','elephant-8','elephant-13','elephant-19','elephant-4','elephant-10','elephant-1','elephant-6','elephant-14','elephant-15','elephant-16','elephant-7','elephant-17','hand-6','hand-3','hand-17','hand-19','hand-16','hand-8','hand-15','hand-14','hand-1','hand-12','hand-2','hand-11','hand-4','hand-18','hand-20','hand-7','hand-9','hand-5','hand-10','hand-13','umbrella-17','umbrella-12','umbrella-10','umbrella-2','umbrella-8','umbrella-14','umbrella-15','umbrella-20','umbrella-11','umbrella-5','umbrella-9','umbrella-6','umbrella-16','umbrella-4','umbrella-7','umbrella-13','umbrella-18','umbrella-19','umbrella-3','umbrella-1','bottle-3','bottle-2','bottle-17','bottle-14','bottle-9','bottle-7','bottle-11','bottle-20','bottle-1','bottle-10','bottle-5','bottle-6','bottle-13','bottle-19','bottle-18','bottle-12','bottle-15','bottle-8','bottle-4','bottle-16','yoyo-13','yoyo-18','yoyo-2','yoyo-20','yoyo-19','yoyo-11','yoyo-6','yoyo-12','yoyo-7','yoyo-8','yoyo-17','yoyo-1','yoyo-16','yoyo-4','yoyo-15','yoyo-10','yoyo-9','yoyo-14','yoyo-3','yoyo-5','microphone-1','microphone-15','microphone-19','microphone-18','microphone-6','microphone-16','microphone-13','microphone-11','microphone-14','microphone-20','microphone-9','microphone-7','microphone-8','microphone-12','microphone-4','microphone-17','microphone-2','microphone-3','microphone-5','microphone-10','pig-16','pig-9','pig-14','pig-7','pig-1','pig-18','pig-17','pig-4','pig-3','pig-6','pig-15'
,'pig-20','pig-2','pig-19','pig-11','pig-10','pig-5','pig-12','pig-8','pig-13','drone-9','drone-2','drone-4','drone-6','drone-15','drone-8','drone-11','drone-18','drone-10','drone-1','drone-19','drone-14','drone-7','drone-12','drone-16','drone-20','drone-13','drone-5','drone-17','drone-3','gorilla-15','gorilla-18','gorilla-6','gorilla-11','gorilla-14','gorilla-1','gorilla-4','gorilla-10','gorilla-3','gorilla-17','gorilla-7','gorilla-2','gorilla-9','gorilla-16','gorilla-20','gorilla-19','gorilla-13','gorilla-8','gorilla-5','gorilla-12','guitar-6','guitar-13','guitar-12','guitar-14','guitar-11','guitar-5','guitar-4','guitar-8','guitar-20','guitar-10','guitar-15','guitar-7','guitar-3','guitar-18','guitar-16','guitar-1','guitar-9','guitar-2','guitar-19','guitar-17','car-3','car-15','car-17','car-6','car-20','car-16','car-7','car-11','car-19','car-5','car-18','car-8','car-12','car-1','car-4','car-9','car-2','car-13','car-10','car-14','cat-20','cat-11','cat-2','cat-8','cat-4','cat-16','cat-15','cat-3','cat-13','cat-7','cat-6','cat-19','cat-5','cat-17','cat-18','cat-14','cat-1','cat-12','cat-9','cat-10','monkey-19','monkey-4','monkey-17','monkey-8','monkey-7','monkey-2','monkey-16','monkey-15','monkey-14','monkey-12','monkey-9','monkey-18','monkey-13','monkey-11','monkey-10','monkey-5','monkey-6','monkey-1','monkey-3','monkey-20','squirrel-7','squirrel-8','squirrel-19','squirrel-9','squirrel-2','squirrel-18','squirrel-15','squirrel-10','squirrel-20','squirrel-13','squirrel-14','squirrel-12','squirrel-16','squirrel-5','squirrel-17','squirrel-1','squirrel-6','squirrel-3','squirrel-4','squirrel-11','robot-15','robot-10','robot-20','robot-13','robot-1','robot-14','robot-9','robot-3','robot-18','robot-7','robot-8','robot-2','robot-12','robot-19','robot-11','robot-16','robot-5','robot-6','robot-4','robot-17','surfboard-17','surfboard-19','surfboard-1','surfboard-2','surfboard-8','surfboard-14','surfboard-7','surfboard-15','surfboard-16','surfboard-6','surfboard-13','surfboard-9'
,'surfboard-4','surfboard-10','surfboard-18','surfboard-3','surfboard-5','surfboard-12','surfboard-20','surfboard-11','bicycle-13','bicycle-3','bicycle-10','bicycle-12','bicycle-4','bicycle-18','bicycle-1','bicycle-20','bicycle-7','bicycle-9','bicycle-16','bicycle-15','bicycle-8','bicycle-17','bicycle-14','bicycle-11','bicycle-2','bicycle-6','bicycle-19','bicycle-5','book-4','book-12','book-13','book-6','book-19','book-1','book-3','book-2','book-5','book-17','book-11','book-8','book-10','book-7','book-20','book-14','book-18','book-16','book-15','book-9','hat-1','hat-20','hat-4','hat-5','hat-15','hat-9','hat-8','hat-19','hat-18','hat-6','hat-7','hat-13','hat-11','hat-12','hat-2','hat-17','hat-14','hat-3','hat-10','hat-16','fox-20','fox-13','fox-11','fox-3','fox-4','fox-2','fox-14','fox-16','fox-5','fox-8','fox-7','fox-10','fox-17','fox-9','fox-12','fox-1','fox-19','fox-15','fox-6','fox-18','skateboard-6','skateboard-11','skateboard-14','skateboard-16','skateboard-1','skateboard-15','skateboard-18','skateboard-7','skateboard-10','skateboard-9','skateboard-13','skateboard-4','skateboard-20','skateboard-17','skateboard-3','skateboard-2','skateboard-5','skateboard-12','skateboard-8','skateboard-19','sepia-3','sepia-4','sepia-6','sepia-10','sepia-5','sepia-13','sepia-9','sepia-12','sepia-19','sepia-2','sepia-18','sepia-15','sepia-1','sepia-7','sepia-11','sepia-20','sepia-17','sepia-8','sepia-16','sepia-14','shark-18','shark-14','shark-2','shark-3','shark-20','shark-5','shark-19','shark-4','shark-11','shark-6','shark-16','shark-9','shark-7','shark-12','shark-8','shark-17','shark-15','shark-13','shark-1','shark-10','cattle-1','cattle-19','cattle-4','cattle-2','cattle-5','cattle-10','cattle-15','cattle-11','cattle-12','cattle-3','cattle-18','cattle-9','cattle-8','cattle-13','cattle-6','cattle-14','cattle-16','cattle-20','cattle-17','cattle-7','sheep-3','sheep-6','sheep-10','sheep-12','sheep-17','sheep-11','sheep-9','sheep-13','sheep-18','sheep-5','sheep-16','sheep-8','sheep-
14','sheep-4','sheep-15','sheep-1','sheep-7','sheep-19','sheep-20','sheep-2', ]

# Sentinel written into every empty (NaN) cell of the raw export; a row that
# still contains this value in a key column is later flagged as not valid.
fill_value = -1

# Key columns: the columns that are written to "<sample>.tsv".
# The order of this list is the column order of the output file.
key_columns = [
    'Participant name',
    # gaze point (combined / left eye / right eye)
    'Gaze point X',
    'Gaze point Y',
    'Gaze point left X',
    'Gaze point left Y',
    'Gaze point right X',
    'Gaze point right Y',
    # gaze direction vectors
    'Gaze direction left X',
    'Gaze direction left Y',
    'Gaze direction left Z',
    'Gaze direction right X',
    'Gaze direction right Y',
    'Gaze direction right Z',
    # pupil diameter and eye openness
    'Pupil diameter left',
    'Pupil diameter right',
    'Pupil diameter filtered',
    'Eye openness left',
    'Eye openness right',
    'Eye openness filtered',
    # per-eye validity; the split step requires both to equal 'Valid'
    'Validity left',
    'Validity right',
    # eye position and gaze point in the DACSmm columns
    'Eye position left X (DACSmm)',
    'Eye position left Y (DACSmm)',
    'Eye position left Z (DACSmm)',
    'Eye position right X (DACSmm)',
    'Eye position right Y (DACSmm)',
    'Eye position right Z (DACSmm)',
    'Gaze point left X (DACSmm)',
    'Gaze point left Y (DACSmm)',
    'Gaze point right X (DACSmm)',
    'Gaze point right Y (DACSmm)',
    # gaze point in the MCSnorm columns; the averaging step multiplies these
    # by the presented media width / height
    'Gaze point X (MCSnorm)',
    'Gaze point Y (MCSnorm)',
    'Gaze point left X (MCSnorm)',
    'Gaze point left Y (MCSnorm)',
    'Gaze point right X (MCSnorm)',
    'Gaze point right Y (MCSnorm)',
    # presented stimulus / media; 'Presented Stimulus name' is the column the
    # split step uses to tell samples apart
    'Presented Stimulus name',
    'Presented Media name',
    'Presented Media width',
    'Presented Media height',
    'Presented Media position X (DACSpx)',
    'Presented Media position Y (DACSpx)',
    # eye movement classification and fixation point
    'Eye movement type',
    'Gaze event duration',
    'Eye movement type index',
    'Fixation point X',
    'Fixation point Y',
    'Fixation point X (MCSnorm)',
    'Fixation point Y (MCSnorm)',
]
# Misc columns: miscellaneous recording data that is stored separately,
# in "<sample>_misc.tsv".
misc_columns = [
    'Recording timestamp',
    'Computer timestamp',
    'Sensor',
    'Project name',
    'Export date',
    'Recording name',
    'Recording date',
    'Recording date UTC',
    'Recording start time',
    'Recording start time UTC',
    'Recording duration',
    'Timeline name',
    'Recording Fixation filter name',
    'Recording software version',
    'Recording resolution height',
    'Recording resolution width',
    'Recording monitor latency',
    # calibration quality
    'Average calibration accuracy (mm)',
    'Average calibration precision SD (mm)',
    'Average calibration precision RMS (mm)',
    'Average calibration accuracy (degrees)',
    'Average calibration precision SD (degrees)',
    'Average calibration precision RMS (degrees)',
    'Average calibration accuracy (pixels)',
    'Average calibration precision SD (pixels)',
    'Average calibration precision RMS (pixels)',
    # validation quality
    'Average validation accuracy (mm)',
    'Average validation precision SD (mm)',
    'Average validation precision RMS (mm)',
    'Average validation accuracy (degrees)',
    'Average validation precision SD (degrees)',
    'Average validation precision RMS (degrees)',
    'Average validation accuracy (pixels)',
    'Average validation precision SD (pixels)',
    'Average validation precision RMS (pixels)',
    'Eyetracker timestamp',
    'Event',
    'Event value',
]

# Unique class names (the text before the first '-' of every sequence name),
# in alphabetical order.
clazz_list = sorted({sequence.split('-')[0] for sequence in sequence_list})

In [None]:
# Split every per-class eye-tracking export into one pair of files per sample:
#   <sample>.tsv       key columns plus the derived is_valid / is_frame_terminal flags
#   <sample>_misc.tsv  misc columns
# A "sample" is a run of consecutive rows that share the same
# 'Presented Stimulus name'; rows whose stimulus is empty, the eyetracker
# calibration or the rest image are ignored and do not end a run.
# If the same stimulus name occurs in more than one run, the file written for
# the later run replaces the earlier one.
for clazz in clazz_list:
    # load the whole class export and replace every empty cell by the sentinel
    eye_raw_data = pd.read_csv(os.path.join(eye_dir, f"{clazz}.tsv"), sep='\t')
    eye_raw_data = eye_raw_data.fillna(fill_value)

    # .copy(): the flag columns are added to an independent frame instead of a
    # slice of eye_raw_data (chained assignment / SettingWithCopyWarning).
    eye_raw_key_data = eye_raw_data[key_columns].copy()
    eye_raw_misc_data = eye_raw_data[misc_columns]

    # is_valid: 1 when no key column holds the fill value and both eyes are
    # reported as 'Valid', otherwise 0. Computed for all rows at once.
    no_missing_value = (eye_raw_key_data != fill_value).all(axis=1)
    both_eyes_valid = (
        (eye_raw_key_data['Validity left'] == 'Valid')
        & (eye_raw_key_data['Validity right'] == 'Valid')
    )
    eye_raw_key_data['is_valid'] = (no_missing_value & both_eyes_valid).astype('int64')
    # is_frame_terminal: 1 on the last eye row that belongs to a video frame;
    # filled in per sample below.
    eye_raw_key_data['is_frame_terminal'] = 0

    stimulus_name = eye_raw_data['Presented Stimulus name']
    is_sample_row = ~(
        (stimulus_name == fill_value)
        | (stimulus_name == 'Eyetracker Calibration')
        | (stimulus_name == 'restImage')
    )
    sample_rows = stimulus_name[is_sample_row]
    # A new run starts wherever the stimulus differs from the previous kept row.
    run_id = (sample_rows != sample_rows.shift()).cumsum()

    result_dir = os.path.join(preprocessed_data_dir, clazz)
    os.makedirs(result_dir, exist_ok=True)

    # Every run is written, including the last one of the table.
    for _, run in tqdm(sample_rows.groupby(run_id, sort=False), desc=f'{clazz}'):
        sample_name = run.iloc[0]

        # eye rows per video frame =
        #   int(rows carrying this stimulus name in the whole table / number of frame images)
        video_frame_num = len(os.listdir(os.path.join(data_dir, clazz, sample_name, 'img')))
        frame_eye_sample_num = int((stimulus_name == sample_name).sum() / video_frame_num)

        # run.index holds the row labels of eye_raw_data (default RangeIndex
        # created by read_csv), so .loc selects exactly the rows of this run.
        sample_key_data = eye_raw_key_data.loc[run.index].copy()
        sample_misc_data = eye_raw_misc_data.loc[run.index]

        # Mark every frame_eye_sample_num-th row of the sample as frame terminal.
        # Counting inside the sample (rather than comparing global row indices)
        # keeps the marking going when ignored rows are interleaved.
        # With fewer eye rows than frames the quotient is 0 and nothing is marked.
        if frame_eye_sample_num > 0:
            terminal_col = sample_key_data.columns.get_loc('is_frame_terminal')
            sample_key_data.iloc[frame_eye_sample_num - 1::frame_eye_sample_num, terminal_col] = 1

        # save
        sample_key_data.to_csv(os.path.join(result_dir, f"{sample_name}.tsv"), sep='\t', index=False)
        sample_misc_data.to_csv(os.path.join(result_dir, f"{sample_name}_misc.tsv"), sep='\t', index=False)

## 处理划分后的眼动数据

处理前，如airplane-1.tsv（该文件是由对airplane.tsv处理和划分得到）；处理后，如airplane-1.txt，对眼动数据取平均。

按照原视频，一个视频帧内的眼动数据取平均值。

一个视频帧内的眼动数据数量 = 一个样本所有眼动数据数量 / 该视频总帧数（结果向下取整）

In [21]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm

In [22]:
# LaSOT dataset root; <lasot_dir>/<class>/<sample>/groundtruth.txt is read to
# get the number of frames of each video.
lasot_dir = '/data/guohua/BeiJing/data/lasot'
# Input: per-sample eye-tracking TSV files, one sub-directory per class.
root_dir = '/data/guohua/BeiJing/data/lasot-eye-preprocessed'
# Output: one "<sample>.txt" per sample, one sub-directory per class.
output_dir = '/data/guohua/BeiJing/data/lasot-eye-sy'
os.makedirs(output_dir, exist_ok=True)
# Every entry of root_dir is treated as a class name; processed alphabetically.
clazz_list = os.listdir(root_dir)
clazz_list.sort()

In [26]:
def _average_gaze_per_frame(eye_data, video_frame_num):
    """Collapse the eye rows of one sample into one gaze point per video frame.

    Rows are consumed in file order. A row with ``is_valid == 1`` contributes
    the point ``(int(x * w), int(y * h))`` built from the 'Gaze point X/Y
    (MCSnorm)' and 'Presented Media width/height' columns; other rows are
    ignored. A row with ``is_frame_terminal == 1`` closes the current frame:
    the frame becomes ``(0, 0)`` when it collected no valid point, otherwise
    the integer mean (``np.mean`` with ``dtype=np.int64``) of its points.
    Rows after the last terminal row never close a frame and are dropped.

    Args:
        eye_data: DataFrame read from a per-sample "<sample>.tsv".
        video_frame_num: number of frames of the video; iteration stops as
            soon as this many frames have been produced.

    Returns:
        A list of int64 arrays of shape (2,), one per produced frame.
    """
    # Pull the columns out once and walk them in lockstep instead of doing a
    # label lookup on a Series for every single row.
    rows = zip(
        eye_data['is_frame_terminal'].to_numpy(),
        eye_data['is_valid'].to_numpy(),
        eye_data['Presented Media width'].to_numpy(),
        eye_data['Presented Media height'].to_numpy(),
        eye_data['Gaze point X (MCSnorm)'].to_numpy(),
        eye_data['Gaze point Y (MCSnorm)'].to_numpy(),
    )

    final_result = []
    video_frame_eye = []  # valid points collected for the frame in progress
    for is_frame_terminal, is_valid, w, h, x, y in rows:
        # only valid rows contribute a point
        if is_valid == 1:
            video_frame_eye.append((int(x * w), int(y * h)))

        # terminal row: close the frame
        if is_frame_terminal == 1:
            if video_frame_eye:
                frame_point = np.mean(np.array(video_frame_eye), axis=0, dtype=np.int64)
            else:
                # every eye row of this frame was invalid
                frame_point = np.array([0, 0], dtype=np.int64)
            final_result.append(frame_point)
            video_frame_eye = []

        # ignore surplus eye rows once every video frame has a point
        if len(final_result) == video_frame_num:
            break
    return final_result


# For every per-sample TSV under root_dir write "<output_dir>/<class>/<sample>.txt"
# holding one "x,y" line per video frame.
for clazz in clazz_list:
    clazz_dir = os.path.join(root_dir, clazz)
    sample_list = os.listdir(clazz_dir)
    output_path = os.path.join(output_dir, clazz)
    os.makedirs(output_path, exist_ok=True)

    for sample in tqdm(sample_list, desc=f'{clazz}'):
        # skip the misc files and anything that is not a per-sample TSV
        if sample.endswith('_misc.tsv') or not sample.endswith('.tsv'):
            continue

        # strip only the trailing extension
        sample_name = sample[:-len('.tsv')]
        eye_data = pd.read_csv(os.path.join(clazz_dir, sample), sep='\t')
        # one ground-truth row per video frame
        groundtruth = pd.read_csv(
            os.path.join(lasot_dir, clazz, sample_name, 'groundtruth.txt'),
            delimiter=',', header=None, dtype=np.int64, na_filter=False,
        )
        video_frame_num = len(groundtruth)

        final_result = _average_gaze_per_frame(eye_data, video_frame_num)
        if len(final_result) < video_frame_num:
            # The output then has fewer lines than the video has frames; say so
            # instead of writing a short file silently.
            tqdm.write(f'{sample_name}: {len(final_result)} gaze points for {video_frame_num} video frames')

        # save
        np.savetxt(os.path.join(output_path, f'{sample_name}.txt'), np.array(final_result), fmt='%d', delimiter=',')

airplane: 100%|██████████| 40/40 [01:37<00:00,  2.45s/it]
basketball: 100%|██████████| 40/40 [00:57<00:00,  1.43s/it]
bear: 100%|██████████| 40/40 [01:03<00:00,  1.59s/it]
bicycle: 100%|██████████| 40/40 [00:48<00:00,  1.20s/it]
bird: 100%|██████████| 40/40 [00:56<00:00,  1.41s/it]
boat: 100%|██████████| 40/40 [00:59<00:00,  1.48s/it]
book: 100%|██████████| 40/40 [00:48<00:00,  1.22s/it]
bottle: 100%|██████████| 40/40 [00:54<00:00,  1.37s/it]
bus: 100%|██████████| 40/40 [01:11<00:00,  1.79s/it]
car: 100%|██████████| 40/40 [00:46<00:00,  1.17s/it]
cat: 100%|██████████| 40/40 [00:43<00:00,  1.08s/it]
cattle: 100%|██████████| 40/40 [00:34<00:00,  1.15it/s]
chameleon: 100%|██████████| 40/40 [00:43<00:00,  1.10s/it]
coin: 100%|██████████| 40/40 [00:46<00:00,  1.15s/it]
crab: 100%|██████████| 40/40 [00:44<00:00,  1.11s/it]
crocodile: 100%|██████████| 40/40 [00:47<00:00,  1.19s/it]
cup: 100%|██████████| 40/40 [00:37<00:00,  1.07it/s]
deer: 100%|██████████| 40/40 [00:56<00:00,  1.42s/it]
dog: 