In [None]:
import numpy as np
import pandas as pd
import utm
import os
import pickle
import networkx as nx

In [None]:
def start_end_point(lat, lon):
    """Return the two opposite corners of the window centred on (lat, lon).

    The window extends 55 degrees of longitude and 50 degrees of latitude
    to either side of the centre.  Each corner is a ``[longitude, latitude]``
    list (note the order):

    * ``start`` is ``[lon - 55, lat + 50]``
    * ``end``   is ``[lon + 55, lat - 50]``

    A longitude strictly outside [-180, 180] is shifted once by 360 and a
    latitude strictly outside [-80, 80] is shifted once by 160, towards the
    opposite side of the range.  Values exactly on a limit are left as is.

    Returns
    -------
    tuple
        ``(start, end)``, each a two-element list.
    """
    def _shift_once(value, limit, span):
        # A single shift only; values already inside the range pass through.
        if value > limit:
            return value - span
        if value < -limit:
            return value + span
        return value

    def _corner(raw_lon, raw_lat):
        return [_shift_once(raw_lon, 180, 360), _shift_once(raw_lat, 80, 160)]

    start = _corner(lon - 55, lat + 50)
    end = _corner(lon + 55, lat - 50)
    return (start, end)

def conv_func(data):
    """Add UTM zone/band columns and coarse 4x4 grid bins to head-tracking samples.

    For every row the two window corners are taken from ``start_end_point``
    and each corner is passed to ``utm.from_latlon``; only the last two items
    of the conversion result (zone number and band letter) are kept.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``latitude`` and ``longitude`` columns.  Its index is
        preserved in the result, whatever it is.

    Returns
    -------
    pandas.DataFrame
        A new frame (``data`` is not modified) with the columns, in order:
        ``latitude``, ``longitude``, ``s_zone``, ``s_band``, ``e_zone``,
        ``e_band``, ``ns_band``, ``ne_band``, ``s_bin_zone``, ``s_bin_band``,
        ``e_bin_zone``, ``e_bin_band``.

    Notes
    -----
    The original author flagged this function for a rewrite; it still does
    one ``utm.from_latlon`` call per corner per row.
    """
    s_zones, s_bands, e_zones, e_bands = [], [], [], []

    # Walk the two columns directly instead of materialising a row Series
    # with data.iloc[i] twice per sample.
    for lat, lon in zip(data['latitude'], data['longitude']):
        start, end = start_end_point(lat, lon)

        # Corners are [longitude, latitude]; from_latlon takes (lat, lon).
        s_zone, s_band = utm.from_latlon(start[1], start[0])[2:]
        e_zone, e_band = utm.from_latlon(end[1], end[0])[2:]

        s_zones.append(s_zone)
        s_bands.append(s_band)
        e_zones.append(e_zone)
        e_bands.append(e_band)

    # Build on a copy that keeps data's own index.  Concatenating against
    # frames with a fresh RangeIndex misaligned rows for any input whose
    # index was not 0..n-1.
    coords = data[['latitude', 'longitude']].copy()
    coords['s_zone'] = s_zones
    coords['s_band'] = s_bands
    coords['e_zone'] = e_zones
    coords['e_band'] = e_bands

    # Band letter -> number, with 'A' mapping to 1.
    coords['ns_band'] = [ord(letter) - 64 for letter in s_bands]
    coords['ne_band'] = [ord(letter) - 64 for letter in e_bands]

    # Four bins per axis: zone edges 0, 15, 30, 45, 60 and band edges
    # 0, 6, 12, 18, 24.  pd.cut intervals are right-closed by default and
    # values outside the edges become NaN.
    zone_edges = [i * 15 for i in range(5)]
    band_edges = [i * 6 for i in range(5)]
    bin_labels = [1, 2, 3, 4]

    coords['s_bin_zone'] = pd.cut(coords['s_zone'], bins=zone_edges, labels=bin_labels)
    coords['s_bin_band'] = pd.cut(coords['ns_band'], bins=band_edges, labels=bin_labels)
    coords['e_bin_zone'] = pd.cut(coords['e_zone'], bins=zone_edges, labels=bin_labels)
    coords['e_bin_band'] = pd.cut(coords['ne_band'], bins=band_edges, labels=bin_labels)

    return coords

def save_obj(obj, name ):
    """Pickle ``obj`` to ``obj/<name>.pkl`` using the highest pickle protocol.

    The target directory is created when missing, so the call also works on
    a fresh checkout where ``obj/`` does not exist yet.
    """
    path = 'obj/' + name + '.pkl'
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    """Read back the object pickled at ``obj/<name>.pkl``.

    Unpickling can execute arbitrary code, so only load files that this
    project wrote itself.
    """
    pickle_path = 'obj/' + name + '.pkl'
    with open(pickle_path, 'rb') as stream:
        loaded = pickle.load(stream)
    return loaded

In [None]:
# os.listdir returns entries in arbitrary order, so both lists are sorted to
# keep positional indexing and the dict ordering built from them reproducible.
subject_list = sorted(f for f in os.listdir('./data/head_tracking_data') if 'Subject' in f)  # all subject folders
# Only real .mp4 files: later cells derive the tracking file name by stripping
# the '.mp4' suffix, which breaks for names that merely contain "mp4".
video_list = sorted(f for f in os.listdir('./data/video_files') if f.endswith('.mp4'))  # all videos


### Segmented data

In [None]:
# videos[video file name][subject folder] -> list of DataFrames, one per
# complete segment of `frames_per_segment` consecutive rows.
videos = {}

frames_per_segment = 30
for video_id in video_list:
    # Tracking files are named after the video, with .txt instead of .mp4.
    track_file = video_id.split('.mp4')[0] + '.txt'
    per_subject = {}

    for subject_id in subject_list:
        raw_track = pd.read_fwf('data/head_tracking_data/' + subject_id + '/' + track_file, header=None)
        raw_track.columns = ['latitude', 'longitude']
        converted = conv_func(raw_track)

        # Only whole segments are kept; trailing rows that do not fill a
        # complete segment are dropped (a known approximation).
        whole_segments = len(raw_track) // frames_per_segment
        per_subject[subject_id] = [
            converted[k * frames_per_segment:(k + 1) * frames_per_segment]
            for k in range(whole_segments)
        ]

    videos[video_id] = per_subject

### Non-segmented data

In [None]:
# videos_wo_segments[video file name][subject folder] -> the full converted
# DataFrame for that recording (no splitting into segments).
videos_wo_segments = {}

frames_per_segment = 30  # not used in this cell; kept so the value stays defined
for video_id in video_list:
    # Tracking files are named after the video, with .txt instead of .mp4.
    track_file = video_id.split('.mp4')[0] + '.txt'
    whole_tracks = {}

    for subject_id in subject_list:
        raw_track = pd.read_fwf('data/head_tracking_data/' + subject_id + '/' + track_file, header=None)
        raw_track.columns = ['latitude', 'longitude']
        whole_tracks[subject_id] = conv_func(raw_track)

    videos_wo_segments[video_id] = whole_tracks

### For the big navigation graph: find the unique transitions within each segment (requires the segmented data)

In [None]:
# full_data[video][user] -> DataFrame with one row per segment; its 'unique'
# column holds the string form of the sorted, de-duplicated (s, e) pairs seen
# in that segment, where s = (s_bin_zone, s_bin_band) and
# e = (e_bin_zone, e_bin_band).
full_data = {}
for video in video_list:
    many_users = {}
    # union of unique tiles within the segment, so we can connect the segments in graph
    for user in subject_list:
        # Records are gathered in a list and turned into a frame once.
        # DataFrame.append was removed in pandas 2.0 and re-copied the whole
        # frame on every call.
        segment_records = []
        for segment in videos[video][user]:

            tmp_segment = segment.reset_index(drop=True)
            lst = []
            for i in range(len(tmp_segment)):
                row = tmp_segment.iloc[i]  # fetch the row once, not four times
                lst.append(((row['s_bin_zone'], row['s_bin_band']), (row['e_bin_zone'], row['e_bin_band'])))

            df = pd.DataFrame(lst)
            df.columns = ['s', 'e']

            # groupby collapses repeated (s, e) pairs; the count itself is
            # not used, only the distinct pairs in sorted order.
            uniq_correct = df.groupby(['s','e']).size().reset_index().rename(columns={0:'count'})
            uniq_correct = uniq_correct.sort_values(by=['s', 'e'])

            segment_records.append({'unique': str(uniq_correct[['s','e']].values)})

        # columns= keeps the 'unique' column present even for a user with no
        # segments.
        single_user = pd.DataFrame(segment_records, columns=['unique'])
        many_users[user] = single_user

    full_data[video] = many_users

### Generate single-user navigation graphs and store them in a dictionary

In [None]:
# all_videos[video][subject] -> nx.DiGraph with an edge from the tile window
# of each frame to the tile window of the next frame.
all_videos = {}

for video_id in video_list:
    stats_subj = {}
    for subj_id in subject_list:
        track = videos_wo_segments[video_id][subj_id]

        # One node per frame: ((s_bin_zone, s_bin_band), (e_bin_zone, e_bin_band)).
        # Each row is read once here; previously every frame was looked up
        # twice through eight chained dict/iloc accesses.
        frame_nodes = []
        for i in range(len(track)):
            row = track.iloc[i]
            top_left_zone = (row['s_bin_zone'], row['s_bin_band'])
            bottom_right_zone = (row['e_bin_zone'], row['e_bin_band'])
            frame_nodes.append((top_left_zone, bottom_right_zone))

        G = nx.DiGraph()
        # Link consecutive frames; with fewer than two frames the graph stays empty.
        G.add_edges_from(zip(frame_nodes, frame_nodes[1:]))

        stats_subj[subj_id] = G

    all_videos[video_id] = stats_subj


### Save data to pickle

In [None]:
# save_obj(full_data, 'data')
# save_obj(videos_wo_segments, 'data_wo_segments')
# save_obj(all_videos, 'single_user_ngs(all)')