In [1]:
import numpy as np
import csv
import json
import os 
import datetime

import pprint 
pp = pprint.PrettyPrinter(indent=4)


In [2]:
import config
import utils


In [3]:
#
# Summary log type
#
class SummaryLog:
    """
    Represents one summary log file.
    """
    def __init__(self):
        self.data = []
                
    @staticmethod
    def parse_file(full_path, filename):

        log = SummaryLog()
        fpth = os.path.join(full_path, filename)

        with open(fpth, 'r') as ifs:    
            line = ifs.readline()
            while (line):
                log.parse_line(line)
                line = ifs.readline()

        return log
    
    def parse_line(self, line):
        self.data.append(line)

In [4]:
#
# JSON log type
#
class JsonLog:
    """
    Represents one JSON log file.
    """
    def __init__(self):
        pass
        
    @staticmethod
    def fix_JSON(JSON_text, filename):

        if (len(JSON_text) == 0):
            raise Exception("File {} is empty!".format(filename))

        #
        # Remove an extra comma at the end of the file
        #
        comma_idx = JSON_text.rfind(",")
        text_len = len(JSON_text)
        if (comma_idx >= text_len - 3):
            JSON_text = JSON_text[0:comma_idx] + "\n"


        #
        # Wrap submit logs with an array, cause there may be multiple submits at the same millisecond
        #
        if (filename.endswith("submit.json")):
            JSON_text = "[{}]".format(JSON_text)


        fixed = JSON_text

        try:
            parsed = json.loads(fixed)
            return fixed
        except:
            print(fixed)
            raise Exception("nende")

    @staticmethod
    def parse_file(full_path, filename):
        fpth = os.path.join(full_path, filename)
        parsed = None
        with open(fpth, 'r') as ifs:    
            try:

                parsed = json.load(ifs)
                if (filename.endswith("submit.json") and type(parsed) is dict):
                    raise Exception("!!")
            except:
                ifs.seek(0)
                text = ifs.read()

                parsed = JsonLog.fix_JSON(text, filename)
                ifs.close()

                stinfo = os.stat(fpth)                
                with open(fpth, 'w') as ofs:
                    ofs.write(parsed)
                    print("W: Rewritten the file {}!".format(filename))
                os.utime(fpth ,(stinfo.st_atime, stinfo.st_mtime))
        return parsed      

In [21]:
class Data:
    def __init__(self):
        self.verbose = False
        
        self.tasks = None
        
        
    def parse_team(self, team_name: str, team_names: list, verbose = False, validate_fix=False, generate_DRES=False):
        self.verbose = verbose
        
        print("==============\nTEAM: {}\n".format(team_name))
        
        ### main()
        if (validate_fix):
            self.validate_and_fix_input_data(team_name, team_names)
        
        self.calculate_server_ts_diff(team_name, team_names)
        
        if (generate_DRES):
            self.generate_DRES_logs(team_name, team_names)
        
        self.tasks = TaskDefs.parse_tasks(config.TASKS_JSON, config.TASKS_STARTS, config.TASK_MAPPING, verbose=self.verbose)
        
        ###
        
        self.verbose = False
     
    
    def validate_and_fix_input_data(self, team_name: str, team_names: list):
        print("%%% VALIDATING & FIXING %%%")
            
        for user_name in team_names:
            path = config.path(user_name)
            
            print("---\n\t +++ {} +++ \n\tDATA: {} \n".format(user_name, path))
                
            self.parsed[team_name] = self.validate_user(user_name, path)
            
        print("%%% DONE! %%%")
    
    def calculate_server_ts_diff(self, team_name: str, team_names: list):
        print("%%% CALCULATING SERVER TS DIFF %%%")
            
        diffs = []
        for user_name in team_names:
            path = config.path(user_name)
            
            print("---\n\t +++ {} +++ \n\tDATA: {} \n".format(user_name, path))
                
            ds = self.calculate_server_ts_diff_for_user(user_name, path)
            
            diffs.append(ds)
            
        
        #pp.pprint(diffs)
        
        mins = []
        
        for dfs in diffs:
            mins.append(np.min(np.array(dfs)))
            
        i = 0
        for user_name in team_names:
            print("DIFF MIN FOR {}: {}".format(user_name, mins[i]))
            i += 1
            
        print("%%% DONE! %%%")    
        
        return mins
        
    def generate_DRES_logs(self, team_name: str, team_names: list):
        print("%%% GENERATING DRES LOG FILES %%%")
            
        for user_name in team_names:
            path = config.path(user_name)
            
            print("---\n\t +++ {} +++ \n\tDATA: {} \n".format(user_name, path))
                
            self.generate_DRES_results_for_user(team_name, user_name, path)
            
        print("%%% DONE! %%%")
        

    
    def calculate_server_ts_diff_for_user(self, user_name, path):
        dir = config.dir_names()["actions"]
            
        diffs = []
        
        if (self.verbose):
            print("\t--- DIR: {} ---".format(dir))

        full_path = os.path.join(path, dir)
        
        for filename in os.listdir(full_path): 
            actions = JsonLog.parse_file(full_path, filename)
            
            for a in actions:
                ser_ts = a["metadata"]["serverTimestamp"]
                loc_ts = a["metadata"]["timestamp"]
                diff = ser_ts - loc_ts
                
                if ser_ts < 10000:
                    continue
                
                diffs.append(diff)

        if (self.verbose):
            print("\t--- DONE. ---")
        
            
        return diffs
    
    def generate_DRES_results_for_user(self, team_name, user_name, path):
        dir = config.dir_names()["requests"]
        out_dir = config.out_dir("dres")
        
        
            
        if (self.verbose):
            print("\t--- DIR: {} ---".format(dir))
            print("\t--- OUT: {} ---".format(out_dir))

        full_path = os.path.join(path, dir)
        
        # For each file in the 'requests' directory
        for filename in os.listdir(full_path): 
            if (not filename.endswith("result.json")):
                continue
                
            file_json = JsonLog.parse_file(full_path, filename)
            stinfo = os.stat(os.path.join(full_path, filename))                
            
            ts = file_json["timestamp"]
            file_json["request"]["timestamp"] = ts
            
            user_out_dir = os.path.join(out_dir, team_name, user_name)
            os.makedirs(user_out_dir, exist_ok = True)
            
            fpth = os.path.join(user_out_dir, "{}.json".format(ts))
            
            
            with open(fpth, "w") as ofs:
                ofs.write(json.dumps(file_json["request"], indent = 4))
                
            os.utime(fpth ,(stinfo.st_atime, stinfo.st_mtime))
                
        if (self.verbose):
            print("\t--- DONE. ---")
        
    def validate_user(self, user_name, path):
        for _, dir in config.dir_names().items():
            
            if (self.verbose):
                print("\t--- DIR: {} ---".format(dir))
                
            full_path = os.path.join(path, dir)
            for filename in os.listdir(full_path): 
                
                if (filename.endswith("log")):
                     SummaryLog.parse_file(full_path, filename)   
                else:
                    JsonLog.parse_file(full_path, filename)
                    
            if (self.verbose):
                print("\t--- DONE. ---")
                
                        

In [40]:
class TaskTarget:
    def __init__(self, video_ID, fnum_from, fnum_to):
        self._video_ID = video_ID
        self._fnum_from = fnum_from
        self._fnum_to = fnum_to
        
    def interval(self):
        return (self._fnum_from, self._fnum_to)
    
    def video_ID(self):
        return self._video_ID
    
    def __str__(self):
        s = "*** TaskTarget ***\n"
        s += f"\t\tvideo_ID: {self.video_ID()}\n"
        s += f"\t\tinterval: ({self.interval()[0]}, {self.interval()[1]})\n"
        
        return s
    

class Task:
    def __init__(self, name, type, from_ts, to_ts, 
                 text=None, target:TaskTarget = None):
        self._name = name
        
        # type \in { A, V, T }
        self._type = type
        self._from_ts = from_ts
        self._to_ts = to_ts
        
        self._text = text
        self._target = target
        
        
        if (self._type == "V" and self.text() != None):
            raise Exception("Visual tasks have no texts.")
        
        if (self._type == "A" and self.target() != None):
            raise Exception("AVS task cannot have target.")
        
    def name(self):
        return self._name
    
    def type(self):
        return self._type
    
    def timestamps(self):
        return (self._from_ts, self._to_ts)
    
    def times(self):
        tss = self.timestamps()
        return (utils.from_UNIX(tss[0]), utils.from_UNIX(tss[1]))
    
    def text(self):
        return self._text
    
    def target(self):
        return self._target
    
    
    def __str__(self):
        s = "\t*** Task ***\n"
        s += f"\tname: {self.name()}\n"
        s += f"\ttype: {self.type()}\n"
        s += f"\ttimestamps: ({self.timestamps()[0]}, {self.timestamps()[1]})\n"
        s += f"\ttimes: ({self.times()[0]}, {self.times()[1]})\n"
        s += f"\ttext: {self.text()}\n"
        s += f"\ttarget: {self.target().__str__()}\n"
        s += f"\t-----------------------------------\n"
        
        return s
        
    
        

class TaskDefs:
    def __init__(self):
        self.tasks = []
        
        
    def task(idx:int):
        return self.tasks[idx]
        
    def task(name:str):
        for t in self.tasks:
            if (t.name() == name):
                return t
                

    
    
    @staticmethod
    def parse_tasks(tasks_fpth, tasks_ends_fpth, mapping_fpth, verbose=False):
        print("%%% PARSING TASKS %%%")
            
        tasks = TaskDefs()
        
        tasks_JSON = None
        with open(tasks_fpth) as ifs:
            tasks_JSON = json.load(ifs)["tasks"]
        
        guid_to_video_ID = {}
        guid_to_FPS = {}
        with open(mapping_fpth) as ifs_mapping:
            mapping_reader = csv.reader(ifs_mapping, delimiter=',')
            for row in mapping_reader:
                if (len(row) < 3):
                    break
                guid_to_video_ID[row[0]] = int(row[1])
                guid_to_FPS[row[0]] = int(row[2])
        
        
        tdefs = TaskDefs()
        
        count = 0
        with open(tasks_ends_fpth) as ifs_starts:
            starts_reader = csv.reader(ifs_starts, delimiter=',')

            for row in starts_reader:
                count +=1
                t_name = row[2]
                
                if (verbose):
                    print("\t ...Task {} parsed.".format(t_name))
                
                t = utils.find_task_def(tasks_JSON, t_name)
                t_datetime = "{}T{}".format(row[0], row[1])
                
                
                t_type =t["taskType"][0:1]
                t_tsfrom = utils.UNIX_from_datetime(t_datetime)
                t_tsto = t_tsfrom + (t["duration"] * 1000) #ms
                
                text = None
                if (t_type == "T"):
                    text = ""
                    for c in t["components"]:
                        text = c["description"]
                        
                elif (t_type == "A"):
                    text = t["components"][0]["description"]
                
                t_target = None
                if (t_type != "A"):
                    guid = t["target"]["mediaItems"][0]["mediaItem"]
                    
                    fps = guid_to_FPS[guid]
                    vid_ID = guid_to_video_ID[guid]
                    
                    fr = t["target"]["mediaItems"][0]["temporalRange"]["start"]["value"] * fps
                    to = t["target"]["mediaItems"][0]["temporalRange"]["start"]["value"] * fps
                    
                    if (t["target"]["mediaItems"][0]["temporalRange"]["start"]["unit"] != "SECONDS"):
                        raise Exception("Invalid value type.")
                    
                    t_target = TaskTarget(
                        vid_ID, fr, to
                    )
                
                task = Task(
                    t_name, 
                    t_type,
                    t_tsfrom, t_tsto,
                    text,
                    t_target
                )
                tdefs.tasks.append(task)

        print(f"%%% Parsed {count} tasks. %%%")
        print("%%% DONE! %%%")
        return tdefs

    
    
    def __str__(self):
        s = "*** TaskDefs ***\n"
        
        for t in self.tasks:
            s += t.__str__() + "\n"
        
        return s

In [41]:
data = Data()

In [42]:

data.parse_team("SOMHunter", 
                  ["sh-patrik", "sh-vit"], 
                  verbose=True, 
                  validate_fix=False,
                  generate_DRES=False
                 )


TEAM: SOMHunter

%%% CALCULATING SERVER TS DIFF %%%
---
	 +++ sh-patrik +++ 
	DATA: ./data/sh-patrik 

	--- DIR: actions ---
	--- DONE. ---
---
	 +++ sh-vit +++ 
	DATA: ./data/sh-vit 

	--- DIR: actions ---
	--- DONE. ---
DIFF MIN FOR sh-patrik: -123
DIFF MIN FOR sh-vit: 1
%%% DONE! %%%
%%% PARSING TASKS %%%
	 ...Task v21-1 parsed.
	 ...Task a21-5 parsed.
	 ...Task t21-1 parsed.
	 ...Task v21-2 parsed.
	 ...Task a21-9 parsed.
	 ...Task t21-2 parsed.
	 ...Task v21-3 parsed.
	 ...Task t21-7 parsed.
	 ...Task a21-7 parsed.
	 ...Task a21-8 parsed.
	 ...Task v21-4 parsed.
	 ...Task t21-3 parsed.
	 ...Task a21-1 parsed.
	 ...Task t21-4 parsed.
	 ...Task v21-5 parsed.
	 ...Task t21-5 parsed.
	 ...Task v21-6 parsed.
	 ...Task v21-7 parsed.
	 ...Task a21-2 parsed.
	 ...Task v21-8 parsed.
	 ...Task v21-9 parsed.
	 ...Task v21-10 parsed.
	 ...Task v21-11 parsed.
	 ...Task v21-12 parsed.
	 ...Task v21-13 parsed.
	 ...Task a21-3 parsed.
	 ...Task v21-14 parsed.
	 ...Task a21-10 parsed.
	 ...Task v2

In [43]:
print(data.tasks)

*** TaskDefs ***
	*** Task ***
	name: v21-1
	type: V
	timestamps: (1624277458765, 1624277758765)
	times: (21-06-2021 14:10:58, 21-06-2021 14:15:58)
	text: None
	target: *** TaskTarget ***
		video_ID: 4178
		interval: (1875, 1875)

	-----------------------------------

	*** Task ***
	name: a21-5
	type: A
	timestamps: (1624277828154, 1624278128154)
	times: (21-06-2021 14:17:08, 21-06-2021 14:22:08)
	text: Find shots of a person holding or waving a flag.
	target: None
	-----------------------------------

	*** Task ***
	name: t21-1
	type: T
	timestamps: (1624278204021, 1624278624021)
	times: (21-06-2021 14:23:24, 21-06-2021 14:30:24)
	text: Graffity artists spraying purple outline of the letters RTH. Letters are white/green on a wall with a blueish graffity on the left. He wears a blue jacket over a green hoodie.

	target: *** TaskTarget ***
		video_ID: 3085
		interval: (7850, 7850)

	-----------------------------------

	*** Task ***
	name: v21-2
	type: V
	timestamps: (1624278710026, 162

In [17]:
print(utils.from_UNIX(1624277458765))
print(utils.from_UNIX(1624277758765))


21-06-2021 14:10:58
21-06-2021 14:15:58


In [11]:
data.parse_team("CollageHunter", 
                  ["collage-jakub", "collage-premek"], 
                  verbose=True, 
                  validate_fix=False, 
                  generate_DRES=False
                 )

TEAM: CollageHunter

%%% CALCULATING SERVER TS DIFF %%%
---
	 +++ collage-jakub +++ 
	DATA: ./data/collage-jakub 

	--- DIR: actions ---
	--- DONE. ---
---
	 +++ collage-premek +++ 
	DATA: ./data/collage-premek 

	--- DIR: actions ---
	--- DONE. ---
DIFF MIN FOR collage-jakub: 14
DIFF MIN FOR collage-premek: 5
%%% DONE! %%%


In [12]:
data.parse_team("LegacySOMHunter", 
                  ["legacy-tereza", "legacy-franta-tomas"], 
                  verbose=True, 
                  validate_fix=False, 
                  generate_DRES=False
                 )

TEAM: LegacySOMHunter

%%% CALCULATING SERVER TS DIFF %%%
---
	 +++ legacy-tereza +++ 
	DATA: ./data/legacy-tereza 

	--- DIR: actions ---
	--- DONE. ---
---
	 +++ legacy-franta-tomas +++ 
	DATA: ./data/legacy-franta-tomas 

	--- DIR: actions ---
	--- DONE. ---
DIFF MIN FOR legacy-tereza: 3
DIFF MIN FOR legacy-franta-tomas: 1
%%% DONE! %%%
