In [None]:
import pandas as pd 
from collections import defaultdict
import uuid
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from googleapiclient.errors import HttpError
import google.auth
import io, json
from pathlib import Path

In [None]:
# Load the "TLKT" sheet export. Later cells read the columns "Kỳ thi",
# "Loại tư liệu kỳ thi", "Tên file (đích)" and "Link Google Drive" from it.
tlkt_df = pd.read_csv("DATABASE.xlsx - TLKT.csv")
# Plain-text preview of the first rows.
print (tlkt_df.head())

In [None]:
class UniversialParticipant: 
    """One participant record.

    Attributes:
        id: Random UUID generated when the object is constructed.
        handles: Set of handles attached to this participant
            (presumably account names on online judges - confirm with callers).
        name: Participant name, "" when unknown.
        school: School name, "" when unknown.
        rating: Numeric rating, initialised to 0.
    """

    def __init__(self, handles = None, name = "", school = "") -> None:
        """Create a participant.

        Args:
            handles: Optional iterable of handles. It is copied into a new set,
                so instances never share (or alias) a caller's container.
            name: Participant name.
            school: School name.
        """
        self.id = uuid.uuid4()
        # A literal `{}` default is a dict shared by every instance, and a dict
        # is rejected by set.union()/set.intersection() when used as the
        # receiver; always store an independent set instead.
        self.handles = set(handles) if handles else set()
        self.name = name
        self.school = school
        self.rating = 0 

    def __str__(self) -> str:
        """Return a one-line summary with id, handles, name and school."""
        return f"id = {self.id}, handles = {self.handles}, name = {self.name}, school = {self.school}"

class PartitipantsPool: 
    """Registry of participants, stored in `self.all` keyed by participant id."""

    def __init__(self) -> None:
        """Start with an empty registry."""
        self.all = defaultdict(UniversialParticipant) 

    def __str__(self) -> str:
        """Return every participant's string form, one per line."""
        return "".join(p.__str__() + "\n" for p in self.all.values())

    def access(self, handles = None, name = "", school = ""):
        """Find the participant matching the given identity, or register a new one.

        A stored participant matches when either
          * `name` is non-empty and both name and school are equal, or
          * it shares at least one handle with `handles`.
        On a match the given handles are merged into the stored participant.

        Args:
            handles: Optional iterable of handles for the participant.
            name: Participant name ("" when unknown).
            school: School name ("" when unknown).

        Returns:
            The matched or newly created participant object.
        """
        # Work on an independent set: the old `{}` default was a shared dict.
        requested = set(handles) if handles else set()
        for p in self.all.values():
            # Normalise in case an entry still carries a non-set container.
            known = set(p.handles)
            # An empty name must not count as a name match, otherwise every
            # anonymous lookup would be merged into the first anonymous entry.
            same_person = bool(name) and p.name == name and p.school == school
            if same_person or (known & requested):
                p.handles = known | requested
                return p
        newp = UniversialParticipant(handles=requested, name=name, school=school)
        self.all[newp.id] = newp 
        return newp

# Module-level PartitipantsPool instance; its registry starts out empty.
Participants = PartitipantsPool()

def parsescore(s) -> float: 
    """Convert a score cell to a float, tolerating non-numeric decorations.

    The value is first passed to float() unchanged. If that fails, every
    character other than an ASCII digit or '.' is dropped and the remainder is
    converted.

    Args:
        s: Score token, normally a string.

    Returns:
        The score as a float.

    Raises:
        ValueError: If nothing numeric remains after cleaning.
    """
    try:
        return float(s)
    except ValueError:
        # Keep the decimal point: stripping it would silently turn "12.5*"
        # into 125.0.
        cleaned = ''.join(c for c in s if c in "0123456789.")
        return float(cleaned)

def parserankingrow(row, taskcount):
    """Normalise one whitespace-split ranking row.

    The last `taskcount + 1` tokens are converted with parsescore; the tokens
    between the first one and those scores are joined with single spaces.

    Returns:
        [row[0], joined middle tokens, score_1, ..., score_(taskcount + 1)]
    """
    first_score = -taskcount - 1
    joined_middle = ' '.join(row[1:first_score])
    scores = [parsescore(token) for token in row[first_score:]]
    return [row[0], joined_middle, *scores]

def getAbbreviatedName (contest_type, contest_name, task_name):
    """Build the abbreviated identifier of a task within a contest.

    Contests of type "Free Contest", "Beginner Free Contest" and
    "Testing Round" are handled by type; any other contest is recognised by
    the prefix of its name. The task name is always lower-cased.

    Returns:
        The identifier string, or None when neither the contest type nor the
        contest name is recognised.
    """
    def with_contest_number(prefix, width):
        # prefix + last word of the contest name as a zero-padded integer
        # + "_" + lower-cased task name
        contest_number = int(contest_name.split()[-1])
        return f"{prefix}{contest_number:0{width}}_{task_name.lower()}"

    # Series identified by contest type.
    if contest_type == "Free Contest":
        return with_contest_number("fc", 3)
    if contest_type == "Beginner Free Contest":
        return with_contest_number("fcb", 3)
    if contest_type == "Testing Round":
        if not contest_name.startswith("Testing Round 2."):
            return with_contest_number("fct", 3)
        # "Testing Round 2.x": the part after the last dot becomes a suffix.
        sub_round = contest_name.split('.')[-1]
        return f"fct002_{sub_round}_{task_name.lower()}"

    # Remaining contests are identified by the prefix of their name.
    if contest_name.startswith("IOI"):
        return with_contest_number("fc_ioipc", 4)
    if contest_name.startswith("Free Contest 4 Years"):
        return f"fc_4years_{task_name.lower()}"
    if contest_name.startswith("Free Contest Cup"):
        # Everything after the third word, glued together and lower-cased.
        edition = ''.join(contest_name.split()[3:]).lower()
        return f"fcc{edition}_{task_name.lower()}"
    if contest_name.startswith("Happy Wedding"):
        return f"fc_hwc_r{contest_name.split()[-1]}_{task_name.lower()}"
    if contest_name.startswith("PreTST 2018 #2"):
        return f"fc_pretst2018_2_{task_name.lower()}"
    return None

def getDiffCap (contest_type, contest_name):
    """Return the difficulty cap for a contest.

    0.75 for "Beginner Free Contest", 1.5 for "Free Contest" and
    "Testing Round", 2.0 for the special events recognised by name prefix,
    and 1.5 for anything else.
    """
    if contest_type == "Beginner Free Contest":
        return 0.75
    if contest_type == "Free Contest":
        return 1.5
    if contest_type == "Testing Round":
        # The "Testing Round 2.x" sub-series is told apart from the regular
        # rounds, but both currently share the same cap.
        in_round_two_series = contest_name.startswith("Testing Round 2.")
        return 1.5 if in_round_two_series else 1.5

    special_event_prefixes = (
        "IOI",
        "Free Contest 4 Years",
        "Free Contest Cup",
        "Happy Wedding",
        "PreTST 2018 #2",
    )
    return 2.0 if contest_name.startswith(special_event_prefixes) else 1.5

class RankingBoard:
    """Scoreboard of one contest, parsed from a ranking text file.

    Attributes:
        flag: True when reading/parsing raised ValueError or IndexError.
        parsed: True when at least one ranking row was parsed.
        header: Whitespace-split first line of the file ([] if nothing was read).
        tasks: Task column names, i.e. header[2:-1].
        table: Rows as returned by parserankingrow
            ([row[0], joined name, task scores..., last column]).
        maxtaskpoint: Full-score value per task, aligned with `tasks`.
    """

    def __init__(self, filedir = "", judgeformat = "cms", contesttype = "Free Contest", contestname = "") -> None:
        """Parse `filedir` if it is an existing file.

        Args:
            filedir: Path of the ranking file. Nothing is read when it is not
                an existing regular file.
            judgeformat: "cms" is parsed, "ttjudge" is accepted but ignored.
            contesttype: Selects the default full score per task.
            contestname: Used to look up per-contest overrides for the
                "Đặc biệt" contest type.

        Raises:
            NotImplementedError: If the file exists and `judgeformat` is
                neither "cms" nor "ttjudge".
        """
        self.flag = False
        self.parsed = False
        # Always defined, so the query methods also work on a board that was
        # never (or unsuccessfully) parsed.
        self.header = []
        self.tasks = []
        self.table = []
        self.maxtaskpoint = []

        if not Path(filedir).is_file():
            return
        if judgeformat == "ttjudge":
            return
        if judgeformat != "cms":
            raise NotImplementedError("Judge format is not yet supported.") 

        try: 
            # The context manager closes the handle even when decoding fails
            # (UnicodeDecodeError is a ValueError and is handled below).
            with open(filedir, encoding="utf8") as file:
                lines = file.readlines()
            self.header = lines[0].split() if lines else []
            self.tasks = self.header[2:-1]
            taskcount = len(self.tasks) 
            # NOTE(review): a row with exactly taskcount + 1 tokens passes this
            # filter although it has no separate first column; row[0] is then
            # also parsed as the first score. Threshold kept as in the original.
            self.table = [
                parserankingrow(row, taskcount)
                for row in (line.split() for line in lines[1:])
                if len(row) >= taskcount + 1
            ]
            self.parsed = bool(len(self.table))
            defaults = self._defaultMaxTaskPoints(contesttype, contestname, taskcount)
            # Raise every task's full score to the highest single-task score
            # observed anywhere in the table (taken over all tasks together).
            observed_max = 0.0
            for row in self.table:
                for score in row[2:-1]: 
                    observed_max = max(observed_max, score) 
            self.maxtaskpoint = [max(point, observed_max) for point in defaults]
        except ValueError:
            self.flag = True 
            print (f"Filedir = {filedir}, Value error exception.") 
        except IndexError:
            self.flag = True 
            print (f"Filedir = {filedir}, Index error exception.") 

    @staticmethod
    def _defaultMaxTaskPoints(contesttype, contestname, taskcount):
        """Return the default full score for each of `taskcount` tasks."""
        if contesttype in ("Free Contest", "Beginner Free Contest", "Testing Round"):
            point = 50.0
        elif contesttype == "Đặc biệt":
            POINTS = {"PreTST 2018 #2" : 50.0}
            point = POINTS.get(contestname, 100.0)
        else: 
            print (f"Contest type '{contesttype}' unknown, setting to default task value = 50.")
            point = 50.0
        return [point for _ in range(taskcount)]
        
    def getTrueParticipantsCount(self, zeroScoreParticipantWeight = 0.0):
        """Count rows whose last column is positive.

        Rows whose last column is exactly 0.0 contribute
        `zeroScoreParticipantWeight` each; negative values contribute nothing.
        """
        c = 0.0
        for row in self.table:
            if row[-1] == 0.0: 
                c += zeroScoreParticipantWeight
            elif row[-1] > 0.0:
                c += 1
        return c

    def __getTaskIndex(self, taskname):
        """Return the 0-based index of `taskname` among the task columns.

        Only task columns are searched, so the name of a non-task header
        column can no longer produce a bogus (negative or out-of-range) index.
        If the name occurs more than once, the last occurrence wins.

        Raises:
            ValueError: If `taskname` is not a task column.
        """
        pid = -1
        for i, x in enumerate(self.tasks):
            if taskname == x:
                pid = i
        if pid == -1:
            raise ValueError("Problem name doesn't exist.")
        return pid

    def getACcount(self, taskname): 
        """Return how many rows scored exactly the task's full score."""
        pid = self.__getTaskIndex(taskname)
        return sum(int(row[pid + 2] == self.maxtaskpoint[pid]) for row in self.table)

    def getMaxTaskPoint(self, taskname): 
        """Return the full-score value of `taskname`."""
        pid = self.__getTaskIndex(taskname)
        return self.maxtaskpoint[pid]
    
    def getTotalPoints(self, taskname): 
        """Return the sum of all rows' scores on `taskname`."""
        pid = self.__getTaskIndex(taskname)
        ret = 0.0
        for row in self.table:
            ret += row[pid + 2]
        return ret

    

In [None]:
# Smoke-check the abbreviation scheme: one regular contest and two special events.
print (*[
    getAbbreviatedName(*sample)
    for sample in (
        ("Free Contest", "Free Contest 123", "BULLS"),
        ("Đặc biệt", "Happy Wedding Contest - Round 4", "BULLS"),
        ("Đặc biệt", "PreTST 2018 #2", "AUDITION"),
    )
], sep="\n")

In [None]:
class GoogleDriveFile: # refer to the "gfile" attribute 
    """A file referenced by a Google Drive link, with optional local download.

    Attributes:
        name: File name as given by the caller.
        link: Google Drive link ("" for an empty placeholder).
        extension: Last three characters of `name`.
        id: Longest '/'-separated segment of `link`, used as the Drive file id.
        flag: True when the last download attempt hit an HttpError.
        downloaded: True once the file is available at `localdir`.
        localdir: Local path of the file, "" while not available.
        done: True when download() found the destination already present.
    """

    def __init__(self, name = "", link = "") -> None:
        """Store name and link and derive extension and file id from them."""
        self.name = name 
        self.link = link
        self.extension = name[-3:]
        self.flag = False
        self.localdir = "" 
        self.downloaded = False
        # Declared up front so the attribute exists on every instance.
        self.done = False
        # The file id is taken to be the longest path segment of the link
        # (the first one on ties, "" for an empty link).
        self.id = max(link.split('/'), key=len)

    def __str__(self) -> str:
        """Return a short description with file name and id."""
        return f"File name: {self.name}, file id {self.id}."

    def isempty(self) -> bool: 
        """Return True when no link is set."""
        return not bool(self.link)
    
    def download(self, destination, credentials_file = "gdrivefile-downloader-ef880c6487fc.json"):
        """Download the file to `destination` unless it is already there.

        Args:
            destination: Local path to write to.
            credentials_file: Path of the Google credentials JSON file passed
                to google.auth.load_credentials_from_file.
                More info: https://cloud.google.com/docs/authentication

        On success (or when the destination already exists) `downloaded` is
        set and `localdir` points at `destination`. On an HttpError the error
        is printed, `flag` is set and the incomplete file is removed so that a
        later run does not mistake it for a finished download.
        """
        target = Path(destination)
        if (target.exists() and target.is_file()): 
            print (f"File {destination} existed.")
            self.done = True
            # An existing file is as usable as a fresh download.
            self.downloaded = True
            self.localdir = destination
            return

        # A new attempt must not inherit the failure of a previous one.
        self.flag = False
        credz, _ = google.auth.load_credentials_from_file(credentials_file)
        drive_service = build('drive', 'v3', credentials=credz)
        request = drive_service.files().get_media(fileId=self.id)
        completed = False
        try:
            # The context manager guarantees the handle is closed on every path.
            with io.FileIO(destination, 'wb') as fh:
                downloader = MediaIoBaseDownload(fh, request)
                done = False
                while done is False:
                    try:
                        status, done = downloader.next_chunk()
                        print("Download %d%%." % int(status.progress() * 100))
                    except HttpError as error:
                        print(f'[{self.id}][{destination}] An error occurred: {error}')
                        self.flag = True
                        break
            completed = not self.flag
        finally:
            # Drop a partial file (HttpError or any other exception).
            if not completed and target.is_file():
                target.unlink()
        if completed: 
            self.downloaded = True
            self.localdir = destination

class Task: 
    """A contest task together with the Drive files attached to it."""

    def __init__(self) -> None:
        """Create an unnamed task with empty placeholder files."""
        self.name = "" 
        # Empty GoogleDriveFile placeholders until real files are assigned.
        self.statement_gfile = GoogleDriveFile() 
        # Submissions associated with the task, typically solutions.
        self.submissions = []
        self.editorial_gfile = GoogleDriveFile() 
        self.testdata_gfile = GoogleDriveFile() 
        self.max_score = 0

    def __str__(self) -> str:
        """Return a one-line description ending with a newline."""
        return " Task name: {}\n".format(self.name)

class Submission:
    """A submission file for a task, with a status and a score."""

    def __init__(self) -> None:
        """Create a submission with an empty file, status "main" and score 0."""
        self.gfile = GoogleDriveFile() 
        # One of "main", "correct", "incorrect" - a simplified version of the
        # Polygon status system.
        self.status = "main" 
        # Starts out attached to a fresh, unnamed Task.
        self.task = Task()  
        self.score = 0



class Contest:
    """A contest: its tasks, its ranking file and the parsed ranking board."""

    def __init__(self) -> None:
        """Create an unnamed contest with no tasks and an empty ranking."""
        self.name = ""
        # Missing task names are created on first access.
        self.tasks = defaultdict(Task)
        self.ranking_gfile = GoogleDriveFile()
        self.ranking_board = RankingBoard()
        self.type = ""

    def setType(self, type):
        """Set the contest type."""
        self.type = type

    def __str__(self) -> str:
        """Return name, type and ranking file, followed by every task's string form."""
        summary = "Contest name: {}\nContest Details: Type: {}\nRanking file: {}\n".format(
            self.name, self.type, self.ranking_gfile
        )
        task_lines = [self.tasks[key].__str__() for key in self.tasks]
        return summary + "".join(task_lines)

In [None]:
# Contest name -> Contest; entries are created on first access.
contests = defaultdict(Contest) 

# Each spreadsheet row describes one file that belongs to a contest.
for row_id in range(len(tlkt_df)): 
    contest = tlkt_df["Kỳ thi"][row_id]
    contests[contest].name = contest
    tlkt_type = tlkt_df["Loại tư liệu kỳ thi"][row_id]
    file_name = tlkt_df["Tên file (đích)"][row_id]
    # The task name is the file name without its last four characters.
    task = file_name[:-4]
    file_glink = tlkt_df["Link Google Drive"][row_id] 
    file = GoogleDriveFile(file_name, file_glink)

    if tlkt_type == "Đề bài":
        # Task statement.
        contests[contest].tasks[task].statement_gfile = file
        contests[contest].tasks[task].name = task
    elif tlkt_type == "Bảng điểm": 
        # Ranking: a csv/txt file always replaces the current one, a png is
        # only used while no ranking file has been recorded yet.
        if file.extension in ("csv", "txt"):
            contests[contest].ranking_gfile = file
        elif file.extension == "png" and contests[contest].ranking_gfile.name == "":
            contests[contest].ranking_gfile = file
    

In [None]:
# Tally how many contests have a ranking file of each extension.
ranking_ext = defaultdict(int)
for contest in contests.keys():
    ranking_ext[contests[contest].ranking_gfile.extension] += 1
# Number of contests, then the per-extension counts.
print (len(contests))
print (ranking_ext)

In [None]:
# LOAD CONTEST TYPE
# Read the "KT" sheet export and attach each contest's type.
kt_df = pd.read_csv("DATABASE.xlsx - KT.csv")
for row_id in range(len(kt_df)): 
    contest_name = kt_df["Kỳ thi"][row_id]
    contest_type = kt_df["Loại kỳ thi"][row_id] 
    # A contest that appears only in this sheet is created by the defaultdict
    # on first access; set its name as well so it is not left as "".
    contests[contest_name].name = contest_name
    contests[contest_name].setType(contest_type)

In [None]:
# Download every contest's ranking file to TLKT/<contest name>/ranking.<ext>
# and parse it into the contest's ranking board.
localdir_head = "TLKT/"
for contest_name, contest in contests.items():
    if contest.ranking_gfile.isempty():
        continue
    localdir = "".join([localdir_head, contest_name, "/"])
    Path(localdir).mkdir(parents=True, exist_ok=True)
    localdir = "".join([localdir, "ranking.", contest.ranking_gfile.extension])
    contest.ranking_gfile.download(localdir)
    contest.ranking_board = RankingBoard(
        localdir,
        judgeformat="cms",
        contesttype=contest.type,
        contestname=contest_name,
    )



In [None]:
# ANALYTICS
# One output row per task of every contest whose ranking board was parsed.
fo_dict = defaultdict(list)
fo_dict.update({
    column: []
    for column in (
        "Task",
        "MaxPointInContest",
        "ParticipantsCount",
        "ACCount",
        "TotalPointInContest",
        "DifficultyMultiplier",
    )
})
for contest_name, contest in contests.items():
    if not contest.ranking_board.parsed:
        continue
    for task in contest.ranking_board.tasks: 
        abbrname = getAbbreviatedName(contest.type, contest_name, task) 
        mpic = contest.ranking_board.getMaxTaskPoint(task)
        pc = contest.ranking_board.getTrueParticipantsCount() 
        ac = contest.ranking_board.getACcount(task) 
        tp = contest.ranking_board.getTotalPoints(task)
        dm = getDiffCap(contest.type, contest_name) 
        task_stats = {
            "Task": abbrname,
            "MaxPointInContest": mpic,
            "ParticipantsCount": pc,
            "ACCount": ac,
            "TotalPointInContest": tp,
            "DifficultyMultiplier": dm,
        }
        for column, value in task_stats.items():
            fo_dict[column].append(value)
fo_df = pd.DataFrame(fo_dict) 
fo_df.to_csv("analysis.csv")  

