# Analysis of NK Legpuzzelen results

Define functions and data structures

In [1]:
from __future__ import annotations
from dataclasses import dataclass

from typing import List, Dict

# Set to True to make the debug cell print the parsed final and preliminary rounds.
DEBUG = False

def edit_distance(a, b, ratio=False, print_matrix=False):
    """Compute the Levenshtein edit distance between two strings.

    Args:
        a: first string.
        b: second string.
        ratio: when True, return a similarity in [0, 1] instead of the
            distance: (len(a) + len(b) - distance) / (len(a) + len(b)).
            Two empty strings are identical and give 1.0.
        print_matrix: when True, print the full dynamic-programming matrix.

    Returns:
        The number of single-character insertions, deletions and
        substitutions needed to turn `a` into `b` (int), or the similarity
        ratio (float) when `ratio` is True.
    """
    n = len(a)
    m = len(b)

    # lev[i][j] is the distance between the prefixes a[:i] and b[:j].
    lev = [[0] * (m + 1) for _ in range(n + 1)]

    # Turning a prefix into the empty string (or back) costs its length.
    for i in range(n + 1):
        lev[i][0] = i
    for j in range(m + 1):
        lev[0][j] = j

    for i in range(1, n + 1):
        for j in range(1, m + 1):
            deletion = lev[i - 1][j] + 1
            insertion = lev[i][j - 1] + 1
            substitution = lev[i - 1][j - 1] + (1 if a[i - 1] != b[j - 1] else 0)
            lev[i][j] = min(deletion, insertion, substitution)

    if print_matrix:
        print(lev)

    distance = lev[n][m]
    if not ratio:
        return distance

    total_length = n + m
    if total_length == 0:
        # Both inputs are empty: identical strings, and nothing to divide by.
        return 1.0
    return (total_length - distance) / total_length

@dataclass
class Entry:
    """One row of a results list: a team's position and time in one round.

    The constructor and __repr__ are hand-written; @dataclass leaves
    explicitly defined methods alone and only contributes the remaining
    generated methods (such as field-based equality).
    """
    round: str          # name of the round this result belongs to
    team_name: str      # team name exactly as passed to the constructor
    position: int       # finishing position, converted with int()
    time_seconds: int   # `time` converted to seconds
    time: str           # original time string, 'H:M:S'
    searchtext: str     # upper-cased team name reduced to A-Z and 0-9, used for fuzzy matching

    def __init__(self, round, position, teamname, time):
        """Build an entry from raw string values.

        Args:
            round: name of the round.
            position: finishing position; anything int() accepts.
            teamname: team name.
            time: time as 'H:M:S'.
        """
        self.round = round
        self.position = int(position)
        self.team_name = teamname
        self.time = time
        self.time_seconds = Entry.time_to_seconds(time)
        # Only ASCII letters and digits survive, so a name made of other
        # characters (e.g. an emoji) yields an empty search text.
        self.searchtext = ''.join(
            ch for ch in teamname.upper() if 'A' <= ch <= 'Z' or '0' <= ch <= '9'
        )

    @staticmethod
    def time_to_seconds(time):
        """Convert an 'H:M:S' string to a number of seconds.

        Declared static so it can be called on the class and on instances.
        Raises IndexError when fewer than three ':'-separated parts are
        present and ValueError when a part is not an integer.
        """
        parts = time.split(':')
        return 3600*int(parts[0]) + 60*int(parts[1]) + int(parts[2])

    def __repr__(self):
        return f'{self.position:2d}. {self.time_seconds:5d}s {self.team_name}'

@dataclass
class PreliminaryEntry(Entry):
    """A team's result in a preliminary round.

    `final` refers to the FinalEntry this result has been linked to and is
    None while no link exists.
    """
    final: FinalEntry = None

    def __init__(self, round, position, teamname, time):
        # Hand-written so the constructor keeps the four raw-value arguments.
        super().__init__(round, position, teamname, time)

    def __repr__(self):
        # The linked final position, when there is one, is appended in parentheses.
        if self.final:
            linked_position = f" ({self.final.position})"
        else:
            linked_position = ""
        return super().__repr__() + linked_position
    

@dataclass
class FinalEntry(Entry):
    """A team's result in the final, optionally linked to a preliminary result."""
    preliminary: PreliminaryEntry = None  # linked preliminary result, None while unlinked
    matchlevel: int = -1                  # edit distance between both search texts, -1 while unlinked
    time_factor: float = 0  # <1: preliminary was faster, >1: final was faster

    def __init__(self, position, teamname, time):
        # The round name is fixed to 'final' for every entry of this class.
        super().__init__('final', position, teamname, time)

    def __repr__(self):
        if self.preliminary:
            link_info = f" ({self.preliminary.position}. {self.preliminary.round} - {self.time_factor:.3f})"
        else:
            link_info = ""
        return super().__repr__() + link_info

    def set_preliminary(self, entry: PreliminaryEntry):
        """Link this final result to `entry` and derive the match statistics.

        A previously linked preliminary entry is detached first. Afterwards
        `matchlevel` holds the edit distance between the two search texts and
        `time_factor` the preliminary time divided by the final time.
        """
        previously_linked = self.preliminary
        if previously_linked:
            previously_linked.final = None

        self.preliminary = entry
        entry.final = self

        self.matchlevel = edit_distance(self.searchtext, entry.searchtext)
        self.time_factor = entry.time_seconds / self.time_seconds

@dataclass
class Preliminary:
    """A preliminary round: its name and the results recorded for it."""
    name: str
    entries: List[PreliminaryEntry]

    def __init__(self, name):
        # Every round starts without entries; they are appended afterwards.
        self.entries = []
        self.name = name

    def time_factors(self) -> List[float]:
        """Return the time factor of every entry that is linked to a final result."""
        factors = []
        for entry in self.entries:
            if entry.final:
                factors.append(entry.final.time_factor)
        return factors

    def __repr__(self):
        # First line: round name followed by the time factors; then one line per entry.
        formatted_factors = ', '.join(f'{factor:.3f}' for factor in self.time_factors())
        lines = [f"{self.name} {formatted_factors}"]
        lines.extend(repr(entry) for entry in self.entries)
        return '\n'.join(lines) + '\n'


Load preliminaries and final data 

In [2]:
import os
import csv

# Results of the final; every CSV row is passed as-is to FinalEntry(position, teamname, time).
final: List[FinalEntry] = []

# The encoding is pinned so team names with non-ASCII characters load the
# same way on every platform, and newline='' is what the csv module expects.
# NOTE(review): assumes the data files are stored as UTF-8 — confirm.
with open('data/final.csv', 'r', encoding='utf-8', newline='') as f:
    for line in csv.reader(f):
        if line:  # csv.reader yields an empty list for a blank line
            final.append(FinalEntry(*line))

# Preliminary rounds keyed by round name (the file name without its extension).
preliminaries : Dict[str, Preliminary] = {}

# os.listdir returns names in arbitrary order; sorting makes the round order,
# and with it the tie-breaking when teams are linked, reproducible.
for filename in sorted(os.listdir('data')):
    round_name, extension = os.path.splitext(filename)
    if filename == 'final.csv' or extension.lower() != '.csv':
        # Skip the final itself and anything that is not a results file.
        continue

    preliminary = Preliminary(round_name)
    with open(os.path.join('data', filename), 'r', encoding='utf-8', newline='') as f:
        for line in csv.reader(f):
            if line:
                preliminary.entries.append(PreliminaryEntry(round_name, *line))
    preliminaries[round_name] = preliminary

Link teams from the final with teams from the preliminaries

In [3]:
# Every preliminary result, flattened in round order, so each final entry can
# be compared against all of them.
all_preliminary_entries = [
    preliminary_entry
    for round_results in preliminaries.values()
    for preliminary_entry in round_results.entries
]

for final_entry in final:
    # Best match: smallest edit distance between the search texts, and the
    # better preliminary position when distances are equal. min() returns the
    # first minimal element, so remaining ties go to the earliest entry.
    min_entry = min(
        all_preliminary_entries,
        key=lambda candidate: (
            edit_distance(final_entry.searchtext, candidate.searchtext),
            candidate.position,
        ),
        default=None,
    )

    # Without any preliminary entries there is nothing to link to.
    if min_entry is not None:
        final_entry.set_preliminary(min_entry)

Print debug information

In [4]:
if DEBUG:
    # Collect everything first: the final entries under a 'final' heading,
    # a blank separator line, then one block per preliminary round.
    debug_lines = ['final']
    debug_lines.extend(str(final_entry) for final_entry in final)
    debug_lines.append('')
    debug_lines.extend(str(round_results) for round_results in preliminaries.values())

    for debug_line in debug_lines:
        print(debug_line)

Select a team name to analyze

In [5]:
# Team to analyze. The analysis cell compares this with == against the team
# names of the preliminary entries, so it has to match a name exactly.
candidate_team_name = "😎"
# candidate_team_name = "High Five"

Analyze the selected team.

In [6]:
# Locate the selected team: its preliminary entry and the round it took part in.
candidate_preliminary = None
candidate_entry = None
for round_results in preliminaries.values():
    for entry in round_results.entries:
        if entry.team_name == candidate_team_name:
            # No break: when the name occurs more than once, the last occurrence wins.
            candidate_entry = entry
            candidate_preliminary = round_results

if candidate_entry is None:
    # Fail here with a clear message instead of an AttributeError on None further down.
    raise LookupError(f"Team {candidate_team_name!r} was not found in any preliminary round")

def mean(l):
    """Return the arithmetic mean of the numbers in `l`.

    Raises ZeroDivisionError when `l` is empty.
    """
    total = sum(l)
    count = len(l)
    return total / count

def time(seconds):
    """Format a duration in seconds as 'HH:MM:SS' preceded by a sign character.

    The value is truncated towards zero to whole seconds. The first character
    is '-' for negative input and a space otherwise.
    """
    whole_seconds = abs(int(seconds))
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, secs = divmod(remainder, 60)

    prefix = " "
    if seconds < 0:
        prefix = "-"

    return f"{prefix}{hours:02d}:{minutes:02d}:{secs:02d}"

# Index of the row used as "third place" in a round's entry list.
# NOTE(review): assumes each results file lists teams in finishing order — confirm.
THIRD_PLACE_INDEX = 2

# Average ratio (preliminary time / final time) of the finalists that came
# from the selected team's own round.
candidate_factors = candidate_preliminary.time_factors()
if not candidate_factors:
    raise ValueError(
        f"Round {candidate_preliminary.name!r} has no teams linked to the final, "
        "so no final time can be projected"
    )
final_factor = mean(candidate_factors)
projected_final_time = candidate_entry.time_seconds/final_factor
projected_final_position = sum(1 for other in final if other.time_seconds < projected_final_time) + 1

final_result = ('final', time(projected_final_time), projected_final_position, '--:--:--')

result_data = []

for preliminary in preliminaries.values():
    round_factors = preliminary.time_factors()
    if not round_factors or len(preliminary.entries) <= THIRD_PLACE_INDEX:
        # No linked finalists means no conversion factor for this round, and
        # fewer than three entries means no third place to compare with.
        continue

    # Convert the projected final time back to this round's time scale.
    projected_time = projected_final_time * mean(round_factors)
    projected_position = sum(1 for other in preliminary.entries if other.time_seconds < projected_time) + 1
    # Positive: the projected time is faster than this round's third entry.
    diff_with_third = preliminary.entries[THIRD_PLACE_INDEX].time_seconds - projected_time
    result_data.append((preliminary.name, time(projected_time), projected_position, diff_with_third))

Show the results.

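For each round the table shows: the round name, the projected time if the selected team had participated in that round, the projected position in that round, and (for preliminaries only) the projected time difference with that round's third place. A positive difference means the team is projected to be faster than third place.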

Ordered by: the final first, then the preliminaries by time difference with third place, largest margin first.

In [7]:
import tabulate

# Preliminaries ordered by their margin to third place, largest first; the
# numeric margin is turned into a time string only after sorting.
rounds_by_margin = sorted(result_data, key=lambda row: row[3], reverse=True)
sorted_result_data = [
    (round_name, projected, position, time(margin))
    for round_name, projected, position, margin in rounds_by_margin
]

# The final is always the first row of the table.
table_rows = [final_result] + sorted_result_data
table = tabulate.tabulate(
    table_rows,
    ['Round', 'Time', 'Pos', 'Time to 3rd'],
    colalign=('left', 'left', 'right', 'right'),
)
print(table)

Round               Time        Pos    Time to 3rd
------------------  --------  -----  -------------
final               01:16:20     24       --:--:--
vorden              01:13:16      1       00:20:01
roelofarendsveen-2  01:15:35      2       00:16:46
meterik             01:03:07      2       00:13:10
zwartebroek         00:57:39      2       00:08:38
obdam-2             01:14:43      2       00:07:24
roelofarendsveen-4  01:04:56      3       00:06:14
meppel              00:55:42      2       00:05:41
roelofarendsveen-3  00:57:09      3       00:05:03
de-lier             00:54:42      1       00:03:28
teteringen          01:05:05      3       00:03:21
obdam               00:56:46      1       00:02:55
hooghalen           01:11:59      2       00:02:36
sleeuwijk           01:15:03      3       00:00:20
de-lier-2           01:28:08      4      -00:00:32
reek-2              01:00:29      6      -00:11:02
