# Grundlegende Infos

**Aufgabe:**
Einlesen von Ergebnisdateien im LENEX-Format zur Ermittlung der Bestleistungen der einzelnen Sportler

**Eingabedateien:** LEF-Dateien

# Notwendige Imports & Vorbereitungen

## Imports

In [203]:
import pandas as pd
import numpy as np
from lxml import etree
from datetime import datetime, timedelta, date
import sqlite3
import os

## Benötigte Klassen

In [204]:
class Athlete:
    """One athlete as read from a result file, plus the results collected for them."""

    def __init__(self, id, name, first_name, club, gender, birth_date):
        """Store the athlete's master data.

        ``id`` is converted to ``int``; ``birth_date`` must be a string in
        ``YYYY-MM-DD`` form and is parsed into a ``datetime``.
        """
        # Parsed first so that a malformed date fails before anything is stored.
        parsed_birth_date = datetime.strptime(birth_date, "%Y-%m-%d")

        self.id = int(id)
        self.first_name = first_name
        self.name = name
        self.gender = gender
        self.club = club
        self.birth_date = parsed_birth_date
        # Filled by the reader functions after construction.
        self.results = dict()

    def get_birth_year(self):
        """Return the year component of the birth date."""
        birth_date = self.birth_date
        return birth_date.year

class Result:
    """A single swum time for one event, tagged with the event's stroke and distance."""

    def __init__(self, event_id, swim_time, stroke, distance):
        """Store the result; ``swim_time`` must look like ``HH:MM:SS.ff``."""
        # strptime yields a datetime; the distance from its own midnight is
        # exactly the elapsed hours/minutes/seconds/microseconds.
        parsed = datetime.strptime(swim_time, "%H:%M:%S.%f")
        midnight = parsed.replace(hour=0, minute=0, second=0, microsecond=0)

        self.event_id = int(event_id)
        self.stroke = stroke
        self.distance = distance
        self.time = parsed - midnight

    def get_style_key(self):
        """Return the ``"<stroke>:<distance>"`` key used to group results."""
        return ":".join((str(self.stroke), str(self.distance)))

    def get_time(self):
        """Return the swum time in seconds as a float."""
        elapsed = self.time
        return elapsed.total_seconds()

# Einstellungen

## Region

In [205]:
# Region code a club's "region" attribute must equal for its athletes to be read
# (presumably the code for Saxony, going by the "sax" suffix -- confirm).
region_code_sax = 20

## Max Jahrgang

In [206]:
# Lower bound for the birth year in the export query: athletes born before
# this year are left out of the overview.
oldest_birth_year = 2006

## Input Dir

In [207]:
# Directory whose regular files are all parsed as result files; the path is
# relative to the current working directory.
input_dir = "../../data/"

## Distances to check

In [208]:
# Distances that are kept per stroke (presumably in metres -- confirm). Events of
# these strokes with any other distance are dropped when the events are read,
# and the same lists define which sheets the export writes.
distances_surface = [50, 100, 200, 800]  # stroke "SURFACE"
distances_apnea = [50]  # stroke "APNEA"
distances_im = [100, 400]  # stroke "IMMERSION"

# Funktionen

## Einlesen der Strecken

In [209]:
def read_events(root):
    """Map the id of every relevant individual event to ``[stroke, distance]``.

    Relay events (``relaycount`` greater than 1) are skipped. For the strokes
    SURFACE, APNEA and IMMERSION only the configured distances are kept; events
    of any other stroke are kept regardless of their distance.
    """
    # Strokes with a distance restriction; looked up per event below.
    allowed_distances = {
        "SURFACE": distances_surface,
        "APNEA": distances_apnea,
        "IMMERSION": distances_im,
    }

    relevant = {}
    for event_node in root.xpath("//EVENT"):
        swimstyle = event_node.xpath("SWIMSTYLE")[0]

        # Relays are not individual performances.
        if int(swimstyle.get("relaycount")) > 1:
            continue

        stroke = swimstyle.get("stroke")
        distance = int(swimstyle.get("distance"))

        # Restricted stroke with a distance that is not of interest.
        if stroke in allowed_distances and distance not in allowed_distances[stroke]:
            continue

        relevant[int(event_node.get("eventid"))] = [stroke, distance]

    return relevant

## Einlesen der relevanten Athleten + Ergebnisse

In [210]:
def read_athletes (root, events):
    """Collect the athletes of the configured region together with their relevant results.

    Parameters:
        root: parsed XML root of a result file; searched for ATHLETE elements.
        events: mapping of event id to ``[stroke, distance]`` as returned by
            ``read_events``; results of events missing from it are ignored.

    Returns:
        dict mapping the athlete id (int) to the ``Athlete`` object. An athlete
        without any relevant result is still included, with empty ``results``.

    An athlete is only read when the ``region`` attribute of the enclosing CLUB
    equals ``region_code_sax``. Clubs without a ``region`` attribute, or with a
    non-numeric one, are treated as "other region" and skipped instead of
    aborting the whole import.
    """
    athletes = {}

    for athlete_obj in root.xpath("//ATHLETE"):
        # Keep only athletes whose club belongs to the configured region.
        region_values = athlete_obj.xpath('./ancestor::CLUB/@region')
        try:
            in_region = int(region_values[0]) == region_code_sax
        except (IndexError, ValueError):
            # No region attribute at all, or one that is not a number.
            in_region = False

        if not in_region:
            continue

        # Create the athlete
        club = athlete_obj.xpath('./ancestor::CLUB/@name')[0]
        athlete = Athlete(
            athlete_obj.get("athleteid"),
            athlete_obj.get("lastname"),
            athlete_obj.get("firstname"),
            club,
            athlete_obj.get("gender"),
            athlete_obj.get("birthdate"),
        )

        # Read the results
        for result_obj in athlete_obj.xpath("RESULTS/RESULT"):
            event_id = int(result_obj.get("eventid"))

            # Ignore events that were filtered out (relays, other distances).
            if event_id not in events:
                continue

            # Any status attribute marks a result that is not a regular finish.
            if result_obj.get("status") is not None:
                continue

            stroke, distance = events[event_id][0], events[event_id][1]
            athlete.results[event_id] = Result(event_id, result_obj.get("swimtime"), stroke, distance)

        # Add the finished athlete object to the collection
        athletes[athlete.id] = athlete

    # Report how many athletes were read from this file
    print("Es wurden %d Athleten eingelesen" % len(athletes))

    return athletes

In [211]:
def read_athletes_nl(root, events):
    """Collect the athletes of a fixed list of clubs together with their relevant results.

    Works like the region-based reader, but selects athletes by the ``clubid``
    attribute of the enclosing CLUB instead of its region. Returns a dict
    mapping the athlete id (int) to the ``Athlete`` object.
    """
    club_ids = [27811, 21975, 29262, 22402, 22873]
    collected = {}

    for node in root.xpath("//ATHLETE"):
        # Only athletes of the listed clubs are of interest.
        club_id_values = node.xpath('./ancestor::CLUB/@clubid')
        if int(club_id_values[0]) not in club_ids:
            continue

        club_name = node.xpath('./ancestor::CLUB/@name')[0]
        swimmer = Athlete(
            node.get("athleteid"),
            node.get("lastname"),
            node.get("firstname"),
            club_name,
            node.get("gender"),
            node.get("birthdate"),
        )

        for result_node in node.xpath("RESULTS/RESULT"):
            event_id = int(result_node.get("eventid"))

            # Skip events that were filtered out beforehand.
            if event_id not in events:
                continue

            event_info = events[event_id]
            stroke, distance = event_info[0], event_info[1]

            # A result carrying any status attribute is not a regular finish.
            if result_node.get("status") is None:
                swimmer.results[event_id] = Result(event_id, result_node.get("swimtime"), stroke, distance)

        collected[swimmer.id] = swimmer

    # Report how many athletes were read from this file
    print("Es wurden %d Athleten eingelesen" % len(collected))

    return collected

# SQL Funktionen

## Anlegen der Datenbank

In [212]:
def create_database(db_path="../../working/db_results.sqlite"):
    """Create a fresh SQLite database with empty ``athletes`` and ``results`` tables.

    Parameters:
        db_path: location of the database file. An existing file at this path
            is deleted first, so every call starts from an empty database.

    Returns:
        The open ``sqlite3`` connection; the caller is responsible for closing it.

    Raises:
        sqlite3.Error: if the file cannot be opened or a table cannot be
            created. The connection is closed before the error propagates.
    """
    if os.path.exists(db_path):
        os.remove(db_path)
    conn = sqlite3.connect(db_path)

    athletes_ddl = """
    create table athletes
    (
        id         integer
            constraint athletes_pk
                primary key autoincrement,
        name       TEXT,
        first_name TEXT,
        gender     TEXT,
        year       TEXT,
        club       TEXT
    );
    """

    # aID refers to athletes.id; style is the "<stroke>:<distance>" key.
    results_ddl = """
    create table results
    (
        id         integer
            constraint results_pk
                primary key autoincrement,
        aID         integer,
        style       TEXT,
        time      float
    );
    """

    try:
        conn.execute(athletes_ddl)
        conn.execute(results_ddl)
    except sqlite3.Error:
        # Do not hand out (or leak) a connection to a half-built database.
        conn.close()
        raise

    return conn

## Abfragen

In [213]:
def get_athlete_id(conn, name, first_name, year):
    """Return the database id of the athlete with this name and birth year, or None.

    Only the first matching row is considered.
    """
    query = """
        SELECT id from athletes WHERE name=? AND first_name=? AND year=?
    """
    cursor = conn.cursor()
    cursor.execute(query, (name, first_name, year))
    match = cursor.fetchone()
    cursor.close()

    return None if match is None else match[0]

def get_result_by_athlete(conn, athlete_id, style):
    """Return ``[result_id, time]`` stored for this athlete and style, or None.

    Only the first matching row is considered.
    """
    query = """
        SELECT id, time from results WHERE aID=? AND style=?
    """
    cursor = conn.cursor()
    cursor.execute(query, (athlete_id, style))
    found = cursor.fetchone()
    cursor.close()

    if found is None:
        return None

    result_id, stored_time = found[0], found[1]
    return [result_id, stored_time]


## Inserts

In [214]:
def insert_athlete(conn, name, first_name, year, gender, club):
    """Return the database id for this athlete, inserting a new row if needed.

    An athlete is identified by name, first name and birth year; when such a
    row already exists its id is returned and nothing is written.
    """
    known_id = get_athlete_id(conn, name, first_name, year)
    if known_id is not None:
        return known_id

    insert_sql = """
    insert into athletes (name, first_name, gender, "year", club) values (?, ?, ?, ?, ?);
    """
    row_values = (name, first_name, gender, year, club)

    cursor = conn.cursor()
    cursor.execute(insert_sql, row_values)
    conn.commit()
    # Id that the insert just assigned to the new row.
    new_id = cursor.lastrowid
    cursor.close()

    return new_id

def insert_result(conn, athlete_id, style, seconds):
    """Record ``seconds`` as the athlete's time for ``style`` if it is their best so far.

    A first result for the style is inserted; a stored result is overwritten
    only when the new time is strictly lower. Otherwise nothing is written.
    """
    stored = get_result_by_athlete(conn, athlete_id, style)

    if stored is None:
        # No time for this style yet: create the row.
        sql = """
        insert into results (aid, style, time) values (?, ?, ?);
        """
        values = (athlete_id, style, seconds)
    elif stored[1] > seconds:
        # Faster than the stored time: replace it in place.
        sql = """
        update results SET time=? WHERE id=?;
        """
        values = (seconds, stored[0])
    else:
        # Stored time is at least as good; leave the database untouched.
        return

    cursor = conn.cursor()
    cursor.execute(sql, values)
    conn.commit()
    cursor.close()

# Ausführung

In [215]:
# Start from a fresh, empty results database; the connection is reused by the cells below.
conn = create_database()

In [216]:
# Parse every regular file in the input directory and store each athlete's
# best time per style in the database.
for file in os.listdir(input_dir):
    input_file = os.path.join(input_dir, file)

    # Sub-directories and other non-files are ignored.
    if os.path.isfile(input_file):
        print("Read File: %s" % input_file)

        tree = etree.parse(input_file)
        root = tree.getroot()
        events = read_events(root)

        # This one file is filtered by club id instead of by region.
        reader = read_athletes_nl if file == "x2023NK results.lef" else read_athletes
        athletes = reader(root, events)

        # Store the athletes and their results
        for athlete in athletes.values():
            db_id = insert_athlete(
                conn,
                athlete.name,
                athlete.first_name,
                athlete.get_birth_year(),
                athlete.gender,
                athlete.club,
            )

            for result in athlete.results.values():
                insert_result(conn, db_id, result.get_style_key(), result.get_time())

Read File: ../../data/x2023NK results.lef
Es wurden 7 Athleten eingelesen
Read File: ../../data/230325-Erfurt-PR.lef
Es wurden 18 Athleten eingelesen
Read File: ../../data/230401-Zwickau-PR.lef
Es wurden 115 Athleten eingelesen
Read File: ../../data/230121-Erfurt-PR.lef
Es wurden 25 Athleten eingelesen
Read File: ../../data/230512-Leipzig-PR.lef
Es wurden 59 Athleten eingelesen
Read File: ../../data/230617-Halle_(Saale)-PR.lef
Es wurden 22 Athleten eingelesen
Read File: ../../data/2023-07-02-Dresden-Pr.lef
Es wurden 68 Athleten eingelesen


# Auslesen der Ergebnisse

## Funktionen

In [217]:
def convert_timestamp(time_float):
    """Format a time given in seconds as ``m:ss,hh`` (minutes, seconds, hundredths).

    Seconds and hundredths are always two digits, so 65.05 becomes ``"1:05,05"``
    rather than the ambiguous ``"1:5,5"``. Times of one hour or more get a
    leading hour field (``h:mm:ss,hh``) instead of silently losing the hours.
    Fractions finer than a hundredth are cut off, not rounded.

    Parameters:
        time_float: duration in seconds (int or float).

    Returns:
        The formatted time as a string.
    """
    delta = timedelta(seconds=time_float)

    whole_seconds = delta.days * 24 * 3600 + delta.seconds
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    hundredths = delta.microseconds // 10000

    if hours:
        return "%d:%02d:%02d,%02d" % (hours, minutes, seconds, hundredths)
    return "%d:%02d,%02d" % (minutes, seconds, hundredths)

## Abfragen

In [218]:
# Style keys ("<stroke>:<distance>") to export, in the order
# SURFACE, APNEA, IMMERSION and, within a stroke, in the configured order.
styles = (
    ["SURFACE:%s" % (str(d)) for d in distances_surface]
    + ["APNEA:%s" % (str(d)) for d in distances_apnea]
    + ["IMMERSION:%s" % (str(d)) for d in distances_im]
)

In [219]:
# Stored time per athlete for one gender and one style, fastest first.
# The three values are bound as parameters instead of being pasted into the
# SQL text: this avoids quoting problems and does not rely on SQLite accepting
# double-quoted text as a string literal.
base_stmt = """
SELECT
    a.name,
    a.first_name,
    a.gender,
    a.year AS birth_year,
    a.club,
    r.style,
    r.time
FROM
    athletes AS a
LEFT JOIN results as r ON r.aID = a.id
WHERE
    a.year >= ? AND
    a.gender = ? AND
    r.style = ?
ORDER BY time
"""

# The context manager saves and closes the workbook even if a query fails.
with pd.ExcelWriter("../../finished/overview.xlsx") as excel_writer:
    # One sheet per gender and style, named e.g. "M-SURFACE_50".
    for gender in ["M", "F"]:
        for style in styles:
            # The year column is declared as TEXT, so the bound is passed as text too.
            query_params = (str(oldest_birth_year), gender, style)

            data_frame = pd.read_sql(base_stmt, conn, params=query_params)
            data_frame["time"] = data_frame["time"].apply(convert_timestamp)

            # ":" is not allowed in Excel sheet names.
            style_name = style.replace(":", "_")
            data_frame.to_excel(excel_writer, sheet_name="%s-%s" % (gender, style_name))