# Grundlegende Infos

**Aufgabe:**
Einlesen von Ergebnisdateien im LENEX-Format zur vereinfachten Auswertung der Sachsenliga

**Eingabedateien:** LEF-Dateien

# Notwendige Imports & Vorbereitung

## Imports

In [49]:
import pandas as pd
import numpy as np
from lxml import etree
from datetime import datetime, timedelta, date

## Benötigte Klassen

In [50]:
class Athlete:
    """One athlete together with the results collected for that athlete.

    ``category`` starts as ``None`` and ``results`` as an empty dict; both are
    meant to be filled in by the code that uses the object.
    """

    def __init__(self, id, name, first_name, club, gender, birth_date):
        """Create an athlete.

        Args:
            id: Athlete id; anything ``int()`` accepts.
            name: Last name.
            first_name: First name.
            club: Name of the athlete's club.
            gender: Gender code, stored unchanged.
            birth_date: Birth date as a ``YYYY-MM-DD`` string.

        Raises:
            ValueError: If ``id`` is not an integer literal or ``birth_date``
                does not match ``%Y-%m-%d``.
        """
        self.id = int(id)
        self.name = name
        self.first_name = first_name
        self.club = club
        self.birth_date = datetime.strptime(birth_date, "%Y-%m-%d")
        self.gender = gender
        self.category = None
        # Maps event id -> result object for that event
        self.results = {}

    def get_birth_year(self):
        """Return the athlete's year of birth as an int."""
        return self.birth_date.year

class Result:
    """A single swim result: the event it belongs to, the time and the points."""

    def __init__(self, event_id, swim_time):
        """Store the event id and parse ``swim_time`` (``HH:MM:SS.ff``) into a timedelta.

        Points start at 0.
        """
        self.event_id = int(event_id)

        # strptime() yields a full datetime; subtracting midnight of that same
        # day leaves exactly the hours/minutes/seconds/microseconds part.
        parsed = datetime.strptime(swim_time, "%H:%M:%S.%f")
        midnight = parsed.replace(hour=0, minute=0, second=0, microsecond=0)
        self.time = parsed - midnight

        self.points = 0

    def get_time(self):
        """Return the swim time in seconds as a float."""
        return self.time.total_seconds()

# Einstellungen

## Region

In [51]:
# Region code of the clubs to evaluate (Saxony, per the notebook text); it is
# compared with the integer value of each CLUB element's "region" attribute.
region_code_sax = 20

## Eingabedatei

In [52]:
# Result file to evaluate and the Excel workbook the final tables are written to
# (both paths are relative to the notebook's working directory).
input_file = "../data/2023-07-02-Dresden-Pr.lef"
output_file = "../result/ljs.xlsx"
# Parse the XML once; `root` is the element the later XPath queries start from.
tree = etree.parse(input_file)
root = tree.getroot()

## Kategorien

In [53]:
# Age categories: label -> [first birth year, last birth year], both bounds
# inclusive (the evaluation tests lower <= birth year <= upper).
# NOTE(review): the upper bound 2023 of the youngest category looks like the
# competition year used as "no limit" -- confirm and adjust for later seasons.
categories = {
    "2006 & 2007": [2006, 2007],
    "2008 & 2009": [2008, 2009],
    "2010 & 2011": [2010, 2011],
    "2012 & 2013": [2012, 2013],
    "2014 & jünger": [2014, 2023],
}

## Punkte

In [54]:
# Points per place: the n-th value below belongs to place n (places 1..24).
points = dict(enumerate(
    (
        50, 46, 42, 39, 36, 33, 30, 27,
        24, 22, 20, 18, 16, 14, 12, 10,
        8, 7, 6, 5, 4, 3, 2, 1,
    ),
    start=1,
))


def convert_place_to_points(place):
    """Return the points for ``place``; any place missing from the table scores 1."""
    return points.get(place, 1)

## Übersicht über alle relevanten Vereine
Darstellung aller Clubs, welche aus Sachsen kommen

In [55]:
# One-column overview of every club whose "region" attribute equals the
# configured region code.
headers = ["Name"]
rows = [
    [club.get("name")]
    for club in root.xpath("//CLUB")
    if int(club.get("region")) == region_code_sax
]

df_clubs = pd.DataFrame(rows, columns=headers)
df_clubs

Unnamed: 0,Name
0,1. Chemnitzer Tauchverein e.V.
1,SC DHfK Leipzig Flossenschwimmen
2,SC Riesa (TC)
3,Startgemeinschaft Dresden
4,Tauchclub NEMO Plauen e.V.
5,TC Delitzsch


## Übersicht der angebotenen Strecken
Darstellung aller Strecken, welche für die Auswertung relevant sind (alles außer Staffeln)

In [56]:
# Table of all individual events, indexed by event id (kept as the string read
# from the file), in the order the events appear in the document.
headers = ["ID", "Gender", "Style", "Distance"]
rows = []
for event in root.xpath("//EVENT"):
    style = event.xpath("SWIMSTYLE")[0]

    # Only keep events whose relay count is at most 1, i.e. no relays
    if int(style.get("relaycount")) <= 1:
        rows.append([
            event.get("eventid"),
            event.get("gender"),
            style.get("stroke"),
            style.get("distance"),
        ])

# NOTE(review): a bare `df_events.sort_index()` used to follow here. Its result
# was discarded, so it never reordered anything and has been dropped. Assign the
# result (with numeric ids) if a sorted table is actually wanted -- that would
# also change the column order of the final result tables.
df_events = pd.DataFrame(rows, columns=headers).set_index(["ID"])
df_events

Unnamed: 0_level_0,Gender,Style,Distance
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,F,MEDLEY,200
2,M,MEDLEY,200
39,F,SURFACE,200
40,M,SURFACE,200
3,F,BREAST,100
4,M,BREAST,100
5,F,FLY,50
6,M,FLY,50
41,F,APNEA,25
42,M,APNEA,25


# Einlesen der relevanten Athleten + Ergebnisse

In [57]:
# Collect every athlete of a club in the configured region, keyed by athlete id,
# together with the athlete's usable results.
athletes = {}

for athlete_obj in root.xpath("//ATHLETE"):
    # The region lives on the enclosing CLUB element; skip other regions
    club_region_id = athlete_obj.xpath('./ancestor::CLUB/@region')
    if int(club_region_id[0]) != region_code_sax:
        continue

    club = athlete_obj.xpath('./ancestor::CLUB/@name')[0]
    athlete = Athlete(
        athlete_obj.get("athleteid"),
        athlete_obj.get("lastname"),
        athlete_obj.get("firstname"),
        club,
        athlete_obj.get("gender"),
        athlete_obj.get("birthdate"),
    )

    for result_obj in athlete_obj.xpath("RESULTS/RESULT"):
        event_id = int(result_obj.get("eventid"))

        # Only results without a status attribute are kept; a status marks a
        # swim that is treated as not finished.
        if result_obj.get("status") is None:
            athlete.results[event_id] = Result(event_id, result_obj.get("swimtime"))

    athletes[athlete.id] = athlete

# Report how many athletes were read
print("Es wurden %d Athleten eingelesen" % len(athletes))

Es wurden 68 Athleten eingelesen


# Auswertung

## Berechnung der Ergebnisse

In [58]:
def calculate_points_for_event(event_id: int, category):
    """Rank one event inside one age category and store the points on the results.

    Every athlete whose birth year lies inside the category's year range is
    tagged with ``category``, whether or not they swam the event. Athletes with
    a result for ``event_id`` are ordered by swim time, numbered 1..n, and the
    points for that place are written to the result's ``points`` attribute.

    Args:
        event_id: Id of the event to score.
        category: Key into the module-level ``categories`` dict.
    """
    ranking_rows = []

    for athlete in athletes.values():
        birth_year = athlete.birth_date.year

        # Outside the category's inclusive year range -> not part of this ranking
        if birth_year < categories[category][0] or birth_year > categories[category][1]:
            continue

        athlete.category = category

        if event_id in athlete.results:
            ranking_rows.append([
                athlete.id,
                athlete.first_name,
                athlete.name,
                athlete.results[event_id].get_time(),
            ])

    # Fastest time first
    ranking = pd.DataFrame(
        data=ranking_rows,
        columns=["ID", "Vorname", "Nachname", "Zeit"],
    ).sort_values(by="Zeit")

    # Places follow the sorted order; points are looked up per place
    ranking["place"] = np.arange(1, len(ranking) + 1)
    ranking["points"] = ranking["place"].apply(convert_place_to_points)

    # Write the points back onto the athletes' result objects
    for athlete_id, earned in zip(ranking["ID"], ranking["points"]):
        athletes[athlete_id].results[event_id].points = earned

# Score every listed event once for each age category
for event_index in df_events.index:
    for cat_key in categories:
        calculate_points_for_event(int(event_index), cat_key)


## Zusammenbauen der Ergebnis-Tabelle

## Header

In [59]:
#Headers
headers_f = ["Vorname", "Nachname", "Geb. Jahr", "Verein", "Altersklasse"]
headers_m = ["Vorname", "Nachname", "Geb. Jahr", "Verein", "Altersklasse"]
for event_index, event_row in df_events.iterrows():
    if event_row["Gender"] == "F":
        headers_f.append("%s %s" % (event_row["Distance"], event_row["Style"]))
    else:
        headers_m.append("%s %s" % (event_row["Distance"], event_row["Style"]))

## Athleten

In [60]:
def get_rows_for_gender(gender):
    """Build the result-table rows for all athletes of one gender.

    Returns a dict mapping athlete id to a list: first name, last name, birth
    year, club and category, followed by the points for each event of that
    gender (0 where the athlete has no result), in the order of ``df_events``.
    The dict is empty when there is no event for ``gender``.
    """
    event_ids = [
        int(event_index)
        for event_index, event_row in df_events.iterrows()
        if event_row["Gender"] == gender
    ]

    rows = {}
    # Without any matching event no row is produced at all
    if not event_ids:
        return rows

    for athlete_id, athlete in athletes.items():
        if athlete.gender != gender:
            continue

        scores = [
            athlete.results[event_id].points if event_id in athlete.results else 0
            for event_id in event_ids
        ]
        rows[athlete_id] = [
            athlete.first_name,
            athlete.name,
            athlete.birth_date.year,
            athlete.club,
            athlete.category,
        ] + scores

    return rows


rows_f = get_rows_for_gender("F")
rows_m = get_rows_for_gender("M")

## Berechnung der Punkte je Zeile

In [61]:
def calc_points_per_athlete(row, meta_columns=5, best_n=3):
    """Return the sum of an athlete's best event scores.

    Args:
        row: One table row as a pandas Series: ``meta_columns`` descriptive
            values first, then one points value per event.
        meta_columns: Number of leading non-score entries to skip. The default
            of 5 matches the name, birth year, club and category columns.
        best_n: How many of the highest scores are added up (default 3). If the
            row has fewer scores, all of them are summed.

    Returns:
        The sum of the ``best_n`` largest scores.
    """
    # Positional slice rather than tail(-meta_columns): tail(0) would return an
    # empty Series instead of the whole row when there is nothing to skip.
    scores = pd.to_numeric(row.iloc[meta_columns:])
    return scores.nlargest(best_n).sum()

## Ergebnisse Weiblich

In [62]:
# Women's table: one row per athlete, a "Summe" column with the summed best
# scores, ordered by age category and then by descending total.
df_female = (
    pd.DataFrame.from_dict(rows_f, orient="index", columns=headers_f)
    .assign(Summe=lambda frame: frame.apply(calc_points_per_athlete, axis=1))
    .sort_values(by=["Altersklasse", "Summe"], ascending=[True, False])
)
df_female

Unnamed: 0,Vorname,Nachname,Geb. Jahr,Verein,Altersklasse,200 MEDLEY,200 SURFACE,100 BREAST,50 FLY,25 APNEA,...,50 FREE,200 FREE,50 BACK,50 BREAST,50 FREE.1,50 FLY.1,100 FREE,100 FLY,200 BACK,Summe
452,Johanna Hermine,Ullrich,2007,1. Chemnitzer Tauchverein e.V.,2006 & 2007,0,0,0,0,46,...,0,0,0,0,0,0,0,0,0,146
852,Lotte,Marquardt,2007,Startgemeinschaft Dresden,2006 & 2007,0,50,0,0,50,...,0,0,0,0,0,0,0,0,0,146
853,Luisa,Bretschneider,2007,Startgemeinschaft Dresden,2006 & 2007,0,46,0,0,39,...,0,0,0,0,0,0,0,0,0,124
858,Sophiya,Razumovska,2007,Startgemeinschaft Dresden,2006 & 2007,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,85
849,Josephine,Neumann,2006,Startgemeinschaft Dresden,2006 & 2007,0,0,0,0,42,...,0,0,0,0,0,0,0,0,0,84
850,Katka,Schürer,2006,Startgemeinschaft Dresden,2006 & 2007,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,69
453,Jenny,Franke,2008,1. Chemnitzer Tauchverein e.V.,2008 & 2009,0,50,0,0,42,...,0,0,0,0,0,0,0,0,0,142
314,Polina,Kulchytska,2008,SC DHfK Leipzig Flossenschwimmen,2008 & 2009,0,46,0,0,0,...,0,0,0,0,0,0,0,0,0,142
108,Maren,Weller,2008,Tauchclub NEMO Plauen e.V.,2008 & 2009,0,42,0,0,39,...,0,0,0,0,0,0,0,0,0,123
111,Sara-Marie,Steinert,2008,Tauchclub NEMO Plauen e.V.,2008 & 2009,0,39,0,0,0,...,0,0,0,0,0,0,0,0,0,111


## Ergebnisse Männlich


In [63]:
# Men's table: one row per athlete, a "Summe" column with the summed best
# scores, ordered by age category and then by descending total.
df_male = (
    pd.DataFrame.from_dict(rows_m, orient="index", columns=headers_m)
    .assign(Summe=lambda frame: frame.apply(calc_points_per_athlete, axis=1))
    .sort_values(by=["Altersklasse", "Summe"], ascending=[True, False])
)
df_male

Unnamed: 0,Vorname,Nachname,Geb. Jahr,Verein,Altersklasse,200 MEDLEY,200 SURFACE,100 BREAST,50 FLY,25 APNEA,...,50 FREE,200 FREE,50 BACK,50 BREAST,50 FREE.1,50 FLY.1,100 FREE,100 FLY,200 BACK,Summe
456,Emil,Lorenz,2006,1. Chemnitzer Tauchverein e.V.,2006 & 2007,0,0,0,0,50,...,0,0,0,0,0,0,0,0,0,150
329,Ben Joseph,Schoodt,2007,SC DHfK Leipzig Flossenschwimmen,2006 & 2007,0,0,0,0,46,...,0,0,0,0,0,0,0,0,0,142
113,Tim,Kaden,2007,Tauchclub NEMO Plauen e.V.,2006 & 2007,0,50,0,0,0,...,0,0,0,0,0,0,0,0,0,138
324,Leon,Hauk,2008,SC DHfK Leipzig Flossenschwimmen,2008 & 2009,0,0,0,0,46,...,0,0,0,0,0,0,0,0,0,146
458,Alwin,Müller,2009,1. Chemnitzer Tauchverein e.V.,2008 & 2009,0,50,0,0,42,...,0,0,0,0,0,0,0,0,0,138
450,Simon,Elle,2009,1. Chemnitzer Tauchverein e.V.,2008 & 2009,0,0,0,0,39,...,0,0,0,0,0,0,0,0,0,127
331,Alex Michael,Berger,2008,SC DHfK Leipzig Flossenschwimmen,2008 & 2009,0,0,0,0,50,...,0,0,0,0,0,0,0,0,0,100
844,Christoph,Hübner,2009,Startgemeinschaft Dresden,2008 & 2009,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,85
445,Pepe Milan,Becker,2011,TC Delitzsch,2010 & 2011,0,50,0,0,50,...,0,0,0,0,0,0,0,0,0,150
448,Viktor,Yemelianov,2010,1. Chemnitzer Tauchverein e.V.,2010 & 2011,0,46,0,0,0,...,0,0,0,0,0,0,0,0,0,138


# Export

In [64]:
# Write both result tables into one workbook, one sheet per gender
with pd.ExcelWriter(output_file) as writer:
    for sheet_name, frame in (("male", df_male), ("female", df_female)):
        frame.to_excel(writer, sheet_name=sheet_name)