In [1]:
import os

import pandas as pd

# Define the base directory
base_dir = "./Studie"  # Change this to your actual path

# Define the four variants (used as file-name prefixes of the CSV files)
variants = ["DrehenFangen", "EntgegenFangen", "Tischtennis", "TischtennisSeite"]


def load_variant_frames(base_dir, variants):
    """Collect the CSV files below ``base_dir`` into one DataFrame per variant.

    A file belongs to a variant when its name starts with the variant name and
    ends with ".csv". When several variant names are a prefix of the same file
    name (e.g. "Tischtennis" and "TischtennisSeite"), only the longest one
    claims the file, so every file is read once and lands in exactly one variant.

    Parameters
    ----------
    base_dir : str
        Directory that is searched recursively with ``os.walk``.
    variants : list of str
        Variant names used as file-name prefixes.

    Returns
    -------
    dict
        Maps each variant to the concatenation of its files (each with an added
        "Source" column holding the file name), or to an empty list when no
        file matched that variant.
    """
    # Most specific prefix first; sorted() is stable, so equally long names keep their order.
    by_specificity = sorted(variants, key=len, reverse=True)
    collected = {variant: [] for variant in variants}

    for root, _dirs, files in os.walk(base_dir):
        for file in files:
            if not file.endswith(".csv"):
                continue
            owner = next((v for v in by_specificity if file.startswith(v)), None)
            if owner is None:
                continue  # CSV file that belongs to none of the variants
            df = pd.read_csv(os.path.join(root, file), sep=";")
            df["Source"] = file  # Add filename as a column for reference
            collected[owner].append(df)

    # Concatenate all dataframes per variant; variants without files keep their empty list
    return {
        variant: pd.concat(frames, ignore_index=True) if frames else frames
        for variant, frames in collected.items()
    }


# One concatenated DataFrame per variant
dataframes = load_variant_frames(base_dir, variants)

# Example: Display the first few rows of one variant
example = dataframes["Tischtennis"]
if isinstance(example, pd.DataFrame):
    print(example.head())
else:
    # Nothing was loaded (wrong base_dir or no matching files), so there is no frame to show
    print(f"No CSV files starting with 'Tischtennis' found under {base_dir!r}")


   ID                  Date   Points              Source
0  11   2025-02-19 10:17:31        3  Tischtennis_11.csv
1  11   2025-02-19 10:17:40        2  Tischtennis_11.csv
2  11   2025-02-19 10:17:43        1  Tischtennis_11.csv
3  11   2025-02-19 10:17:48        1  Tischtennis_11.csv
4  11   2025-02-19 10:17:53        2  Tischtennis_11.csv


In [2]:
# Remove surrounding whitespace from the column names of every variant
for name in ("DrehenFangen", "EntgegenFangen", "Tischtennis", "TischtennisSeite"):
    frame = dataframes[name]
    frame.columns = frame.columns.str.strip()

# Drop the rows of the Tischtennis variant whose Source column mentions TischtennisSeite
tischtennis = dataframes["Tischtennis"]
from_side_variant = tischtennis["Source"].str.contains("TischtennisSeite")
dataframes["Tischtennis"] = tischtennis[~from_side_variant]

In [3]:
# Order every variant by its Date column and renumber the rows from 0
for key in variants:
    dataframes[key] = dataframes[key].sort_values(by="Date").reset_index(drop=True)

In [4]:
# Write every variant to "<variant>.csv" in the working directory, without the index column
for variant in variants:
    output_name = f"{variant}.csv"
    dataframes[variant].to_csv(output_name, index=False)