In [None]:
# Read configs (required!)

import tomllib
# Parse both TOML files once; `config` and `sensors` are read as globals by the
# functions defined further down. tomllib.load() requires a binary file handle.
with open("settings.toml", "rb") as settings_file:
    config = tomllib.load(settings_file)

with open("sensors.toml", "rb") as sensors_file:
    sensors = tomllib.load(sensors_file)

In [None]:
def transformSensorFile(df_dict: dict, sensor_name: str, datetime_col=False):
    """Reshape one sensor table in place and tag it with sensor metadata.

    ``df_dict`` bundles the frame under ``"df"`` with the names of its index,
    timestamp and temperature columns under ``"idxcol"``, ``"timecol"`` and
    ``"tmpcol"``. The frame itself is modified:

    * the index column and the timestamp column are removed,
    * the temperature column is renamed to ``Temperatur``,
    * ``Datum``, ``Jahr``, ``Monat``, ``Tag``, ``Uhrzeit``, ``Sensor`` and
      ``Standort`` are appended (in that order).

    ``Datum`` holds the full timestamp when ``datetime_col`` is truthy and only
    the calendar date otherwise. ``Standort`` is looked up in the module-level
    ``sensors`` mapping via ``sensors[sensor_name]["location"]``.

    Returns the same ``df_dict`` object that was passed in.
    """
    frame = df_dict["df"]
    frame.drop(columns=df_dict["idxcol"], inplace=True)
    time_key = df_dict["timecol"]

    # Either keep the whole timestamp or reduce it to the date part.
    frame["Datum"] = frame[time_key] if datetime_col else frame[time_key].dt.date

    # Remaining time components, each read from the matching `.dt` accessor attribute.
    for column, part in (("Jahr", "year"), ("Monat", "month"), ("Tag", "day"), ("Uhrzeit", "time")):
        frame[column] = getattr(frame[time_key].dt, part)

    frame["Sensor"] = sensor_name
    frame["Standort"] = sensors[sensor_name]["location"]

    # The raw timestamp is no longer needed once its parts have been extracted.
    frame.drop(columns=time_key, inplace=True)
    frame.rename(columns={df_dict["tmpcol"]: "Temperatur"}, inplace=True)
    return df_dict

In [None]:
# Read excel files
import glob
import os
import re
import time

import pandas as pd

# Folder with the raw sensor workbooks, and the CSV file the combined data is stored in.
path_to_files = "./data"
save_path = "all_data.csv"

def concat_sensor_files(path_to_files: str, save_path=None) -> None | pd.DataFrame:
    """Read every ``FGV_*.xlsx`` workbook in a folder and stack them into one table.

    Each workbook is read with pandas, its timestamp column is parsed with the
    format from ``config["formats"]["time_format"]`` and sorted newest-first,
    and the frame is reshaped by ``transformSensorFile``. The frames of one
    sensor are ordered by their newest entry (newest file first) and
    concatenated; the per-sensor tables are then concatenated in the key order
    of the module-level ``sensors`` mapping.

    Args:
        path_to_files: Folder that directly contains the ``FGV_*.xlsx`` files.
        save_path: If given, the combined table is written to this path as CSV
            (without the index) and nothing is returned. If ``None``, the
            combined table is returned instead.

    Returns:
        The combined DataFrame when ``save_path`` is ``None``, otherwise ``None``.

    Raises:
        ValueError: No matching workbook exists, no workbook contains any row,
            or a file name does not contain ``FGV_<digits>``.
        NameError: A file belongs to a sensor that is not a key of ``sensors``.
        IndexError: A workbook contains a column that matches none of the
            configured column names.
        KeyError: A workbook lacks the index, timestamp or temperature column.
    """
    # os.path.join keeps the pattern valid on every platform (a literal "\\"
    # only works on Windows). Sorting makes the processing order deterministic.
    pattern = os.path.join(path_to_files, "FGV_*.xlsx")
    data_paths = sorted(glob.glob(pattern))
    print("Found", len(data_paths), "files")
    if not data_paths:
        raise ValueError(f"No sensor files matching {pattern} were found")

    column_names = config["names"]
    index_name = column_names["index_column"]
    time_name = column_names["timestamp_column"]
    temp_name = column_names["temperature_column"]
    time_format = config["formats"]["time_format"]

    sensors_chunks = {key: [] for key in sensors}
    stime = time.perf_counter()

    for file_no, file in enumerate(data_paths, start=1):
        # Match on the file name only, so a directory called e.g. "FGV_2023"
        # cannot be mistaken for the sensor name.
        name_match = re.search(r"FGV_\d+", os.path.basename(file))
        if name_match is None:
            raise ValueError(f"Cannot derive a sensor name from file {file}")
        sensor_name = name_match.group()
        if sensor_name not in sensors:
            raise NameError(f"Trying to read sensor {sensor_name} which is not defined in sensors.toml")
        print("Reading sensor", sensor_name, f"({file_no}/{len(data_paths)})")
        df = pd.read_excel(file)

        # Identify the three expected columns by substring match on the header.
        idxcol, timecol, tmpcol = None, None, None
        for col in df.columns:
            if index_name in col:
                idxcol = col
            elif time_name in col:
                timecol = col
            elif temp_name in col:
                tmpcol = col
            else:
                raise IndexError(f"Found unknown column: {col}")

        missing = [
            label
            for label, found in (("index", idxcol), ("timestamp", timecol), ("temperature", tmpcol))
            if found is None
        ]
        if missing:
            raise KeyError(f"{file} has no {', '.join(missing)} column")

        if df.empty:
            # An empty workbook has no newest entry to sort by, so leave it out.
            print("Skipping", file, "because it contains no rows")
            continue

        df[timecol] = pd.to_datetime(df[timecol], format=time_format)
        df.sort_values(timecol, ascending=False, inplace=True)
        df = transformSensorFile(
            {"df": df, "idxcol": idxcol, "timecol": timecol, "tmpcol": tmpcol},
            sensor_name,
            datetime_col=True,
        )["df"]
        sensors_chunks[sensor_name].append(df)

    def newest_entry(chunk):
        """Return the topmost (newest) timestamp of a chunk."""
        return chunk["Datum"].iloc[0]

    per_sensor = []
    for key, chunks in sensors_chunks.items():
        if not chunks:
            # Sensor is configured but has no data in this folder;
            # pd.concat([]) would raise, so skip it.
            continue
        chunks.sort(key=newest_entry, reverse=True)  # Newest file at the top
        if save_path is not None:
            for position, chunk in enumerate(chunks):
                print(f"{position}: {newest_entry(chunk)}", end=" ")
            print(f"(Sensor {key})")
        per_sensor.append(pd.concat(chunks))

    if not per_sensor:
        raise ValueError("None of the sensor files contained any rows")

    # All sensors stacked, each block sorted with its newest data at the top.
    all_sensors_chunks = pd.concat(per_sensor)

    if save_path is None:
        return all_sensors_chunks

    # Let pandas open the file itself: a text handle opened without newline=""
    # produces "\r\r\n" line endings on Windows and uses the locale encoding
    # instead of UTF-8.
    all_sensors_chunks.to_csv(save_path, index=False)
    ttime = time.perf_counter() - stime
    print(f"Finished in {ttime:.2f}s")
    return None

# concat_sensor_files(path_to_files, save_path)

In [None]:
def append_sensor_files(path_to_files: str, old_file: str, save_path: str):
    """Put freshly read sensor workbooks on top of an existing CSV and save the result.

    The existing CSV is loaded with ``pd.read_csv``, the workbooks in
    ``path_to_files`` are combined via ``concat_sensor_files`` and the new rows
    are placed above the old ones. No de-duplication is performed. The printed
    timings are cumulative, measured from the start of the call.

    Args:
        path_to_files: Folder handed to ``concat_sensor_files``.
        old_file: Path of the existing CSV file.
        save_path: Path the combined CSV is written to (without the index).
    """
    stime = time.perf_counter()
    print("Reading old file...")
    base = pd.read_csv(old_file)
    print(f"Done ({time.perf_counter()-stime:.2f}s). Concatenating new files...")
    new = concat_sensor_files(path_to_files=path_to_files)
    print(f"Done ({time.perf_counter()-stime:.2f}s). Combining...")
    combined = pd.concat([new, base])
    print(f"Done ({time.perf_counter()-stime:.2f}s). Saving...")
    # Hand the path to pandas instead of a text handle from open(..., "w"):
    # that handle would use the locale encoding (read_csv expects UTF-8) and,
    # on Windows, turn every line ending into "\r\r\n".
    combined.to_csv(save_path, index=False)

    print(f"Done combining files, took {time.perf_counter()-stime:.2f}s")

# The previously saved CSV (module-level `save_path`) is the old file; the merged result goes to a new file.
append_sensor_files(path_to_files=path_to_files, old_file=save_path, save_path="all_duplicate.csv")

Reading old file...
Done (0.21s). Concatenating new files...
Found 16 files
Reading sensor FGV_01 (1/16)
Reading sensor FGV_02 (2/16)
Reading sensor FGV_02 (3/16)
Reading sensor FGV_03 (4/16)
Reading sensor FGV_03 (5/16)
Reading sensor FGV_04 (6/16)
Reading sensor FGV_05 (7/16)
Reading sensor FGV_06 (8/16)
Reading sensor FGV_07 (9/16)
Reading sensor FGV_08 (10/16)
Reading sensor FGV_08 (11/16)
Reading sensor FGV_09 (12/16)
Reading sensor FGV_09 (13/16)
Reading sensor FGV_10 (14/16)
Reading sensor FGV_10 (15/16)
Reading sensor FGV_11 (16/16)
Done (5.07s). Combining...
Done (5.20s). Saving...
Done combining files, took 7.20s
