In [None]:
import pandas as pd
import json
import time
import random
import copy

# Parse the JSON fixture straight from the file handle and wrap the
# resulting Python objects in a DataFrame.
with open("test_data.json", "r") as fixture_file:
    test_data = json.load(fixture_file)
df = pd.DataFrame(test_data)

def random_date():
    """Return a random timestamp string between 2015-01-01 and 2026-01-01.

    The bounds are interpreted as local time, a point between them is drawn
    with ``random.random()``, and the result is formatted as
    ``YYYY-MM-DD HH:MM:SS`` in local time.
    """
    fmt = "%Y-%m-%d %H:%M:%S"
    lower = time.mktime(time.strptime("2015-01-01 00:00:00", fmt))
    upper = time.mktime(time.strptime("2026-01-01 00:00:00", fmt))
    span = upper - lower
    # Scale a uniform draw from [0, 1) onto the epoch-seconds interval.
    picked = lower + random.random() * span
    return time.strftime(fmt, time.localtime(picked))

# Attach one random timestamp to every row of the loaded test data
# (the frame is expected to carry an "index" column).
random_dates = [random_date() for _ in range(len(df["index"]))]
df["timestamp"] = random_dates

# dropna() returns a new frame; without the assignment the call had no
# effect and rows with missing values were still written to the CSV.
df = df.dropna()
# Duplicate every row once so the output contains each record twice.
df2 = df.copy()
df3 = pd.concat([df, df2])

# newline="" hands line-ending control to pandas; a handle opened in plain
# text mode produces doubled line endings (\r\r\n) on Windows.
with open("data2.csv", "w", newline="") as f:
    df3.to_csv(f, index=False)

In [9]:
import tomllib
# tomllib only accepts binary file handles, hence mode "rb".
# `config` holds the general settings (e.g. config["formats"]["time_format"]).
with open("settings.toml", "rb") as settings_file:
    config = tomllib.load(settings_file)

# `sensors` holds the parsed contents of sensors.toml.
with open("sensors.toml", "rb") as sensors_file:
    sensors = tomllib.load(sensors_file)

In [24]:
# Generate dummy data
import datetime 
import random

# Column headers used for the generated dummy dataset.
index_name = "#"
timestamp_name = "Date-Time (CEST)"
temperature_name = "Temperatur, °C"

def generate_data(amt_days=30, daily_samples=144, time_format=None):
    """Generate evenly spaced dummy temperature readings.

    Args:
        amt_days: Number of days the series should span.
        daily_samples: Number of readings per day; the spacing between two
            readings is ``24 * 60 / daily_samples`` minutes.
        time_format: ``strftime`` pattern for the timestamp column. When
            ``None`` the pattern is read from
            ``config["formats"]["time_format"]``.

    Returns:
        dict with three equally long lists keyed by ``index_name`` (running
        row number), ``timestamp_name`` (formatted timestamps starting at
        2015-03-01 00:00:00) and ``temperature_name`` (values drawn with
        ``random.random()``).

    Raises:
        ZeroDivisionError: if ``daily_samples`` is 0.
    """
    if time_format is None:
        time_format = config["formats"]["time_format"]

    # The row count must follow daily_samples; a hard-coded 144 here made
    # every non-default sample rate span the wrong number of days.
    total_rows = amt_days * daily_samples
    start = datetime.datetime(2015, 3, 1, 0, 0, 0)
    step = datetime.timedelta(minutes=(24 * 60) / daily_samples)

    data = {
        index_name: list(range(total_rows)),
        timestamp_name: [(start + i * step).strftime(time_format) for i in range(total_rows)],
        temperature_name: [random.random() for _ in range(total_rows)],
    }

    print(f"{index_name}: {len(data[index_name])}, {timestamp_name}: {len(data[timestamp_name])}, {temperature_name}: {len(data[temperature_name])}")
    return data


In [25]:
# Build the default dummy dataset and persist it as CSV.
data = generate_data()
df = pd.DataFrame(data)

# newline="" hands line-ending control to pandas; a handle opened in plain
# text mode produces doubled line endings (\r\r\n) on Windows.
with open("data3.csv", "w", encoding="utf-8", newline="") as f:
    df.to_csv(f, index=False)

#: 4320, Date-Time (CEST): 4320, Temperatur, °C: 4320


In [2]:
# Shuffle data rows to test with PowerBI

import pandas as pd

# Read and write with an explicit UTF-8 encoding so the result does not
# depend on the platform's locale encoding; sortFile reads the shuffled
# file as UTF-8.
with open("all_data.csv", "r", encoding="utf-8") as f:
    data = pd.read_csv(f)

# Shuffle rows (frac=1 returns every row, in random order)
data = data.sample(frac=1)

# Save; newline="" hands line-ending control to pandas and avoids doubled
# line endings (\r\r\n) on Windows.
with open("./data/all_data_shuffled.csv", "w", encoding="utf-8", newline="") as f:
    data.to_csv(f, index=False)

In [3]:
import pandas as pd

def getNewestSensorEntries(path_to_file: str):
    """Print the largest "Datum" value found for each sensor in a CSV file.

    The file must contain the columns "Sensor" and "Datum". A summary line
    with the number of distinct sensors is printed first, followed by one
    line per sensor (in order of first appearance) with the sensor name and
    the maximum of its "Datum" values.

    Args:
        path_to_file: Path of the CSV file to inspect.
    """
    with open(path_to_file, "r") as csv_file:
        readings = pd.read_csv(csv_file)

    sensor_ids = readings["Sensor"].unique()
    print(f"Found {len(sensor_ids)} sensor(s) in file")

    for sensor_id in sensor_ids:
        # Select this sensor's rows with a boolean mask and take the maximum.
        belongs_to_sensor = readings["Sensor"] == sensor_id
        newest = readings.loc[belongs_to_sensor, "Datum"].max()
        print(sensor_id, newest)

# Report the newest entry per sensor for the combined data file.
getNewestSensorEntries(path_to_file="all_data.csv")

Found 11 sensor(s) in file
FGV_01 2025-02-12 15:50:00
FGV_02 2025-02-12 16:00:00
FGV_03 2025-02-12 16:10:00
FGV_04 2024-07-19 20:40:00
FGV_05 2024-07-19 20:50:00
FGV_06 2024-07-24 15:10:00
FGV_07 2024-07-24 12:40:00
FGV_08 2025-02-13 13:30:00
FGV_09 2025-02-13 13:30:00
FGV_10 2024-09-23 10:30:00
FGV_11 2024-11-24 11:50:00


In [None]:
# Sort data by sensor and timestamp
import pandas as pd
import time 

def sortFile(path, savepath, time_format=None):
    """Clean a sensor CSV and write it sorted by sensor and timestamp.

    The file is read as UTF-8, the "Datum" column is parsed into datetimes,
    rows containing missing values and exact duplicate rows are removed, and
    the remainder is sorted by "Sensor" ascending and "Datum" descending
    before being written to ``savepath``. The elapsed time is printed.

    Args:
        path: CSV file to read; must contain "Sensor" and "Datum" columns.
        savepath: Destination path for the sorted CSV.
        time_format: ``strptime`` pattern of the "Datum" column. When
            ``None`` the pattern is read from
            ``config["formats"]["time_format"]``.
    """
    stime = time.perf_counter()

    if time_format is None:
        time_format = config["formats"]["time_format"]

    with open(path, "r", encoding="utf-8") as f:
        df = pd.read_csv(f)

    df["Datum"] = pd.to_datetime(df["Datum"], format=time_format)

    # drop_duplicates() returns a new frame; its result was previously
    # discarded, so duplicate rows were never removed. Chaining keeps every
    # cleaning step's result.
    df_sorted = (
        df.dropna()
        .drop_duplicates()
        .sort_values(by=["Sensor", "Datum"], ascending=[True, False])
    )

    # newline="" hands line-ending control to pandas and avoids doubled
    # line endings (\r\r\n) on Windows.
    with open(savepath, "w", encoding="utf-8", newline="") as f:
        df_sorted.to_csv(f, index=False)

    print(f"Done in {(time.perf_counter()-stime)*1000:.2f}ms")


In [14]:
# Forward slashes work on Windows and POSIX alike; the backslash form only
# resolves to the data/ directory on Windows.
sortFile("data/all_data_shuffled.csv", "data/shuffled_sorted.csv")

Done in 1269.60ms
