In [None]:
import os
import csv
import pandas as pd
import numpy as np
import formulaEvo

def getCSVData(csvfile):
    """Read one station CSV and return its days (weeks 18-39) with evaporation.

    The file is parsed as semicolon-separated with decimal commas. The first
    two data rows are discarded (presumably unit/description rows - confirm
    against the export format) before the columns ``TIMESTAMP``,
    ``AirTC_Avg``, ``RH_Avg`` and ``SlrW_Avg`` are converted.

    Args:
        csvfile: Path of the CSV file to read.

    Returns:
        A list with one dict per day. Each dict holds ``date``,
        ``temp_diff_day`` (daily max minus min temperature), ``time``
        (pandas Series), ``temp``, ``light`` and ``humi`` (numpy arrays) and
        ``evo`` (the result of ``formulaEvo.calculate_evaporation_solar``).
        The list is empty when no day falls into weeks 18-39.

    Raises:
        ValueError: If a column still contains NaN after single gaps have
            been forward-filled (two or more missing values in a row, or a
            missing first value).
    """
    print(f"Processing file: {csvfile}")
    fileData = pd.read_csv(csvfile, sep=';', decimal=',')
    fileData = fileData.iloc[2:].reset_index(drop=True)  # Remove first 2 lines

    def _to_float(column):
        # Columns may arrive as strings with decimal commas; normalise to float.
        return fileData[column].astype(str).str.replace(',', '.').astype(float)

    stationData = {
        'time': pd.to_datetime(fileData['TIMESTAMP'], format='%d.%m.%Y %H:%M'),
        'temp': _to_float('AirTC_Avg'),
        'humi': _to_float('RH_Avg'),
        # From W/m² to Ws/m² (10 min mean), then to MJ/m²
        'light': (_to_float('SlrW_Avg') * 10 * 60) / 1000000,
    }

    # Find and replace NaN values
    for key in stationData:
        # Locate NaN positions before filling
        nan_positions = stationData[key].isnull()
        if nan_positions.any():
            nan_indices = nan_positions[nan_positions].index.tolist()
            print(f"NaN values found in {key} at positions: {nan_indices}")
            print(f"Total NaN values in {key}: {nan_positions.sum()}")

        # Only a single missing sample in a row is bridged
        stationData[key] = stationData[key].ffill(limit=1)

        # Anything still missing after the fill is treated as a data error
        remaining_nan = stationData[key].isnull()
        if remaining_nan.any():
            remaining_indices = remaining_nan[remaining_nan].index.tolist()
            print(f"Remaining NaN values in {key} after forward fill at positions: {remaining_indices}")
            raise ValueError(f"More than one NaN value in a row in {key} detected at positions: {remaining_indices}")

    # Split the data into individual days
    frame = pd.DataFrame(stationData)
    stationDays = []
    for day, group in frame.groupby(frame['time'].dt.date):
        # Keep only data from week 18 to week 39.
        # NOTE(review): '%W' is the Monday-based week-of-year (days before the
        # first Monday are week 0), which is not the ISO calendar week -
        # confirm which numbering is intended.
        week = int(day.strftime('%W'))
        if 18 <= week <= 39:
            temps = group['temp'].to_numpy()
            stationDays.append({
                'date': day,
                'temp_diff_day': temps.max() - temps.min(),
                'time': group['time'],
                'temp': temps,
                'light': group['light'].to_numpy(),
                'humi': group['humi'].to_numpy()
            })

    if not stationDays:
        # Avoid averaging an empty list (NaN plus a RuntimeWarning)
        print(f"No days in calendar weeks 18-39 found in {csvfile}")
        return stationDays

    # Mean of the daily temperature differences over all kept days
    temp_avg_year = np.mean([day['temp_diff_day'] for day in stationDays])
    print(f"Average daily temperature difference from {csvfile}: {temp_avg_year:.2f} °C")

    # Compute the evaporation for every day
    for day in stationDays:
        day['evo'] = formulaEvo.calculate_evaporation_solar(
            day['date'],
            day['temp_diff_day'],
            formulaEvo.krKorean(temp_avg_year),
            day['light']
        )
    return stationDays


def seconds_since_midnight(timestamps):
    """Return the time of day in seconds since midnight for each timestamp.

    ``timestamps`` is accessed through ``.dt``, so it must be a datetime-like
    pandas Series; the result is a Series of seconds.
    """
    elapsed = timestamps - timestamps.dt.normalize()
    return elapsed.dt.total_seconds()

def make_drain_curve(total_drain, samples):
    """Build a drain curve with ``samples`` entries that rises to ``total_drain``.

    The curve starts at 0, reaches 70 % at the second sample, 90 % at the
    third and stays at 100 % from the fourth sample on (per the original
    notes: 70 % after 10 min, 90 % after 20 min, 100 % after 30 min).
    Shorter curves are cut off after ``samples`` entries.
    """
    ramp = [0, 0.7 * total_drain, 0.9 * total_drain]
    missing = samples - len(ramp)
    if missing > 0:
        # Pad with the final level up to the requested length
        return ramp + [total_drain] * missing
    # Requested length fits inside the ramp: truncate
    return ramp[:samples]

def split_day_by_evo(day, threshold=2.0, rng=None):
    """Split one day into episodes of roughly ``threshold`` cumulated evaporation.

    For every episode a random surplus drawn uniformly from [-0.75, 2.0) is
    added to ``threshold``. An episode is closed as soon as the evaporation
    cumulated since the previous cut reaches that total. A fresh surplus is
    drawn for the next episode.

    Args:
        day: Dict of per-sample sequences that must contain ``'evo'``. The
            day-level entries ``'date'`` and ``'temp_diff_day'`` are not
            copied into the episodes. The dict itself is left unmodified.
        threshold: Target cumulated evaporation per episode.
        rng: Optional random source offering ``uniform(low, high)`` (e.g.
            ``numpy.random.default_rng(seed)``) for reproducible splits.
            Defaults to the global ``numpy.random`` state.

    Returns:
        A list of episode dicts. Each holds the slices of all per-sample
        fields for its range plus ``'drain'``, the curve produced by
        ``make_drain_curve`` for the episode's total. The trailing episode
        (from the last cut to the end of the day) never reached its total
        and therefore has no ``'drain'`` entry.
    """
    # NOTE(review): the original notes describe converting only the surplus
    # into the drain curve, while the code passes threshold + surplus -
    # confirm which one is intended.
    sampler = np.random if rng is None else rng
    # Filter instead of deleting so the caller's dict stays intact
    fields = [key for key in day if key not in ('date', 'temp_diff_day')]

    evo = np.asarray(day['evo'])
    cumsum = np.cumsum(evo)
    episodes = []
    start_idx = 0
    last_cum = cumsum[0] if len(cumsum) > 0 else 0
    threshold_total = threshold + sampler.uniform(-0.75, 2.0)  # add random surplus
    for i in range(1, len(evo)):
        if cumsum[i] - last_cum >= threshold_total:
            epi = {key: day[key][start_idx:i] for key in fields}
            epi['drain'] = make_drain_curve(threshold_total, i - start_idx)
            episodes.append(epi)
            start_idx = i
            last_cum = cumsum[i]
            threshold_total = threshold + sampler.uniform(-0.75, 2.0)  # new random surplus
    # Last episode up to the end of the day
    if start_idx < len(evo):
        episodes.append({key: day[key][start_idx:] for key in fields})
    return episodes

# Save the episodes of one station day as CSV files
def save_Episode(baseFolder, month, day, station, episodes):
    """Write every episode to ``<baseFolder>/<station>-<month>-<day>-Epi<i>.csv``.

    Each file has one row per sample and no header. The columns are
    ``doy`` (one value per episode, repeated on every row), ``abstime``,
    ``time``, ``temp``, ``humi``, ``light``, ``drain`` and ``timeToIrri``.

    Args:
        baseFolder: Target directory; created if it does not exist.
        month, day, station: Used verbatim in the file name.
        episodes: Iterable of episode dicts with the keys listed above.
            ``epi['time']`` determines the number of rows.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(baseFolder, exist_ok=True)

    column_keys = ('abstime', 'time', 'temp', 'humi', 'light', 'drain', 'timeToIrri')
    for i, epi in enumerate(episodes):
        fileName = os.path.join(baseFolder, f"{station}-{month}-{day}-Epi{i}.csv")
        # Materialise as lists so values are read by position; indexing a
        # pandas Series with [j] is label-based and fails for a sliced index.
        columns = [list(epi[key]) for key in column_keys]
        n_rows = len(epi['time'])
        with open(fileName, "w", newline="") as file:
            writer = csv.writer(file)
            for j in range(n_rows):
                writer.writerow([epi['doy']] + [column[j] for column in columns])

In [10]:
csvfolder = "../CSV AgrarMeteo"
savePath = "../episFormula/1. Basic"
episodenAll = []

# Seed the global RNG used for the random drain surplus so that a
# Restart & Run All reproduces the same episodes.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Sorted listing: os.listdir returns entries in arbitrary order
for csvname in sorted(os.listdir(csvfolder)):
    if not csvname.endswith(".csv"):
        continue
    # The station name for the output files is the CSV file name without extension
    station = os.path.splitext(csvname)[0]
    csvfile = os.path.join(csvfolder, csvname)
    stationDays = getCSVData(csvfile)
    for day in stationDays:
        epiDay = split_day_by_evo(day, threshold=5.0)
        # Drop the first and the last episode of the day
        # (presumably because they are not bounded on both sides - confirm)
        episodenRaw = epiDay[1:-1]
        episoden = []
        for epiRaw in episodenRaw:
            episode = {}
            episode['doy'] = pd.Timestamp(epiRaw['time'].iloc[0]).dayofyear / 280  # max 280 days (week 39 = day 273)
            episode['abstime'] = np.array(seconds_since_midnight(epiRaw['time'])) / (24*3600)
            episode['time'] = np.array((epiRaw['time'] - epiRaw['time'].iloc[0]).dt.total_seconds()) / (60 * 300)  # fraction of a 300 min episode
            episode['temp'] = np.array(epiRaw['temp']) / 25  # 25 °C max
            episode['humi'] = np.array(epiRaw['humi']) / 100  # 100 % max
            episode['light'] = epiRaw['light'].cumsum() / 20  # 20 MJ per test area max
            episode['timeToIrri'] = np.array((epiRaw['time'].iloc[-1] - epiRaw['time']).dt.total_seconds()) / 60  # in minutes
            episode['drain'] = np.array(epiRaw['drain']) / 7  # 7 l max
            episoden.append(episode)
        episodenAll.append(episoden)
        month = pd.Timestamp(day['time'].iloc[0]).month
        daynum = pd.Timestamp(day['time'].iloc[0]).day
        # save_Episode expects (baseFolder, month, day, station, episodes)
        save_Episode(savePath, str(month), str(daynum), station, episoden)

Processing file: ../CSV AgrarMeteo\Bavendorf.csv


  fileData = pd.read_csv(csvfile, sep=';', decimal=',')


ValueError: More than one NaN value in a row in temp detected.