In [4]:
import pandas as pd
import numpy as np

# Laad CSV-bestand
df = pd.read_csv("temp_basin4.csv", parse_dates=["Time"])

# Juiste kolomnaam
naam_kolom = df.columns[1]

# 0-waarden
df = df.dropna()
df = df[df[naam_kolom] != 0]
df = df[df[naam_kolom] < 30]

rolling_median = df[naam_kolom].rolling(window=300, center=True).median()
diff = np.abs(df[naam_kolom] - rolling_median)
threshold = 2
df = df[diff < threshold]

# 3. Zet tijd als index en sorteer
df = df.set_index("Time").sort_index()

# 4. Maak tijdraster op exact 10-minutenintervallen
start = df.index.min().floor("10min")
end = df.index.max().ceil("10min")
time_grid = pd.date_range(start=start, end=end, freq="10min")

# 5. Nieuwe dataframe met enkel de grid
df_grid = pd.DataFrame(index=time_grid)

# 6. Voeg originele data toe en interpoleer
df_full = pd.concat([df, df_grid], axis=0).sort_index()
df_interp = df_full.interpolate(method="time")

# 7. Selecteer enkel waarden exact op 10-minuten
df_final = df_interp.loc[time_grid].reset_index().rename(columns={"index": "Time"})

# Zorg dat 'Time' kolom datetime is (zou het al moeten zijn na voorgaande code)
df_final["Time"] = pd.to_datetime(df_final["Time"])

# Filter enkel waarden vanaf (inclusief) 25 oktober 2023
df_filtered = df_final[df_final["Time"] >= pd.Timestamp("2023-10-25")]
df_filtered = df_filtered.rename(columns={"Time": "Datetime"})

# (Optioneel) Opslaan
df_filtered.to_csv("temperature_interpolated.csv", index=False)

print(df_filtered.head(10))

                Datetime  temp_basin4
3441 2023-10-25 00:00:00         18.9
3442 2023-10-25 00:10:00         18.9
3443 2023-10-25 00:20:00         19.0
3444 2023-10-25 00:30:00         19.0
3445 2023-10-25 00:40:00         19.0
3446 2023-10-25 00:50:00         19.0
3447 2023-10-25 01:00:00         19.0
3448 2023-10-25 01:10:00         19.0
3449 2023-10-25 01:20:00         19.0
3450 2023-10-25 01:30:00         19.0
