# Initialisierung

**Libraries**<br>
pandas:     Datenverarbeitung<br>
os:         Betriebssystem-Funktionen für relative Pfadreferenzierung<br>
datetime:   Verarbeitung der Timestamps<br>

In [12]:
import pandas as pd
import os
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

from sklearn.preprocessing import StandardScaler

# Absolute path of the current working directory; every data file below is
# located relative to it.
dirname = os.path.abspath('')

# CSV that is read into `df` by the following cell.
df_path = os.path.join(dirname, '2019_Cleaned_Data_15min.csv')

# Folder holding the per-building load CSVs. The folder and the file name are
# passed to os.path.join as separate components instead of being glued together
# with a literal backslash: '\B' is an invalid escape sequence (it triggers a
# warning on current Python versions) and a backslash is only a path separator
# on Windows, so the previous spelling did not resolve on Linux/macOS.
raw_load_dir = os.path.join(dirname, 'Rohdaten_Last_Braunschweig')

path_building1 = os.path.join(raw_load_dir, 'Building1.csv')
path_building2 = os.path.join(raw_load_dir, 'Building2.csv')
path_building3 = os.path.join(raw_load_dir, 'Building3.csv')
path_building4 = os.path.join(raw_load_dir, 'Building4.csv')
path_building5 = os.path.join(raw_load_dir, 'Building5.csv')

**Merging**

In [13]:
# Read the CSV located at `df_path` into a DataFrame and print it
# (pandas truncates the printed representation of large frames).
df = pd.read_csv(df_path)
print(df)


       RWS_DAU_10  RWS_10  DS_10  GS_10  SD_10  FF_10  DD_10   PP_10  TT_10  \
0             8.0    0.00    0.0    0.0    0.0    4.4  260.0  1020.7    7.9   
1             5.0    0.00    0.0    0.0    0.0    6.4  260.0  1019.9    7.5   
2             9.0    0.00    0.0    0.0    0.0    6.4  270.0  1020.0    7.4   
3             9.0    0.03    0.0    0.0    0.0    7.0  270.0  1019.8    7.3   
4             8.0    0.00    0.0    0.0    0.0    6.4  260.0  1019.6    7.3   
...           ...     ...    ...    ...    ...    ...    ...     ...    ...   
48329         0.0    0.00    0.0    0.0    0.0    0.9  240.0  1028.3   -0.4   
48330         0.0    0.00    0.0    0.0    0.0    1.1  270.0  1028.4   -0.7   
48331         0.0    0.00    0.0    0.0    0.0    1.5  290.0  1028.6   -0.8   
48332         0.0    0.00    0.0    0.0    0.0    1.9  290.0  1028.4   -0.2   
48333         0.0    0.00    0.0    0.0    0.0    1.6  260.0  1028.3    0.1   

       TM5_10  RF_10                 MESS_DATUM  
0

In [14]:

# Per-column rule used when the rows are collapsed into hourly bins:
# the first five columns are summed, the remaining ones are averaged.
aggregation_dict = {
    'RWS_DAU_10':   'sum',      # rain duration (10-min measurements)
    'RWS_10':       'sum',      # rain amount (height in mm, 10-min measurements)
    'DS_10':        'sum',      # diffuse radiation (10-min measurements)
    'GS_10':        'sum',      # global radiation (10-min measurements)
    'SD_10':        'sum',      # sunshine duration (10-min measurements)
    'FF_10':        'mean',     # average wind speed
    # NOTE(review): an arithmetic mean of a wind direction does not wrap around
    # (350 and 10 would average to 180) - presumably DD_10 is in degrees;
    # confirm whether a circular mean is needed here.
    'DD_10':        'mean',     # average wind direction
    'PP_10':        'mean',     # air pressure at the height of the station
    'TT_10':        'mean',     # air temperature 2 m above ground
    'TM5_10':       'mean',     # air temperature 5 cm above ground
    'RF_10':        'mean'      # relative humidity
}

# Parse 'MESS_DATUM' as timezone-aware (UTC) timestamps and use it as the index
# of a *new* frame. `df` itself is left untouched so this cell can be re-run:
# with an in-place set_index the 'MESS_DATUM' column disappears from `df` and a
# second execution of the cell fails with a KeyError.
weather_by_time = df.assign(
    MESS_DATUM=pd.to_datetime(df['MESS_DATUM'], utc=True)
).set_index('MESS_DATUM')

# Collapse to 1-hour bins using the per-column rules above.
# '1h' replaces the alias '1H', which is deprecated in current pandas releases.
saved_df = weather_by_time.resample('1h').agg(aggregation_dict)

# Show the result once with the timestamp as index ...
print(saved_df)
# ... and once with 'MESS_DATUM' turned back into a regular column, which is
# the form the following cell works with.
saved_df = saved_df.reset_index()
print(saved_df)

                           RWS_DAU_10  RWS_10  DS_10  GS_10  SD_10     FF_10  \
MESS_DATUM                                                                     
2019-01-01 00:00:00+00:00        13.0    0.00    0.0    0.0    0.0  5.400000   
2019-01-01 01:00:00+00:00        35.0    0.03    0.0    0.0    0.0  6.375000   
2019-01-01 02:00:00+00:00        30.0    0.03    0.0    0.0    0.0  6.940000   
2019-01-01 03:00:00+00:00         0.0    0.00    0.0    0.0    0.0  6.400000   
2019-01-01 04:00:00+00:00         8.0    0.00    0.0    0.0    0.0  7.525000   
...                               ...     ...    ...    ...    ...       ...   
2019-12-31 19:00:00+00:00         0.0    0.00    0.0    0.0    0.0  2.000000   
2019-12-31 20:00:00+00:00         0.0    0.00    0.0    0.0    0.0  1.933333   
2019-12-31 21:00:00+00:00         0.0    0.00    0.0    0.0    0.0  1.733333   
2019-12-31 22:00:00+00:00         0.0    0.00    0.0    0.0    0.0  1.383333   
2019-12-31 23:00:00+00:00         0.0   

**Haushaltslasten mergen**

In [15]:
# Paths of the per-building load CSVs, in building order.
building_paths = [
    path_building1,
    path_building2,
    path_building3,
    path_building4,
    path_building5,
]

# Load every building file and bring it to hourly resolution. The loop variable
# is deliberately not called `df`, so the frame of that name is not clobbered
# while iterating.
data_building_list = []
for building_path in building_paths:
    building_raw = pd.read_csv(building_path)
    # Parse 'rec_time' as timezone-aware (UTC) timestamps and index by it.
    building_raw['rec_time'] = pd.to_datetime(building_raw['rec_time'], utc=True)
    building_hourly = building_raw.set_index('rec_time')
    # Shift all timestamps forward by one hour.
    # NOTE(review): the reason for this offset is not visible in this notebook -
    # presumably it aligns the load timestamps with the weather data; confirm.
    building_hourly.index += pd.Timedelta(hours=1)
    # Sum the readings per 1-hour bin; origin='start' anchors the bins at the
    # first timestamp of the series. '1h' replaces the deprecated alias '1H'.
    # The index is already a DatetimeIndex here, so no further conversion is
    # needed before resampling.
    building_hourly = building_hourly.resample('1h', origin='start').sum()
    # Turn the timestamp index back into a regular 'rec_time' column.
    data_building_list.append(building_hourly.reset_index())

# Attach the hourly weather rows to each building's hourly load rows.
# NOTE(review): both frames carry a fresh 0..n-1 index at this point, so this
# join pairs rows by *position*, not by timestamp. It is only correct if every
# building series starts at the same hour as `saved_df` - verify against the
# raw files (comparing 'MESS_DATUM' with 'rec_time' before the drop below).
(
    data_building1,
    data_building2,
    data_building3,
    data_building4,
    data_building5,
) = [
    pd.merge(saved_df, building_hourly, left_index=True, right_index=True)
    for building_hourly in data_building_list
]

# Stack the five merged frames on top of each other with a new running index.
df = pd.concat(
    [data_building1, data_building2, data_building3, data_building4, data_building5],
    ignore_index=True,
)

# 'MESS_DATUM' remains as the timestamp column; the building timestamp is dropped.
df = df.drop('rec_time', axis=1)
print(df)

# index=False: do not write the row index as an extra column.
df.to_csv('2019_Merged_Clean_Data_1h.csv', index=False)


                     MESS_DATUM  RWS_DAU_10  RWS_10  DS_10  GS_10  SD_10  \
0     2019-01-01 00:00:00+00:00        13.0    0.00    0.0    0.0    0.0   
1     2019-01-01 01:00:00+00:00        35.0    0.03    0.0    0.0    0.0   
2     2019-01-01 02:00:00+00:00        30.0    0.03    0.0    0.0    0.0   
3     2019-01-01 03:00:00+00:00         0.0    0.00    0.0    0.0    0.0   
4     2019-01-01 04:00:00+00:00         8.0    0.00    0.0    0.0    0.0   
...                         ...         ...     ...    ...    ...    ...   
43795 2019-12-31 19:00:00+00:00         0.0    0.00    0.0    0.0    0.0   
43796 2019-12-31 20:00:00+00:00         0.0    0.00    0.0    0.0    0.0   
43797 2019-12-31 21:00:00+00:00         0.0    0.00    0.0    0.0    0.0   
43798 2019-12-31 22:00:00+00:00         0.0    0.00    0.0    0.0    0.0   
43799 2019-12-31 23:00:00+00:00         0.0    0.00    0.0    0.0    0.0   

          FF_10       DD_10        PP_10     TT_10    TM5_10      RF_10  \
0      5.400