# Data Preparation

This file contains the code to

1) Read in the prepared load and weather data and create a consistent 15-minute interval index by averaging the available weather data over each 15-minute timespan, filling missing values with the previous value

2) Analyse the different weather parameters using statistical measures and data visualization

-------------

#### Imports

In [None]:
import h5py
import pandas as pd
import numpy as np
import pickle 
from datetime import datetime
import math
import matplotlib
import matplotlib.pyplot as plt

# Setting this option to None switches off pandas' chained-assignment check
# (SettingWithCopyWarning) for the whole notebook.
pd.options.mode.chained_assignment = None 

### Adapt weather index and merge weather data

In [None]:
# Reads in the weather and heat pump data and adapts the weather index to the
# load index: every load timestamp gets the mean of the weather samples that
# fall into its 15-minute interval. If an interval contains no weather sample
# (larger gaps between weather timestamps), the most recently computed value is
# carried forward; it stays NaN until the first sample has been seen.

# Length of one load interval, expressed in the same unit as the 'index' columns.
INTERVAL_LENGTH = 900

# NOTE: pickle can execute arbitrary code while loading - only open trusted files.
with open('Data/heatpump/data_heatpump.pkl', 'rb') as f:
    load_dict = pickle.load(f)
with open('Data/weather/data_weather.pkl', 'rb') as f:
    weather_dict = pickle.load(f)

ref_index = load_dict['SFH10']['index']
df_list = []
for df_type in weather_dict:
    df_temp = weather_dict[df_type]
    # Hoisted out of the inner loop: timestamp column and value column
    # (the value column is addressed by position, as the second column).
    timestamps = df_temp['index']
    values = df_temp.iloc[:, 1]

    aligned = []
    previous = np.nan
    for index in ref_index:
        # NOTE: the window is inclusive at both ends, so a sample lying exactly
        # on an interval boundary contributes to two consecutive intervals.
        in_window = (timestamps >= index) & (timestamps <= index + INTERVAL_LENGTH)
        if in_window.any():
            # take mean value
            previous = values[in_window].mean()
        # otherwise keep the previous value; carrying it in a local variable
        # avoids a KeyError when `index - 900` is not part of the load index
        aligned.append(previous)

    # Assign the whole column at once instead of writing through
    # `df_ref.loc[index][df_type]`, which targets a temporary row object and
    # is not guaranteed to modify df_ref.
    df_ref = ref_index.to_frame().set_index('index')
    df_ref[df_type] = aligned
    df_list.append(df_ref)
weather_data = pd.concat(df_list, axis=1)
with open('Data/weather/data_weather_merged.pkl', 'wb') as f:
    pickle.dump(weather_data, f)

In [None]:
# Reload the merged weather table from disk and show it as the cell output.
with open('Data/weather/data_weather_merged.pkl', 'rb') as pkl_file:
    weather_data = pickle.load(pkl_file)
weather_data

In [None]:
# Summary table: one column per weather parameter, one row per statistic.
df_analysis = pd.DataFrame(columns=weather_data.columns, index=['min', 'max', 'mean', 'median', 'missing values'])
for column in weather_data.columns:
    series = weather_data[column]
    # Write with a single .loc[row, column] call. The chained form
    # `.loc[row][column] = ...` assigns into a temporary row object and is not
    # guaranteed to update df_analysis.
    df_analysis.loc['min', column] = series.min()
    df_analysis.loc['max', column] = series.max()
    df_analysis.loc['mean', column] = series.mean()
    df_analysis.loc['median', column] = series.median()
    # Number of NaN entries in the column.
    df_analysis.loc['missing values', column] = series.isna().sum()
df_analysis

In [None]:
# Build a separate frame for plotting: move the index into a column, interpret
# it as seconds since the epoch, and use the resulting datetimes as the index.
data_plots = weather_data.reset_index()
data_plots['index'] = pd.to_datetime(data_plots['index'], unit='s')
data_plots = data_plots.set_index('index')

# One subplot per column, drawn onto a 5 x 2 grid of axes.
fig, a = plt.subplots(nrows=5, ncols=2, figsize=(20, 20), tight_layout=True)
data_plots.plot(subplots=True, ax=a, rot=60)

### Analysis of merged load data

In [None]:
# Load the pickled heat pump data from disk.
with open('Data/heatpump/data_heatpump.pkl', 'rb') as pkl_file:
    load_dict = pickle.load(pkl_file)

In [None]:
# Keep the 'index' column and the four *_TOT columns of the SFH10 entry.
sfh10_columns = ['index', 'S_TOT', 'Q_TOT', 'P_TOT', 'PF_TOT']
df_10 = load_dict['SFH10'][sfh10_columns]

In [None]:
# Number of rows in the SFH10 selection.
df_10.shape[0]

In [None]:
# Descriptive statistics of the P_TOT column.
df_10.loc[:, 'P_TOT'].describe()

In [None]:
# Print the summary information pandas provides for the P_TOT column.
df_10.loc[:, 'P_TOT'].info()