In [1]:
# importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [2]:
# outside temp file
outside_temp_df = pd.read_csv('/case_study/case_study_data/metdata_dwd_Waltrop.csv')

In [3]:
def data_loader(file_name1, file_name2):
    
    # reading files
    df1 = pd.read_excel('/case_study/case_study_data/' + file_name1, header = 4)
    df2 = pd.read_excel('/case_study/case_study_data/' + file_name2, header = 4)
    
    df1 = df1.drop(0, axis = 0) # droping 1st row
    df1 = df1.reset_index(drop = True)
    df1 = df1.rename({'OBIS Bezeichnung' : 'Time'}, axis = 1)

    df1['Time'] = pd.to_datetime(df1['Time']) # convert to datetime format

    df2 = df2.drop(0, axis = 0) # droping 1st row
    df2 = df2.reset_index(drop = True)
    df2 = df2.rename({'OBIS Bezeichnung' : 'Time'}, axis = 1)

    df2['Time'] = pd.to_datetime(df2['Time']) # convert to datetime format
    
    df1 = df1.drop_duplicates(['Time'])
    df2 = df2.drop_duplicates(['Time'])

    concat_df = pd.concat([df1, df2]) # concating data frames
    

    concat_df['Date'] = concat_df['Time'].dt.date
    concat_df['Date'] = concat_df['Date'].astype(str)
    
    return concat_df

In [4]:
oh14_df = data_loader(file_name1 = 'OH14.xls', file_name2 = 'OH14_01_26-07_19.xls')
oh12_df = data_loader(file_name1 = 'OH12.xls', file_name2 = 'OH12_01_26-07_19.xls')
kita_hokida_df = data_loader(file_name1 = 'Kita Hokido.xls', file_name2 = 'Kita Hokido_05_22_20-07_19_22.xls')
chemie_df = data_loader(file_name1 = 'Chemie.xls', file_name2 = 'Chemie_01_26-07_19.xls')
gross_df = data_loader(file_name1 = 'Großtagespflege.xls', file_name2 = 'Grosstagespflege_04_05-07_19.xls')
hg_2_df  = data_loader(file_name1 = 'HG II.xls', file_name2 = 'HGII_01_26-07_19.xls')



In [5]:
# droping duplicates
oh14_df = oh14_df.drop_duplicates(['Time']) # droping duplicates on basis of "Recorded Time" in oh14 data
oh12_df = oh12_df.drop_duplicates() # droping identical rows
kita_hokida_df = kita_hokida_df.drop_duplicates() # droping identical rows
chemie_df = chemie_df.drop_duplicates(keep = 'last') 
gross_df = gross_df.drop_duplicates(['Time'], keep = 'first')
hg_2_df = hg_2_df.drop_duplicates(['Time'], keep = 'first')

#### analysis for oh14

In [114]:
# For OH14 we have data from 2021-07-06  to 2022-07-19.

In [6]:
# getting only those rows which are recording on hourly bases
oh14_hourly_df = oh14_df[oh14_df['Time'].dt.minute == 0]
outside_temp_df['Zeitstempel'] = pd.to_datetime(outside_temp_df['Zeitstempel']) # covert to datetime format

# taking some selcted columns
oh14_en = oh14_hourly_df[['Time', 'Date', 'WV+ Arbeit Tarif 1', 'Durchfluss', 'Wärmeenergie Tarif 1',
                          'Wärmeenergie Tarif 1.1', 'Volumen Kanal 1']]
oh14_en['Time'] = oh14_en['Time'].astype('str')
outside_temp_df['Zeitstempel'] = outside_temp_df['Zeitstempel'].astype(str)

# changing data type to float 
oh14_en[oh14_en.columns[~oh14_en.columns.isin(['Time', 'Date'])]] = oh14_en[oh14_en.columns[~oh14_en.columns.isin(['Time', 'Date'])]].astype('float')


# merging the dataframe with outside temperature file (using inner join to find all values)
merge_df = oh14_en.merge(outside_temp_df, left_on=['Time'], right_on=['Zeitstempel'], how = 'inner')

# merge_df['WV+ Arbeit Tarif 1 diff'] = merge_df['WV+ Arbeit Tarif 1 diff'].astype(float)

# calculating diff
merge_df[['Wärmeenergie Tarif 1_diff', 'Wärmeenergie Tarif 1.1_diff', 'Volumen Kanal 1_diff', 'WV+ Arbeit Tarif 1_diff', 'Durchfluss_diff']] = merge_df[['Wärmeenergie Tarif 1', 
                                'Durchfluss', 'Volumen Kanal 1', 'WV+ Arbeit Tarif 1', 'Wärmeenergie Tarif 1.1']].diff(periods = -1)


oh14_lecture_period_winter_21_22 = merge_df[(merge_df['Date'] >= '2021-10-11') & (merge_df['Date'] <= '2022-02-04')]
oh14_lecture_period_summer_21 = merge_df[(merge_df['Date'] >= '2021-04-11') & (merge_df['Date'] <= '2021-07-15')]
oh14_winter_break_df = merge_df[(merge_df['Date'] > '2021-12-24') & (merge_df['Date'] <= '2022-01-07')]
oh14_summer_break_21 = merge_df[(merge_df['Date'] > '2021-07-15') & (merge_df['Date'] <= '2021-09-30')]

In [7]:
# finding correlation between numerical columns using df.corr() function.
col_names = ['Wert', 'Wärmeenergie Tarif 1_diff', 'Wärmeenergie Tarif 1.1_diff', 'Volumen Kanal 1_diff',
             'WV+ Arbeit Tarif 1_diff', 'Durchfluss_diff']
# oh14_lecture_period_winter_21_22[['Wert', 'WV+ Arbeit Tarif 1', 'Wärmeenergie Tarif 1', 'Durchfluss',
#           'Volumen Kanal 1', 'Wärmeenergie Tarif 1.1']].corr()

In [8]:
oh14_lecture_period_winter_21_22[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,Wärmeenergie Tarif 1.1_diff,Volumen Kanal 1_diff,WV+ Arbeit Tarif 1_diff,Durchfluss_diff
Wert,1.0,-0.427242,-0.018608,0.052279,0.091592,0.215015
Wärmeenergie Tarif 1_diff,-0.427242,1.0,0.118431,0.047629,0.588793,0.15119
Wärmeenergie Tarif 1.1_diff,-0.018608,0.118431,1.0,-0.030343,-0.071075,0.0282
Volumen Kanal 1_diff,0.052279,0.047629,-0.030343,1.0,0.056607,-0.116787
WV+ Arbeit Tarif 1_diff,0.091592,0.588793,-0.071075,0.056607,1.0,0.348214
Durchfluss_diff,0.215015,0.15119,0.0282,-0.116787,0.348214,1.0


In [9]:
# finding correlation between numerical columns using df.corr() function.
oh14_lecture_period_summer_21[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,Wärmeenergie Tarif 1.1_diff,Volumen Kanal 1_diff,WV+ Arbeit Tarif 1_diff,Durchfluss_diff
Wert,1.0,-0.056356,0.075065,,-0.05616,-0.05616
Wärmeenergie Tarif 1_diff,-0.056356,1.0,0.015707,,1.0,1.0
Wärmeenergie Tarif 1.1_diff,0.075065,0.015707,1.0,,0.015756,0.015759
Volumen Kanal 1_diff,,,,,,
WV+ Arbeit Tarif 1_diff,-0.05616,1.0,0.015756,,1.0,1.0
Durchfluss_diff,-0.05616,1.0,0.015759,,1.0,1.0


In [47]:
oh14_winter_break_df[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,Wärmeenergie Tarif 1.1_diff,Volumen Kanal 1_diff,WV+ Arbeit Tarif 1_diff,Durchfluss_diff
Wert,1.0,-0.674306,-0.004697,0.362421,-0.410079,-0.089912
Wärmeenergie Tarif 1_diff,-0.674306,1.0,0.052312,-0.473487,0.732647,0.250889
Wärmeenergie Tarif 1.1_diff,-0.004697,0.052312,1.0,-0.003657,-0.088646,-0.017132
Volumen Kanal 1_diff,0.362421,-0.473487,-0.003657,1.0,-0.416126,0.016568
WV+ Arbeit Tarif 1_diff,-0.410079,0.732647,-0.088646,-0.416126,1.0,0.188922
Durchfluss_diff,-0.089912,0.250889,-0.017132,0.016568,0.188922,1.0


In [48]:
oh14_summer_break_21[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,Wärmeenergie Tarif 1.1_diff,Volumen Kanal 1_diff,WV+ Arbeit Tarif 1_diff,Durchfluss_diff
Wert,1.0,-0.078937,-0.011909,0.032593,0.381007,0.498584
Wärmeenergie Tarif 1_diff,-0.078937,1.0,0.210751,0.288167,0.31545,0.033586
Wärmeenergie Tarif 1.1_diff,-0.011909,0.210751,1.0,-0.113974,-0.013675,0.013435
Volumen Kanal 1_diff,0.032593,0.288167,-0.113974,1.0,0.674437,0.070597
WV+ Arbeit Tarif 1_diff,0.381007,0.31545,-0.013675,0.674437,1.0,0.423263
Durchfluss_diff,0.498584,0.033586,0.013435,0.070597,0.423263,1.0


#### analysis for oh12

In [49]:
# getting only those rows which are recording on hourly bases
oh12_hourly_df = oh12_df[oh12_df['Time'].dt.minute == 0]
outside_temp_df['Zeitstempel'] = pd.to_datetime(outside_temp_df['Zeitstempel']) # covert to datetime format

# taking some selcted columns
oh12_en = oh12_hourly_df[['Time', 'Date', 'Wärmeenergie total', 'Durchfluss', 'Vorlauftemperatur', 'Rücklauftemperatur',
                          'Wärmeleistung', 'WV+ Arbeit Tarif 1', 'WV+ Arbeit Tarif 1.1', 'WV+ Arbeit Tarif 1.2']]
oh12_en['Time'] = oh12_en['Time'].astype('str')

# changing data type to float 
oh12_en[oh12_en.columns[~oh12_en.columns.isin(['Time', 'Date'])]] = oh12_en[oh12_en.columns[~oh12_en.columns.isin(['Time', 'Date'])]].astype('float')

outside_temp_df['Zeitstempel'] = outside_temp_df['Zeitstempel'].astype(str)

# merging the dataframe with outside temperature file (using inner join to find all values)
merge_df = oh12_en.merge(outside_temp_df, left_on=['Time'], right_on=['Zeitstempel'], how = 'inner')


# merge_df.columns

merge_df[['Wärmeenergie total_diff', 'Durchfluss_diff', 'WV+ Arbeit Tarif 1_diff', 'WV+ Arbeit Tarif 1.1_diff', 'WV+ Arbeit Tarif 1.2_diff']] = merge_df[['Wärmeenergie total', 
                                        'Durchfluss', 'WV+ Arbeit Tarif 1', 'WV+ Arbeit Tarif 1.1', 'WV+ Arbeit Tarif 1.2']].diff()

oh12_lecture_period_winter_21_22 = merge_df[(merge_df['Date'] >= '2021-10-11') & (merge_df['Date'] <= '2022-02-04')]
oh12_lecture_period_winter_20_21 = merge_df[(merge_df['Date'] >= '2020-10-11') & (merge_df['Date'] <= '2021-02-04')]
oh12_lecture_period_summer_21 = merge_df[(merge_df['Date'] >= '2021-04-11') & (merge_df['Date'] <= '2021-07-15')]
oh12_winter_break_df = merge_df[(merge_df['Date'] > '2021-12-24') & (merge_df['Date'] <= '2022-01-07')]
oh12_winter_break_20_21 = merge_df[(merge_df['Date'] > '2020-12-24') & (merge_df['Date'] <= '2021-01-07')]
oh12_summer_break_21 = merge_df[(merge_df['Date'] > '2021-07-15') & (merge_df['Date'] <= '2021-09-30')]

In [50]:
col_names = ['Wert', 'Wärmeenergie total_diff', 'Durchfluss_diff', 'WV+ Arbeit Tarif 1_diff',
             'WV+ Arbeit Tarif 1.1_diff', 'WV+ Arbeit Tarif 1.2_diff']
oh12_lecture_period_winter_21_22[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie total_diff,Durchfluss_diff,WV+ Arbeit Tarif 1_diff,WV+ Arbeit Tarif 1.1_diff,WV+ Arbeit Tarif 1.2_diff
Wert,1.0,0.007842,0.010762,0.005528,-0.095601,0.00382
Wärmeenergie total_diff,0.007842,1.0,-0.015326,0.999979,0.419772,0.994531
Durchfluss_diff,0.010762,-0.015326,1.0,-0.015957,-0.085343,-0.016837
WV+ Arbeit Tarif 1_diff,0.005528,0.999979,-0.015957,1.0,0.698014,0.994533
WV+ Arbeit Tarif 1.1_diff,-0.095601,0.419772,-0.085343,0.698014,1.0,-0.018963
WV+ Arbeit Tarif 1.2_diff,0.00382,0.994531,-0.016837,0.994533,-0.018963,1.0


In [51]:
oh12_lecture_period_summer_21[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie total_diff,Durchfluss_diff,WV+ Arbeit Tarif 1_diff,WV+ Arbeit Tarif 1.1_diff,WV+ Arbeit Tarif 1.2_diff
Wert,1.0,0.44787,0.055673,-0.228526,-0.294818,-0.009243
Wärmeenergie total_diff,0.44787,1.0,0.09301,0.127982,0.221718,0.003104
Durchfluss_diff,0.055673,0.09301,1.0,-0.083932,-0.090623,0.002066
WV+ Arbeit Tarif 1_diff,-0.228526,0.127982,-0.083932,1.0,0.568081,-0.561077
WV+ Arbeit Tarif 1.1_diff,-0.294818,0.221718,-0.090623,0.568081,1.0,0.082719
WV+ Arbeit Tarif 1.2_diff,-0.009243,0.003104,0.002066,-0.561077,0.082719,1.0


In [52]:
oh12_winter_break_df[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie total_diff,Durchfluss_diff,WV+ Arbeit Tarif 1_diff,WV+ Arbeit Tarif 1.1_diff,WV+ Arbeit Tarif 1.2_diff
Wert,1.0,0.251367,-0.017991,0.262595,0.194061,-0.077325
Wärmeenergie total_diff,0.251367,1.0,0.120217,0.453701,0.622064,-0.041211
Durchfluss_diff,-0.017991,0.120217,1.0,-0.068913,-0.072644,-0.033662
WV+ Arbeit Tarif 1_diff,0.262595,0.453701,-0.068913,1.0,0.701396,-0.634553
WV+ Arbeit Tarif 1.1_diff,0.194061,0.622064,-0.072644,0.701396,1.0,-0.035494
WV+ Arbeit Tarif 1.2_diff,-0.077325,-0.041211,-0.033662,-0.634553,-0.035494,1.0


In [53]:
oh12_summer_break_21[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie total_diff,Durchfluss_diff,WV+ Arbeit Tarif 1_diff,WV+ Arbeit Tarif 1.1_diff,WV+ Arbeit Tarif 1.2_diff
Wert,1.0,0.273604,0.026057,-0.253941,-0.320145,0.000697
Wärmeenergie total_diff,0.273604,1.0,0.002542,0.072156,0.190169,0.053572
Durchfluss_diff,0.026057,0.002542,1.0,-0.03716,-0.043482,0.017362
WV+ Arbeit Tarif 1_diff,-0.253941,0.072156,-0.03716,1.0,0.541675,-0.683946
WV+ Arbeit Tarif 1.1_diff,-0.320145,0.190169,-0.043482,0.541675,1.0,0.023301
WV+ Arbeit Tarif 1.2_diff,0.000697,0.053572,0.017362,-0.683946,0.023301,1.0


In [54]:
oh12_lecture_period_winter_20_21[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie total_diff,Durchfluss_diff,WV+ Arbeit Tarif 1_diff,WV+ Arbeit Tarif 1.1_diff,WV+ Arbeit Tarif 1.2_diff
Wert,1.0,0.320275,0.015504,-0.019674,-0.18427,-0.078792
Wärmeenergie total_diff,0.320275,1.0,0.054568,-0.020878,0.363605,-0.022133
Durchfluss_diff,0.015504,0.054568,1.0,-0.00856,-0.084199,-0.006858
WV+ Arbeit Tarif 1_diff,-0.019674,-0.020878,-0.00856,1.0,-0.021836,-0.000895
WV+ Arbeit Tarif 1.1_diff,-0.18427,0.363605,-0.084199,-0.021836,1.0,0.012568
WV+ Arbeit Tarif 1.2_diff,-0.078792,-0.022133,-0.006858,-0.000895,0.012568,1.0


In [55]:
oh12_winter_break_20_21[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie total_diff,Durchfluss_diff,WV+ Arbeit Tarif 1_diff,WV+ Arbeit Tarif 1.1_diff,WV+ Arbeit Tarif 1.2_diff
Wert,1.0,0.257992,0.000601,0.131682,0.171236,-0.005212
Wärmeenergie total_diff,0.257992,1.0,0.079839,0.283457,0.52691,-0.018429
Durchfluss_diff,0.000601,0.079839,1.0,-0.039842,-0.075527,-0.033228
WV+ Arbeit Tarif 1_diff,0.131682,0.283457,-0.039842,1.0,0.593299,-0.695554
WV+ Arbeit Tarif 1.1_diff,0.171236,0.52691,-0.075527,0.593299,1.0,-0.008256
WV+ Arbeit Tarif 1.2_diff,-0.005212,-0.018429,-0.033228,-0.695554,-0.008256,1.0


#### analysis for kita_hokida

In [62]:
kita_hokida_df.shape

(72129, 9)

In [26]:
# getting only those rows which are recording on hourly bases
kita_hourly_df = kita_hokida_df[kita_hokida_df['Time'].dt.minute == 0]
outside_temp_df['Zeitstempel'] = pd.to_datetime(outside_temp_df['Zeitstempel']) # covert to datetime format

# taking some selcted columns
kita_en = kita_hourly_df[['Time', 'Date', 'Wärmeenergie Tarif 1', 'Durchfluss', 'Vorlauftemperatur', 'Rücklauftemperatur',
                          'Wärmeleistung', 'Volumen Kanal 1', 'WV+ Arbeit Tarif 1']]
kita_en['Time'] = kita_en['Time'].astype('str')
kita_en[kita_en.columns[~kita_en.columns.isin(['Time', 'Date'])]] = kita_en[kita_en.columns[~kita_en.columns.isin(['Time', 'Date'])]].astype('float')

outside_temp_df['Zeitstempel'] = outside_temp_df['Zeitstempel'].astype(str)

# merging the dataframe with outside temperature file (using inner join to find all values)
merge_df = kita_en.merge(outside_temp_df, left_on=['Time'], right_on=['Zeitstempel'], how = 'inner')

# changing data type to float 
# merge_df.columns
# merge_df['WV+ Arbeit Tarif 1 diff'] = merge_df['WV+ Arbeit Tarif 1 diff'].astype(float)

merge_df[['Wärmeenergie Tarif 1_diff', 'Volumen Kanal 1_diff', 'WV+ Arbeit Tarif 1_diff']] = merge_df[['Wärmeenergie Tarif 1', 'Volumen Kanal 1', 'WV+ Arbeit Tarif 1']].diff(periods = -1)

kita_lecture_period_winter_21_22 = merge_df[(merge_df['Date'] >= '2021-10-11') & (merge_df['Date'] <= '2022-02-04')]
kita_lecture_period_winter_20_21 = merge_df[(merge_df['Date'] >= '2020-10-11') & (merge_df['Date'] <= '2021-02-04')]
kita_lecture_period_summer_21 = merge_df[(merge_df['Date'] >= '2021-04-11') & (merge_df['Date'] <= '2021-07-15')]
kita_winter_break_df = merge_df[(merge_df['Date'] > '2021-12-24') & (merge_df['Date'] <= '2022-01-07')]
kita_winter_break_20_21 = merge_df[(merge_df['Date'] > '2020-12-24') & (merge_df['Date'] <= '2021-01-07')]
kita_summer_break_21 = merge_df[(merge_df['Date'] > '2021-07-15') & (merge_df['Date'] <= '2021-09-30')]

In [75]:
col_names = ['Wert', 'Wärmeenergie Tarif 1_diff', 'Durchfluss', 'Volumen Kanal 1_diff', 'WV+ Arbeit Tarif 1_diff']
kita_lecture_period_winter_21_22[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,Durchfluss,Volumen Kanal 1_diff,WV+ Arbeit Tarif 1_diff
Wert,1.0,-0.090124,-0.112764,0.136682,0.102291
Wärmeenergie Tarif 1_diff,-0.090124,1.0,0.868624,0.516333,0.572216
Durchfluss,-0.112764,0.868624,1.0,0.397545,0.47317
Volumen Kanal 1_diff,0.136682,0.516333,0.397545,1.0,0.877429
WV+ Arbeit Tarif 1_diff,0.102291,0.572216,0.47317,0.877429,1.0


In [76]:
kita_lecture_period_winter_20_21[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,Durchfluss,Volumen Kanal 1_diff,WV+ Arbeit Tarif 1_diff
Wert,1.0,-0.186427,-0.225905,0.244541,0.15313
Wärmeenergie Tarif 1_diff,-0.186427,1.0,0.878112,0.257598,0.305928
Durchfluss,-0.225905,0.878112,1.0,0.154519,0.244796
Volumen Kanal 1_diff,0.244541,0.257598,0.154519,1.0,0.761616
WV+ Arbeit Tarif 1_diff,0.15313,0.305928,0.244796,0.761616,1.0


In [77]:
kita_lecture_period_summer_21[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,Durchfluss,Volumen Kanal 1_diff,WV+ Arbeit Tarif 1_diff
Wert,1.0,-0.198747,-0.160687,0.274523,0.023689
Wärmeenergie Tarif 1_diff,-0.198747,1.0,0.776116,0.162985,0.42737
Durchfluss,-0.160687,0.776116,1.0,0.120821,0.307308
Volumen Kanal 1_diff,0.274523,0.162985,0.120821,1.0,0.594008
WV+ Arbeit Tarif 1_diff,0.023689,0.42737,0.307308,0.594008,1.0


In [78]:
kita_winter_break_df[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,Durchfluss,Volumen Kanal 1_diff,WV+ Arbeit Tarif 1_diff
Wert,1.0,-0.204404,-0.23109,-0.088099,-0.087792
Wärmeenergie Tarif 1_diff,-0.204404,1.0,0.883786,0.433321,0.471554
Durchfluss,-0.23109,0.883786,1.0,0.396321,0.427422
Volumen Kanal 1_diff,-0.088099,0.433321,0.396321,1.0,0.880849
WV+ Arbeit Tarif 1_diff,-0.087792,0.471554,0.427422,0.880849,1.0


In [79]:
kita_winter_break_20_21[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,Durchfluss,Volumen Kanal 1_diff,WV+ Arbeit Tarif 1_diff
Wert,1.0,0.078658,0.054677,-0.123385,-0.160698
Wärmeenergie Tarif 1_diff,0.078658,1.0,0.928971,0.182627,0.15683
Durchfluss,0.054677,0.928971,1.0,0.18513,0.170262
Volumen Kanal 1_diff,-0.123385,0.182627,0.18513,1.0,0.814135
WV+ Arbeit Tarif 1_diff,-0.160698,0.15683,0.170262,0.814135,1.0


In [80]:
kita_summer_break_21[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,Durchfluss,Volumen Kanal 1_diff,WV+ Arbeit Tarif 1_diff
Wert,1.0,-0.038766,-0.022753,0.344983,0.335668
Wärmeenergie Tarif 1_diff,-0.038766,1.0,0.597101,0.156898,0.334641
Durchfluss,-0.022753,0.597101,1.0,0.114074,0.216613
Volumen Kanal 1_diff,0.344983,0.156898,0.114074,1.0,0.61396
WV+ Arbeit Tarif 1_diff,0.335668,0.334641,0.216613,0.61396,1.0


#### analysis for chemie

In [57]:
chemie_df.shape

(82226, 25)

In [61]:
# getting only those rows which are recording on hourly bases
chemie_hourly_df = chemie_df[chemie_df['Time'].dt.minute == 0]
outside_temp_df['Zeitstempel'] = pd.to_datetime(outside_temp_df['Zeitstempel']) # covert to datetime format

# taking some selcted columns
chemie_en = chemie_hourly_df[['Time', 'Date', 'Wärmeenergie Tarif 1', 'WV+ Arbeit Tarif 1', 'WV+ Arbeit Tarif 1.1']]
chemie_en['Time'] = chemie_en['Time'].astype('str')
chemie_en[chemie_en.columns[~chemie_en.columns.isin(['Time', 'Date'])]] = chemie_en[chemie_en.columns[~chemie_en.columns.isin(['Time', 'Date'])]].astype('float')

outside_temp_df['Zeitstempel'] = outside_temp_df['Zeitstempel'].astype(str)

# merging the dataframe with outside temperature file (using inner join to find all values)
merge_df = chemie_en.merge(outside_temp_df, left_on=['Time'], right_on=['Zeitstempel'], how = 'inner')

# changing data type to float 
# merge_df.columns
# merge_df['WV+ Arbeit Tarif 1 diff'] = merge_df['WV+ Arbeit Tarif 1 diff'].astype(float)

merge_df[['Wärmeenergie Tarif 1_diff', 'WV+ Arbeit Tarif 1_diff', 'WV+ Arbeit Tarif 1.1_diff']] = merge_df[['Wärmeenergie Tarif 1',
                                                    'WV+ Arbeit Tarif 1', 'WV+ Arbeit Tarif 1.1']].diff()

chemie_lecture_period_winter_21_22 = merge_df[(merge_df['Date'] >= '2021-10-11') & (merge_df['Date'] <= '2022-02-04')]
chemie_lecture_period_winter_20_21 = merge_df[(merge_df['Date'] >= '2020-10-11') & (merge_df['Date'] <= '2021-02-04')]
chemie_lecture_period_summer_21 = merge_df[(merge_df['Date'] >= '2021-04-11') & (merge_df['Date'] <= '2021-07-15')]
chemie_winter_break_df = merge_df[(merge_df['Date'] > '2021-12-24') & (merge_df['Date'] <= '2022-01-07')]
chemie_winter_break_20_21 = merge_df[(merge_df['Date'] > '2020-12-24') & (merge_df['Date'] <= '2021-01-07')]
chemie_summer_break_21 = merge_df[(merge_df['Date'] > '2021-07-15') & (merge_df['Date'] <= '2021-09-30')]

In [63]:
chemie_lecture_period_summer_21.shape

(2304, 14)

In [62]:
chemie_lecture_period_summer_21[['Wert', 'Wärmeenergie Tarif 1_diff', 'WV+ Arbeit Tarif 1_diff', 'WV+ Arbeit Tarif 1.1_diff']].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,WV+ Arbeit Tarif 1_diff,WV+ Arbeit Tarif 1.1_diff
Wert,1.0,0.882477,-0.194896,0.012737
Wärmeenergie Tarif 1_diff,0.882477,1.0,0.01089,0.012286
WV+ Arbeit Tarif 1_diff,-0.194896,0.01089,1.0,-0.002286
WV+ Arbeit Tarif 1.1_diff,0.012737,0.012286,-0.002286,1.0


In [66]:
chemie_lecture_period_winter_20_21[['Wert', 'Wärmeenergie Tarif 1_diff', 'WV+ Arbeit Tarif 1_diff', 'WV+ Arbeit Tarif 1.1_diff']].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,WV+ Arbeit Tarif 1_diff,WV+ Arbeit Tarif 1.1_diff
Wert,1.0,0.879877,-0.126182,-0.210357
Wärmeenergie Tarif 1_diff,0.879877,1.0,0.14766,-0.234195
WV+ Arbeit Tarif 1_diff,-0.126182,0.14766,1.0,0.090337
WV+ Arbeit Tarif 1.1_diff,-0.210357,-0.234195,0.090337,1.0


In [67]:
chemie_lecture_period_winter_21_22[['Wert', 'Wärmeenergie Tarif 1_diff', 'WV+ Arbeit Tarif 1_diff', 'WV+ Arbeit Tarif 1.1_diff']].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,WV+ Arbeit Tarif 1_diff,WV+ Arbeit Tarif 1.1_diff
Wert,1.0,0.006529,0.005305,0.005377
Wärmeenergie Tarif 1_diff,0.006529,1.0,0.999999,0.999997
WV+ Arbeit Tarif 1_diff,0.005305,0.999999,1.0,0.999998
WV+ Arbeit Tarif 1.1_diff,0.005377,0.999997,0.999998,1.0


In [68]:
chemie_summer_break_21[['Wert', 'Wärmeenergie Tarif 1_diff', 'WV+ Arbeit Tarif 1_diff', 'WV+ Arbeit Tarif 1.1_diff']].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,WV+ Arbeit Tarif 1_diff,WV+ Arbeit Tarif 1.1_diff
Wert,1.0,0.72901,-0.465263,0.012702
Wärmeenergie Tarif 1_diff,0.72901,1.0,-0.18518,0.008458
WV+ Arbeit Tarif 1_diff,-0.465263,-0.18518,1.0,-0.002345
WV+ Arbeit Tarif 1.1_diff,0.012702,0.008458,-0.002345,1.0


In [69]:
chemie_winter_break_20_21[['Wert', 'Wärmeenergie Tarif 1_diff', 'WV+ Arbeit Tarif 1_diff', 'WV+ Arbeit Tarif 1.1_diff']].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,WV+ Arbeit Tarif 1_diff,WV+ Arbeit Tarif 1.1_diff
Wert,1.0,0.49813,0.221593,0.012974
Wärmeenergie Tarif 1_diff,0.49813,1.0,0.710706,0.019654
WV+ Arbeit Tarif 1_diff,0.221593,0.710706,1.0,-0.003632
WV+ Arbeit Tarif 1.1_diff,0.012974,0.019654,-0.003632,1.0


In [70]:
chemie_winter_break_df[['Wert', 'Wärmeenergie Tarif 1_diff', 'WV+ Arbeit Tarif 1_diff', 'WV+ Arbeit Tarif 1.1_diff']].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,WV+ Arbeit Tarif 1_diff,WV+ Arbeit Tarif 1.1_diff
Wert,1.0,0.946285,0.31387,0.001925
Wärmeenergie Tarif 1_diff,0.946285,1.0,0.516484,-0.00679
WV+ Arbeit Tarif 1_diff,0.31387,0.516484,1.0,-0.016523
WV+ Arbeit Tarif 1.1_diff,0.001925,-0.00679,-0.016523,1.0


#### analysis for Großtagespflege

In [102]:
# For Großtagespflege we have data from 2021-10-24  to 2022-07-19.

In [86]:
# getting only those rows which are recording on hourly bases
gross_hourly_df = gross_df[gross_df['Time'].dt.minute == 0]
outside_temp_df['Zeitstempel'] = pd.to_datetime(outside_temp_df['Zeitstempel']) # covert to datetime format

# taking some selcted columns
gross_en = gross_hourly_df[['Time', 'Date', 'Wärmeenergie Tarif 1', 'Durchfluss', 'Volumen Kanal 1',
                            'WV+ Arbeit Tarif 1.1', 'WV+ Arbeit Tarif 1.2']]
gross_en['Time'] = gross_en['Time'].astype('str')
gross_en[gross_en.columns[~gross_en.columns.isin(['Time', 'Date'])]] = gross_en[gross_en.columns[~gross_en.columns.isin(['Time', 'Date'])]].astype('float')

outside_temp_df['Zeitstempel'] = outside_temp_df['Zeitstempel'].astype(str)

# merging the dataframe with outside temperature file (using inner join to find all values)
merge_df = gross_en.merge(outside_temp_df, left_on=['Time'], right_on=['Zeitstempel'], how = 'inner')

# changing data type to float 
# merge_df.columns
# merge_df['WV+ Arbeit Tarif 1 diff'] = merge_df['WV+ Arbeit Tarif 1 diff'].astype(float)

merge_df[['Wärmeenergie Tarif 1_diff', 'Durchfluss_diff', 'Volumen Kanal 1_diff', 'WV+ Arbeit Tarif 1.1_diff', 'WV+ Arbeit Tarif 1.2_diff']] = merge_df[['Wärmeenergie Tarif 1', 'Durchfluss', 'Volumen Kanal 1',
                            'WV+ Arbeit Tarif 1.1', 'WV+ Arbeit Tarif 1.2']].diff()

gross_lecture_period_winter_21_22 = merge_df[(merge_df['Date'] >= '2021-10-11') & (merge_df['Date'] <= '2022-02-04')]
# gross_lecture_period_winter_20_21 = merge_df[(merge_df['Date'] >= '2020-10-11') & (merge_df['Date'] <= '2021-02-04')]
# gross_lecture_period_summer_21 = merge_df[(merge_df['Date'] >= '2021-04-11') & (merge_df['Date'] <= '2021-07-15')]
gross_winter_break_df = merge_df[(merge_df['Date'] > '2021-12-24') & (merge_df['Date'] <= '2022-01-07')]
# gross_winter_break_20_21 = merge_df[(merge_df['Date'] > '2020-12-24') & (merge_df['Date'] <= '2021-01-07')]
# gross_summer_break_21 = merge_df[(merge_df['Date'] > '2021-07-15') & (merge_df['Date'] <= '2021-09-30')]

In [90]:
col_names = ['Wert', 'Wärmeenergie Tarif 1_diff', 'Durchfluss_diff', 'Volumen Kanal 1_diff',
             'WV+ Arbeit Tarif 1.1_diff', 'WV+ Arbeit Tarif 1.2_diff']
gross_lecture_period_winter_21_22[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,Durchfluss_diff,Volumen Kanal 1_diff,WV+ Arbeit Tarif 1.1_diff,WV+ Arbeit Tarif 1.2_diff
Wert,1.0,0.179453,0.062063,0.067013,0.053371,0.041534
Wärmeenergie Tarif 1_diff,0.179453,1.0,0.280413,0.280579,0.361881,0.317202
Durchfluss_diff,0.062063,0.280413,1.0,0.040395,0.019298,0.009746
Volumen Kanal 1_diff,0.067013,0.280579,0.040395,1.0,0.850803,0.74173
WV+ Arbeit Tarif 1.1_diff,0.053371,0.361881,0.019298,0.850803,1.0,0.834299
WV+ Arbeit Tarif 1.2_diff,0.041534,0.317202,0.009746,0.74173,0.834299,1.0


In [93]:
# gross_lecture_period_winter_20_21[col_names].corr() # no data
# gross_lecture_period_summer_21[col_names].corr() # no data
gross_winter_break_df[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,Durchfluss_diff,Volumen Kanal 1_diff,WV+ Arbeit Tarif 1.1_diff,WV+ Arbeit Tarif 1.2_diff
Wert,1.0,0.16549,0.035564,0.034475,0.029959,0.034356
Wärmeenergie Tarif 1_diff,0.16549,1.0,0.290979,0.340005,0.365333,0.367657
Durchfluss_diff,0.035564,0.290979,1.0,0.011126,0.011933,0.010211
Volumen Kanal 1_diff,0.034475,0.340005,0.011126,1.0,0.964986,0.965113
WV+ Arbeit Tarif 1.1_diff,0.029959,0.365333,0.011933,0.964986,1.0,0.999719
WV+ Arbeit Tarif 1.2_diff,0.034356,0.367657,0.010211,0.965113,0.999719,1.0


#### analysis for HG II

In [105]:
# For HG II we have data from 2021-09-22  to 2022-07-19.

In [108]:
# getting only those rows which are recording on hourly bases
hg_2_hourly_df = hg_2_df[hg_2_df['Time'].dt.minute == 0]
outside_temp_df['Zeitstempel'] = pd.to_datetime(outside_temp_df['Zeitstempel']) # covert to datetime format

# taking some selcted columns
hg_2_en = hg_2_hourly_df[['Time', 'Date', 'Wärmeenergie Tarif 1', 'Durchfluss', 'Volumen Kanal 1',
                            'WV+ Arbeit Tarif 1', 'WV+ Arbeit Tarif 1.1']]
hg_2_en['Time'] = hg_2_en['Time'].astype('str')

# changing data type to float
hg_2_en[hg_2_en.columns[~hg_2_en.columns.isin(['Time', 'Date'])]] = hg_2_en[hg_2_en.columns[~hg_2_en.columns.isin(['Time', 'Date'])]].astype('float')

outside_temp_df['Zeitstempel'] = outside_temp_df['Zeitstempel'].astype(str)

# merging the dataframe with outside temperature file (using inner join to find all values)
merge_df = hg_2_en.merge(outside_temp_df, left_on=['Time'], right_on=['Zeitstempel'], how = 'inner')
 
# merge_df.column
# calculating diff
merge_df[['Wärmeenergie Tarif 1_diff', 'Durchfluss_diff', 'Volumen Kanal 1_diff', 'WV+ Arbeit Tarif 1_diff', 'WV+ Arbeit Tarif 1.1_diff']] = merge_df[['Wärmeenergie Tarif 1', 'Durchfluss', 'Volumen Kanal 1',
                            'WV+ Arbeit Tarif 1', 'WV+ Arbeit Tarif 1.1']].diff()

hg_2_lecture_period_winter_21_22 = merge_df[(merge_df['Date'] >= '2021-10-11') & (merge_df['Date'] <= '2022-02-04')]
# hg_2_lecture_period_winter_20_21 = merge_df[(merge_df['Date'] >= '2020-10-11') & (merge_df['Date'] <= '2021-02-04')]
# hg_2_lecture_period_summer_21 = merge_df[(merge_df['Date'] >= '2021-04-11') & (merge_df['Date'] <= '2021-07-15')]
hg_2_winter_break_df = merge_df[(merge_df['Date'] > '2021-12-24') & (merge_df['Date'] <= '2022-01-07')]
# hg_2_winter_break_20_21 = merge_df[(merge_df['Date'] > '2020-12-24') & (merge_df['Date'] <= '2021-01-07')]
# hg_2_summer_break_21 = merge_df[(merge_df['Date'] > '2021-07-15') & (merge_df['Date'] <= '2021-09-30')]

In [111]:
col_names = ['Wert', 'Wärmeenergie Tarif 1_diff', 'Durchfluss_diff', 'Volumen Kanal 1_diff',
             'WV+ Arbeit Tarif 1_diff', 'WV+ Arbeit Tarif 1.1_diff']
hg_2_lecture_period_winter_21_22[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,Durchfluss_diff,Volumen Kanal 1_diff,WV+ Arbeit Tarif 1_diff,WV+ Arbeit Tarif 1.1_diff
Wert,1.0,0.203271,0.011531,0.006158,0.004911,0.006145
Wärmeenergie Tarif 1_diff,0.203271,1.0,0.123632,0.298785,0.877234,0.207541
Durchfluss_diff,0.011531,0.123632,1.0,0.047045,0.087067,0.02852
Volumen Kanal 1_diff,0.006158,0.298785,0.047045,1.0,0.999629,0.999627
WV+ Arbeit Tarif 1_diff,0.004911,0.877234,0.087067,0.999629,1.0,0.999946
WV+ Arbeit Tarif 1.1_diff,0.006145,0.207541,0.02852,0.999627,0.999946,1.0


In [112]:
hg_2_winter_break_df[col_names].corr()

Unnamed: 0,Wert,Wärmeenergie Tarif 1_diff,Durchfluss_diff,Volumen Kanal 1_diff,WV+ Arbeit Tarif 1_diff,WV+ Arbeit Tarif 1.1_diff
Wert,1.0,0.41412,0.011559,0.249856,0.336669,0.240311
Wärmeenergie Tarif 1_diff,0.41412,1.0,0.119796,0.358286,0.954694,0.593696
Durchfluss_diff,0.011559,0.119796,1.0,0.085115,0.079368,0.023251
Volumen Kanal 1_diff,0.249856,0.358286,0.085115,1.0,0.284726,0.401398
WV+ Arbeit Tarif 1_diff,0.336669,0.954694,0.079368,0.284726,1.0,0.651201
WV+ Arbeit Tarif 1.1_diff,0.240311,0.593696,0.023251,0.401398,0.651201,1.0
