# Combine electricity readings with weather data

Here we have taken the combined electricity logs [1](../dataclean/combine_datasets.ipynb) and the interpolated weather data retrieved from Vacketvader.com [2](../retrieval/InterpolateWeatherData.ipynb).
The electricity logs contain readings every minute, whilst the weather data, which was read at intervals of 10, 20, 30 or 40 minutes, has been interpolated to every 10 minutes.

Thus the records have been merged, with each weather reading applicable to ten electricity log readings.

Converting the string-based timestamps to a datetime type simplified the join to a simple integer comparison, including an offset of 10 minutes (600 seconds).

----

In [3]:
# Copy the merged electricity log and the interpolated weather CSV into this
# notebook's ./database folder.
# file.copy(from, to, overwrite): `to` must be an existing directory for the
# source file names to be kept; otherwise `to` is treated as a file name.
dir.create("./database", showWarnings = FALSE)

copied <- file.copy(
  c(
    "../dataclean/database/ssen_merged.csv",
    "../retrieval/database/InterpolatedVackerWeather.csv"
  ),
  "./database",
  overwrite = TRUE
)

# file.copy() reports failure by returning FALSE rather than raising an error,
# so stop here instead of continuing with missing or stale copies.
stopifnot(all(copied))
copied

In [4]:
# Load the two local CSV copies into data frames: the merged electricity log
# and the interpolated weather readings.
ssen_data <- read.csv(
  file = "./database/ssen_merged.csv",
  header = TRUE,
  sep = ",",
  dec = "."
)
weather <- read.csv(
  file = "./database/InterpolatedVackerWeather.csv",
  header = TRUE,
  sep = ",",
  dec = "."
)

In [5]:
# Preview the first six rows of the electricity data
head(ssen_data, n = 6L)

elec_timestamp,demand,demand_max,anm_generated,non_anm_generated,total_generation_capacity,ANM_timestamp,operation_core,eqpt_core,site_core,...,site_2B,operation_3,eqpt_3,site_3,operation_4,eqpt_4,site_4,operation_4A,eqpt_4A,site_4A
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<fct>,<fct>,<fct>,<fct>,...,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>
2019-01-16 22:42:03,19.66,35.7,17.145,17.075,57.1,2019-01-16 22:42:33,ok,ok,ok,...,ok,ok,ok,ok,ok,ok,ok,ok,ok,ok
2019-01-16 22:43:03,19.02,35.7,16.829,17.52,57.1,2019-01-16 22:42:33,ok,ok,ok,...,ok,ok,ok,ok,ok,ok,ok,ok,ok,ok
2019-01-16 22:44:03,18.4,35.7,14.394,17.245,57.1,2019-01-16 22:42:33,ok,ok,ok,...,ok,ok,ok,ok,ok,ok,ok,ok,ok,ok
2019-01-16 22:45:03,18.25,35.7,13.674,17.446,57.1,2019-01-16 22:42:33,ok,ok,ok,...,ok,ok,ok,ok,ok,ok,ok,ok,ok,ok
2019-01-16 22:46:03,18.7,35.7,14.572,16.457,57.1,2019-01-16 22:46:25,ok,ok,ok,...,ok,ok,ok,ok,ok,ok,ok,ok,ok,ok
2019-01-16 22:47:03,18.57,35.7,14.0,16.448,57.1,2019-01-16 22:46:25,ok,ok,ok,...,ok,ok,ok,ok,ok,ok,ok,ok,ok,ok


In [6]:
# Preview the first six rows of the weather data as read from file
head(weather, n = 6L)

X,dt,approxWindSpeed,approxWindDirection,approxPressure,approxTemp
<int>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>
1,2018-12-22 00:20:00,2.6,230.0,997,4.0
2,2018-12-22 00:30:00,2.766667,226.6667,997,4.333333
3,2018-12-22 00:40:00,2.933333,223.3333,997,4.666667
4,2018-12-22 00:50:00,3.1,220.0,997,5.0
5,2018-12-22 01:00:00,3.266667,223.3333,997,5.0
6,2018-12-22 01:10:00,3.433333,226.6667,997,5.0


In [9]:
# Parse the electricity timestamp text into a POSIXct column `dt` (GMT).
# The format string stops at the minute, so the seconds part is dropped and
# each reading is aligned to the start of its minute.
ssen_data[["dt"]] <- as.POSIXct(
  ssen_data[["elec_timestamp"]],
  format = "%Y-%m-%d %H:%M",
  tz = "GMT"
)

In [26]:
# Compare the original timestamp text (columns 1 and 7) with the parsed
# datetime (column 35) for the first six rows
ssen_data[seq_len(6L), c(1, 7, 35)]

elec_timestamp,ANM_timestamp,dt
<fct>,<fct>,<dttm>
2019-01-16 22:42:03,2019-01-16 22:42:33,2019-01-16 22:42:00
2019-01-16 22:43:03,2019-01-16 22:42:33,2019-01-16 22:43:00
2019-01-16 22:44:03,2019-01-16 22:42:33,2019-01-16 22:44:00
2019-01-16 22:45:03,2019-01-16 22:42:33,2019-01-16 22:45:00
2019-01-16 22:46:03,2019-01-16 22:46:25,2019-01-16 22:46:00
2019-01-16 22:47:03,2019-01-16 22:46:25,2019-01-16 22:47:00


In [15]:
# Replace the weather timestamp text with POSIXct values (GMT), seconds included
weather[["dt"]] <- as.POSIXct(
  weather[["dt"]],
  format = "%Y-%m-%d %H:%M:%S",
  tz = "GMT"
)

In [16]:
# Preview the weather data again, now with `dt` as a datetime column
head(weather, n = 6L)

X,dt,approxWindSpeed,approxWindDirection,approxPressure,approxTemp
<int>,<dttm>,<dbl>,<dbl>,<dbl>,<dbl>
1,2018-12-22 00:20:00,2.6,230.0,997,4.0
2,2018-12-22 00:30:00,2.766667,226.6667,997,4.333333
3,2018-12-22 00:40:00,2.933333,223.3333,997,4.666667
4,2018-12-22 00:50:00,3.1,220.0,997,5.0
5,2018-12-22 01:00:00,3.266667,223.3333,997,5.0
6,2018-12-22 01:10:00,3.433333,226.6667,997,5.0


In [27]:
library(sqldf)

In [18]:
# Attach to every one-minute electricity reading the weather record whose
# 10-minute window contains it: w.dt <= d.dt < w.dt + 600, where 600 is the
# window length in seconds.
combined <- sqldf(
  x = "select * from ssen_data d inner join weather w 
                  on (d.dt >= w.dt and d.dt< (w.dt + 600)) "
)

In [22]:
# Check the join: show the electricity timestamp, the site_4A column, the
# parsed electricity datetime, and the weather row id and weather timestamp.
# The weather timestamp comes back as a Unix epoch number
# (seconds since 1970-01-01 00:00), not as a datetime.
combined[seq_len(20L), c(1, 34:37)]

elec_timestamp,site_4A,dt,X,dt..37
<fct>,<fct>,<dttm>,<int>,<dbl>
2019-01-16 22:42:03,ok,2019-01-16 22:42:00,3735,1547678400
2019-01-16 22:43:03,ok,2019-01-16 22:43:00,3735,1547678400
2019-01-16 22:44:03,ok,2019-01-16 22:44:00,3735,1547678400
2019-01-16 22:45:03,ok,2019-01-16 22:45:00,3735,1547678400
2019-01-16 22:46:03,ok,2019-01-16 22:46:00,3735,1547678400
2019-01-16 22:47:03,ok,2019-01-16 22:47:00,3735,1547678400
2019-01-16 22:48:03,ok,2019-01-16 22:48:00,3735,1547678400
2019-01-16 22:49:03,ok,2019-01-16 22:49:00,3735,1547678400
2019-01-16 22:50:03,ok,2019-01-16 22:50:00,3736,1547679000
2019-01-16 22:51:03,ok,2019-01-16 22:51:00,3736,1547679000


In [23]:
# Save the joined electricity/weather table as a CSV file.
# NOTE(review): write.csv() also writes row names by default, which adds an
# unnamed index column to the file — confirm downstream readers expect it.
write.csv(x = combined, file = "./database/combined_ssen_weather.csv")