# Time Series

In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import pickle

In [27]:
with open("proj5_params.json", "r") as file:
  params = json.load(file)
params

{'original_frequency': 'D',
 'target_frequency': 'W',
 'downsample_periods': 3,
 'downsample_units': 'd',
 'upsample_periods': 2,
 'upsample_units': 'h',
 'interpolation': 'polynomial',
 'interpolation_order': 3,
 'sensors_periods': 10,
 'sensors_units': 's'}

### Prepare time series datasets

In [28]:
df = pd.read_csv("proj5_timeseries.csv")
df

Unnamed: 0,Date,Consumption [Wh],Wind,Solar,Wind+Solar
0,2006-01-01,1069.18400,,,
1,2006-01-02,1380.52100,,,
2,2006-01-03,1442.53300,,,
3,2006-01-04,1457.21700,,,
4,2006-01-05,1477.13100,,,
...,...,...,...,...,...
4380,2017-12-27,1263.94091,394.507,16.530,411.037
4381,2017-12-28,1299.86398,506.424,14.162,520.586
4382,2017-12-29,1295.08753,584.277,29.854,614.131
4383,2017-12-30,1215.44897,721.247,7.467,728.714


In [29]:
# Changing column names
import re
def rename_columns(df):
  new_names = []
  for col in df.columns:
    name = col.lower()
    name = re.sub(r'[^a-z]', '_', name)
    new_names.append(name)
  df.columns = new_names


rename_columns(df)
df.head()

Unnamed: 0,date,consumption__wh_,wind,solar,wind_solar
0,2006-01-01,1069.184,,,
1,2006-01-02,1380.521,,,
2,2006-01-03,1442.533,,,
3,2006-01-04,1457.217,,,
4,2006-01-05,1477.131,,,


In [30]:
# Let's see data types of the columns
df.dtypes

Unnamed: 0,0
date,object
consumption__wh_,float64
wind,float64
solar,float64
wind_solar,float64


In [31]:

# We want to change the date to DateTime object and make it an index
df.date = pd.to_datetime(df.date, format="mixed", dayfirst=True)
df.set_index("date", inplace=True)
df.head()

Unnamed: 0_level_0,consumption__wh_,wind,solar,wind_solar
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2006-01-01,1069.184,,,
2006-01-02,1380.521,,,
2006-01-03,1442.533,,,
2006-01-04,1457.217,,,
2006-01-05,1477.131,,,


In [32]:
print(df.index.freq)
print("Shape before setting frequency: ", df.shape)
df_days = df.asfreq(params['original_frequency']).copy()
print("Shape after setting frequency: ", df_days.shape)
print(df.index.freq)

None
Shape before setting frequency:  (4385, 4)
Shape after setting frequency:  (4383, 4)
None


In [33]:
# save result to file
with open("proj5_ex01.pkl", "wb") as file:
  pickle.dump(df_days, file)

### Frequency adjustment

In [34]:
df_weeks = df_days.asfreq(params['target_frequency']).copy()
print("Shape after setting frequency: ", df_weeks.shape)
with open("proj5_ex02.pkl", "wb") as file:
  pickle.dump(df_weeks, file)

Shape after setting frequency:  (627, 4)


### Downsampling

In [35]:
interval = str(params['downsample_periods']) + params['downsample_units']
df3d = df_days.resample(interval).sum(min_count=params['downsample_periods'])

with open("proj5_ex03.pkl", "wb") as file:
  pickle.dump(df3d, file)

df3d.head()

Unnamed: 0_level_0,consumption__wh_,wind,solar,wind_solar
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2006-01-01,3892.238,,,
2006-01-04,4337.775,,,
2006-01-07,4037.595,,,
2006-01-10,4707.367,,,
2006-01-13,4170.422,,,


### Upsampling

In [36]:
interval = str(params['upsample_periods']) + params['upsample_units']
df2h = df_days.resample(interval).interpolate(params['interpolation'], order=params['interpolation_order'])
df2h = df2h / (int(pd.Timedelta('1d') / pd.Timedelta(interval))) # scale values by ratio between original frequency and upsampled one

with open("proj5_ex04.pkl", "wb") as file:
  pickle.dump(df2h, file)

df2h.tail()

Unnamed: 0_level_0,consumption__wh_,wind,solar,wind_solar
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-12-30 16:00:00,95.117424,63.173803,0.423908,63.597711
2017-12-30 18:00:00,94.371167,62.76917,0.626352,63.395522
2017-12-30 20:00:00,93.643499,62.132939,0.897118,63.030057
2017-12-30 22:00:00,92.938331,61.248189,1.241551,62.489741
2017-12-31 00:00:00,92.259573,60.098,1.665,61.763


### Reshaping & alignment

In [37]:
df_sensors = pd.read_pickle("proj5_sensors.pkl")
df_sensors

Unnamed: 0_level_0,device_id,value
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-11-25 00:00:25,25,6.693750
2022-11-25 00:01:25,25,6.725000
2022-11-25 00:02:25,25,6.756250
2022-11-25 00:03:25,25,6.787500
2022-11-25 00:04:25,25,6.818750
...,...,...
2022-11-27 23:55:29,47,6.022222
2022-11-27 23:56:29,47,6.016667
2022-11-27 23:57:29,47,6.011111
2022-11-27 23:58:29,47,6.005556


In [38]:
# getting the interval
interval = str(params['sensors_periods']) + params['sensors_units']

# pivot the table so each device has its own column
dfp = df_sensors.pivot(columns='device_id', values='value')
dfp

device_id,25,26,27,28,29,30,31,32,33,34,...,38,39,40,41,42,43,44,45,46,47
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-11-25 00:00:03,,,,,,,,,,,...,,6.5,,,,,,,,
2022-11-25 00:00:04,,,,5.6,,,,,,,...,,,,,,,,,,
2022-11-25 00:00:08,,,,,,,,,,,...,,,,,,,,,,
2022-11-25 00:00:09,,,,,,,,,,,...,,,,,,,,,,
2022-11-25 00:00:12,,,,,,,,,,,...,,,,,7.7,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-11-27 23:59:47,,,,,,,,,,,...,,,,5.133333,,,,,,
2022-11-27 23:59:50,,,,,,,,,,,...,,,,,,5.584211,,,,
2022-11-27 23:59:51,,,,,,6.094737,,,,,...,,,,,,,,,,
2022-11-27 23:59:55,,,5.3,,,,,,,,...,,,,,,,,,,


In [39]:
# creating union of old freq and new freq
new_index = pd.date_range(dfp.index.round(interval).min(), dfp.index.round(interval).max(), freq=interval)
dfp2 = dfp.reindex(new_index.union(dfp.index)).interpolate(method='linear')
dfp2.head()

device_id,25,26,27,28,29,30,31,32,33,34,...,38,39,40,41,42,43,44,45,46,47
2022-11-25 00:00:00,,,,,,,,,,,...,,,,,,,,,,
2022-11-25 00:00:03,,,,,,,,,,,...,,6.5,,,,,,,,
2022-11-25 00:00:04,,,,5.6,,,,,,,...,,6.5,,,,,,,,
2022-11-25 00:00:08,,,,5.599533,,,,,,,...,,6.5,,,,,,,,
2022-11-25 00:00:09,,,,5.599066,,,,,,,...,,6.5,,,,,,,,


In [40]:
dfp3 = dfp2.reindex(new_index)
print(dfp3.shape)
dfp3.head()

(25921, 23)


device_id,25,26,27,28,29,30,31,32,33,34,...,38,39,40,41,42,43,44,45,46,47
2022-11-25 00:00:00,,,,,,,,,,,...,,,,,,,,,,
2022-11-25 00:00:10,,,,5.598599,,,,,,,...,,6.5,,,,,,,,
2022-11-25 00:00:20,,,,5.596731,6.737542,,,,,,...,,6.5,3.2375,,7.702614,,,,6.798051,
2022-11-25 00:00:30,6.69838,,,5.593462,6.745791,,5.43803,,,5.367901,...,7.07037,6.5,3.240741,,7.708715,,,7.101559,6.791228,7.727778
2022-11-25 00:00:40,6.701852,5.085926,,5.592061,6.749327,,5.437879,7.545267,,5.358642,...,7.072222,6.5,3.24213,,7.711329,,,7.103899,6.788304,7.729293


In [41]:
# droping columns with NaN values
df_result = dfp3.dropna()
print(df_result.shape)
df_result.head()

(25915, 23)


device_id,25,26,27,28,29,30,31,32,33,34,...,38,39,40,41,42,43,44,45,46,47
2022-11-25 00:01:00,6.711111,5.10963,5.19213,5.588325,6.758754,5.562434,5.437475,7.551852,6.749673,5.333951,...,7.07716,6.5,3.245833,7.106173,7.718301,5.215595,7.022896,7.110136,6.780507,7.733333
2022-11-25 00:01:10,6.716898,5.124444,5.203704,5.58599,6.764646,5.571252,5.437222,7.555967,6.751852,5.318519,...,7.080247,6.5,3.248148,7.112346,7.722658,5.235088,7.026263,7.114035,6.775634,7.735859
2022-11-25 00:01:20,6.721528,5.136296,5.212963,5.584122,6.76936,5.578307,5.43702,7.559259,6.753595,5.306173,...,7.082716,6.5,3.25,7.117284,7.726144,5.250682,7.028956,7.117154,6.771735,7.737879
2022-11-25 00:01:30,6.72963,5.157037,5.229167,5.580853,6.777609,5.590653,5.436667,7.565021,6.756645,5.290741,...,7.087037,6.5,3.253241,7.125926,7.732244,5.277973,7.03367,7.122612,6.764912,7.741414
2022-11-25 00:01:40,6.733102,5.165926,5.236111,5.579452,6.781145,5.595944,5.436515,7.56749,6.757952,5.285185,...,7.088889,6.5,3.25463,7.12963,7.734858,5.289669,7.03569,7.124951,6.761988,7.742929


In [42]:
with open('proj5_ex05.pkl', 'wb') as file:
  pickle.dump(df_result, file)