## Imports

In [126]:
import pandas as pd
import numpy as np
import json

## Get data

In [127]:
params = {}
with open('proj5_params.json') as f:
    params = json.load(f)
print(params)

df = pd.read_csv("proj5_timeseries.csv")
# cleaning of the data
df.columns = df.columns.str.lower().str.replace(r"[^a-z]", "_", regex=True)


{'original_frequency': 'd', 'target_frequency': 'w', 'downsample_periods': 3, 'downsample_units': 'd', 'upsample_periods': 2, 'upsample_units': 'h', 'interpolation': 'polynomial', 'interpolation_order': 3, 'sensors_periods': 10, 'sensors_units': 's'}


## Preparing time series datasets

In [128]:
# convert 'date' to datetime
df['date'] = pd.to_datetime(df['date'])
df.set_index('date', inplace=True)

In [129]:
# set the frequency of the time series
df = df.asfreq(params['original_frequency'])

# save the data
df.to_pickle("proj5_ex01.pkl")
df_copy = df.copy()
df

Unnamed: 0_level_0,consumption,wind,solar,wind_solar
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2006-01-01,1069.18400,,,
2006-01-02,1380.52100,,,
2006-01-03,1442.53300,,,
2006-01-04,1457.21700,,,
2006-01-05,1477.13100,,,
...,...,...,...,...
2017-12-27,1263.94091,394.507,16.530,411.037
2017-12-28,1299.86398,506.424,14.162,520.586
2017-12-29,1295.08753,584.277,29.854,614.131
2017-12-30,1215.44897,721.247,7.467,728.714


## 2.2 Frequency adjustment

In [130]:
df = df_copy.copy()
df = df.asfreq(params['target_frequency'])

# save the data
df.to_pickle("proj5_ex02.pkl")

## 2.3 Downsampling

In [131]:
df = df_copy.copy()
df = df.resample(str(params['downsample_periods']) + params['downsample_units']).sum(min_count=params['downsample_periods'])


# save the data
df.to_pickle("proj5_ex03.pkl")

## 2.4 Upsampling

In [132]:
df = df_copy.copy()

df = df.resample(str(params['upsample_periods']) + params['upsample_units']).interpolate(params['interpolation'], order=params['interpolation_order'])

#### Scaling the values according to the ratio between the original frequency and the upsampled one

In [133]:
ratio = pd.Timedelta(params['upsample_periods'], params['upsample_units']) / pd.Timedelta(1, params['original_frequency'])
df *= ratio

# save the data
df.to_pickle("proj5_ex04.pkl")

## 2.5 Reshaping & alignment

In [134]:
df = pd.read_pickle("proj5_sensors.pkl")
df


Unnamed: 0_level_0,device_id,value
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-11-25 00:00:25,25,6.693750
2022-11-25 00:01:25,25,6.725000
2022-11-25 00:02:25,25,6.756250
2022-11-25 00:03:25,25,6.787500
2022-11-25 00:04:25,25,6.818750
...,...,...
2022-11-27 23:55:29,47,6.022222
2022-11-27 23:56:29,47,6.016667
2022-11-27 23:57:29,47,6.011111
2022-11-27 23:58:29,47,6.005556


In [135]:
df = df.pivot(columns='device_id', values='value')


# reindex
new_index = pd.date_range(start=df.index.round("1min").min(), end=df.index.round("1min").max(), freq=str(params['sensors_periods']) + str(params['sensors_units']))
df = df.reindex(new_index.union(df.index)).interpolate()
df = df.reindex(new_index)

# remove NaN
df = df.dropna()

# save the data
df.to_pickle("proj5_ex05.pkl")
df


device_id,25,26,27,28,29,30,31,32,33,34,...,38,39,40,41,42,43,44,45,46,47
2022-11-25 00:01:00,6.711111,5.109630,5.192130,5.588325,6.758754,5.562434,5.437475,7.551852,6.749673,5.333951,...,7.077160,6.500000,3.245833,7.106173,7.718301,5.215595,7.022896,7.110136,6.780507,7.733333
2022-11-25 00:01:10,6.716898,5.124444,5.203704,5.585990,6.764646,5.571252,5.437222,7.555967,6.751852,5.318519,...,7.080247,6.500000,3.248148,7.112346,7.722658,5.235088,7.026263,7.114035,6.775634,7.735859
2022-11-25 00:01:20,6.721528,5.136296,5.212963,5.584122,6.769360,5.578307,5.437020,7.559259,6.753595,5.306173,...,7.082716,6.500000,3.250000,7.117284,7.726144,5.250682,7.028956,7.117154,6.771735,7.737879
2022-11-25 00:01:30,6.729630,5.157037,5.229167,5.580853,6.777609,5.590653,5.436667,7.565021,6.756645,5.290741,...,7.087037,6.500000,3.253241,7.125926,7.732244,5.277973,7.033670,7.122612,6.764912,7.741414
2022-11-25 00:01:40,6.733102,5.165926,5.236111,5.579452,6.781145,5.595944,5.436515,7.567490,6.757952,5.285185,...,7.088889,6.500000,3.254630,7.129630,7.734858,5.289669,7.035690,7.124951,6.761988,7.742929
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-11-27 23:59:20,4.304678,4.484096,5.268627,4.721053,6.380952,6.097661,5.159226,5.188889,4.248889,5.887800,...,5.600000,4.515789,2.300000,5.149383,5.300000,5.576023,4.697856,6.203472,4.005882,6.001235
2022-11-27 23:59:30,4.305263,4.482571,5.282353,4.721053,6.380952,6.096296,5.155455,5.190123,4.243704,5.888235,...,5.600000,4.515789,2.300000,5.140741,5.300000,5.580117,4.699220,6.200000,4.005882,6.000000
2022-11-27 23:59:40,4.305263,4.482353,5.288235,4.721053,6.380952,6.095712,5.155455,5.190476,4.241481,5.888235,...,5.600000,4.515789,2.300000,5.137037,5.300000,5.581871,4.699805,6.200000,4.005882,6.000000
2022-11-27 23:59:50,4.305263,4.482353,5.296078,4.721053,6.380952,6.094932,5.155455,5.190476,4.240000,5.888235,...,5.600000,4.515789,2.300000,5.133333,5.300000,5.584211,4.700000,6.200000,4.005882,6.000000
