# In [4]:
# Part 2: prepare the data
import pandas as pd
import numpy as np
import torch
from sklearn.preprocessing import MinMaxScaler
import os
import pickle 

# Load the raw CSV: the first column is used as the index and pandas is
# asked to parse it as dates.
DATA_PATH = './data/km_scada_sample_2022.csv'
df = pd.read_csv(
    DATA_PATH,
    index_col=0,
    parse_dates=True,
)

def split(df, train_months=9, valid_months=2):
    """Split a datetime-indexed frame chronologically into train/valid/test.

    The boundaries are measured in calendar months from the earliest
    timestamp in ``df.index``:

    * train: ``[start, start + train_months)``
    * valid: ``[start + train_months, start + train_months + valid_months)``
    * test:  everything from the end of the validation period onwards

    Half-open boolean masks are used instead of inclusive label slices with
    a fixed "one sample" offset, so every row lands in exactly one partition
    whatever the sampling interval of the index is.

    Args:
        df: DataFrame whose index holds timestamps.
        train_months: Length of the training period in calendar months.
        valid_months: Length of the validation period in calendar months.

    Returns:
        Tuple ``(train, valid, test)`` of DataFrames, rows kept in their
        original order.
    """
    idx = df.index
    s = idx.min()
    t1 = s + pd.DateOffset(months=train_months)
    t2 = t1 + pd.DateOffset(months=valid_months)

    # Strict upper bounds make the partitions disjoint and exhaustive
    # without assuming any particular sampling frequency.
    train = df.loc[idx < t1]
    valid = df.loc[(idx >= t1) & (idx < t2)]
    test = df.loc[idx >= t2]
    return train, valid, test

def scale(train, valid, test, col):
    """Min-max scale one column of each partition.

    The scaler is fitted on the training partition only and then applied
    unchanged to the validation and test partitions.

    Returns:
        Tuple ``(train_scaled, valid_scaled, test_scaled, scaler)`` where the
        first three are the transformed single-column selections and
        ``scaler`` is the fitted ``MinMaxScaler``.
    """
    scaler = MinMaxScaler()
    columns = [col]  # list selection keeps the data two-dimensional

    train_scaled = scaler.fit_transform(train[columns])
    valid_scaled, test_scaled = (
        scaler.transform(part[columns]) for part in (valid, test)
    )
    return train_scaled, valid_scaled, test_scaled, scaler

def _make_windows(series, ws, sa):
    """Cut ``series`` into overlapping input windows and matching targets.

    Window ``i`` covers rows ``i .. i + ws - 1``; its target is the row at
    index ``i + ws + sa - 1``, i.e. ``sa`` steps after the window's last row.

    Returns:
        ``(X, y)`` NumPy arrays with ``n = max(len(series) - ws - sa + 1, 0)``
        samples: ``X`` has shape ``(n, ws) + series.shape[1:]`` and ``y`` has
        shape ``(n,) + series.shape[1:]``. The trailing dimensions are kept
        even when ``n`` is 0.
    """
    series = np.asarray(series)
    n = max(len(series) - ws - sa + 1, 0)

    # Row i of the index grid holds the positions i, i+1, ..., i+ws-1.
    grid = np.arange(n)[:, None] + np.arange(ws)[None, :]
    X = series[grid]

    first_target = ws + sa - 1
    y = series[first_target:first_target + n]
    return X, y


def prep(df, col='Power (kW)', ws=18, sa=1):
    """Split, scale and window ``df`` into float32 tensors for training.

    Steps: chronological split via ``split``, min-max scaling of ``col`` via
    ``scale`` (fitted on the training part), sliding-window extraction, and
    conversion to ``torch.float32`` tensors.

    Args:
        df: Datetime-indexed DataFrame containing ``col``.
        col: Name of the column to model.
        ws: Window size, the number of consecutive rows in each input sample.
        sa: Steps ahead, the distance from the last row of a window to its
            target row.

    Returns:
        ``(dataset, scaler)`` where ``dataset`` maps ``'train'``, ``'valid'``
        and ``'test'`` to ``(X, y)`` tensor pairs and ``scaler`` is the fitted
        scaler returned by ``scale``. With the single-column 2-D arrays
        produced by ``scale``, ``X`` is ``(n, ws, 1)`` and ``y`` is ``(n, 1)``.

    Raises:
        ValueError: If ``ws`` is smaller than 1 or ``sa`` is negative.
    """
    if ws < 1 or sa < 0:
        raise ValueError(f'ws must be >= 1 and sa must be >= 0 (got ws={ws}, sa={sa})')

    train, valid, test = split(df)
    tr, va, te, sc = scale(train, valid, test, col)

    dataset = {}
    for name, series in (('train', tr), ('valid', va), ('test', te)):
        X, y = _make_windows(series, ws, sa)
        dataset[name] = (
            torch.tensor(X, dtype=torch.float32),
            torch.tensor(y, dtype=torch.float32),
        )

    return dataset, sc

# Build the windowed tensors together with the scaler returned by prep().
d, sc = prep(df)

# Bundle both into one dict so they are pickled as a single object.
clean_prepped_dataset = dict(dataset=d, scaler=sc)

# Write the bundle into the data folder, creating the folder if missing.
data_dir = './data'
os.makedirs(data_dir, exist_ok=True)

with open(f'{data_dir}/clean_prepped_dataset.pkl', mode='wb') as f:
    pickle.dump(clean_prepped_dataset, f)
