# In [3]:
import sys
sys.path.append('../../')

import os
import warnings
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime as dt
import math

from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler

# Define fallback versions of missing utils
def load_data(filepath):
    """Read the CSV file at ``filepath`` and return it as a DataFrame.

    Fallback for the missing ``load_data`` utility: a thin wrapper around
    ``pandas.read_csv`` called with that function's default options.
    """
    frame = pd.read_csv(filepath)
    return frame

def mape(y_true, y_pred):
    """Return the mean absolute percentage error, expressed in percent.

    Computes ``mean(|(y_true - y_pred) / y_true|) * 100``, so a return value
    of ``10.0`` means an average error of 10 %.

    Args:
        y_true: Observed values. NumPy arrays, pandas objects, or plain
            lists/tuples of numbers.
        y_pred: Predicted values; must be compatible with ``y_true`` for
            element-wise arithmetic.

    Returns:
        The MAPE as a percentage.

    Note:
        Zeros in ``y_true`` are not special-cased; the division is carried
        out as-is.
    """
    # Plain Python sequences do not support element-wise arithmetic
    # (``list - list`` raises TypeError), so promote them to float arrays.
    # Arrays and pandas objects pass through untouched so their existing
    # behaviour is unchanged.
    if isinstance(y_true, (list, tuple)):
        y_true = np.asarray(y_true, dtype=float)
    if isinstance(y_pred, (list, tuple)):
        y_pred = np.asarray(y_pred, dtype=float)

    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Data preparation: load the series, split it into train/test by date, scale
# it, and turn each split into fixed-length sliding windows whose last value
# is the prediction target.


def _make_windows(values, window):
    """Return every run of ``window`` consecutive values as one row.

    Args:
        values: 1-D sequence of observations, in order.
        window: Number of consecutive observations per row.

    Returns:
        A new 2-D array of shape ``(len(values) - window + 1, window)`` where
        row ``i`` is ``values[i:i + window]``.

    Raises:
        ValueError: If ``values`` holds fewer than ``window`` observations.
    """
    values = np.asarray(values)
    n_windows = len(values) - window + 1
    if n_windows < 1:
        raise ValueError(
            "need at least {} observations to build one window, got {}".format(
                window, len(values)
            )
        )
    # Row i of ``offsets`` is [i, i+1, ..., i+window-1]; indexing with it
    # gathers all windows in one vectorised step and yields a fresh array.
    offsets = np.arange(n_windows)[:, None] + np.arange(window)
    return values[offsets]


# Load data
df = load_data('energy.csv')

# Ensure timestamp is datetime and set as index
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Define date ranges: train is [train_start_dt, test_start_dt), test is
# everything from test_start_dt onwards.
train_start_dt = '2014-11-01 00:00:00'
test_start_dt = '2014-12-30 00:00:00'

# Create train and test sets. Select rows and the 'load' column in one .loc
# call, then take an explicit copy so the scaled values assigned below go
# into independent frames rather than into a slice of df.
train = df.loc[(df.index >= train_start_dt) & (df.index < test_start_dt), ['load']].copy()
test = df.loc[df.index >= test_start_dt, ['load']].copy()

# Scale data: the scaler is fitted on the training split only and then
# reused, unchanged, to transform the test split.
scaler = MinMaxScaler()
train['load'] = scaler.fit_transform(train)
test['load'] = scaler.transform(test)

# Define timesteps: length of each window (timesteps - 1 inputs + 1 target).
timesteps = 24

# Convert to numpy arrays (single 'load' column each)
train_data = train.values
test_data = test.values

# Create sequences for training and testing. Windows are built within each
# split separately, so the first timesteps - 1 rows of a split only ever
# appear as inputs, never as targets.
train_data_timesteps = _make_windows(train_data[:, 0], timesteps)
test_data_timesteps = _make_windows(test_data[:, 0], timesteps)

# Split into features and targets: all but the last value of a window are the
# features; the last value is the target, kept 2-D with shape (n, 1).
x_train, y_train = train_data_timesteps[:, :timesteps-1], train_data_timesteps[:, [timesteps-1]]
x_test, y_test = test_data_timesteps[:, :timesteps-1], test_data_timesteps[:, [timesteps-1]]

# Show shapes
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

# Output:
# (1393, 23) (1393, 1)
# (25, 23) (25, 1)
