# Task 2. RNN

Develop RNN, GRU and LSTM to predict Usage_kWh. Dataset - http://archive.ics.uci.edu/dataset/851/steel+industry+energy+consumption.

Hyperparameters are at your discretion

Compare the quality of the MSE, RMSE and R^2 models

In [1]:
! pip install ucimlrepo


Looking in indexes: https://nid-artifactory.ad.speechpro.com/artifactory/api/pypi/pypi/simple
Collecting ucimlrepo
  Downloading https://nid-artifactory.ad.speechpro.com/artifactory/api/pypi/pypi/packages/packages/3b/07/1252560194df2b4fad1cb3c46081b948331c63eb1bb0b97620d508d12a53/ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7


In [1]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
steel_industry_energy_consumption = fetch_ucirepo(id=851) 
  
# data (as pandas dataframes) 
X = steel_industry_energy_consumption.data.features 
y = steel_industry_energy_consumption.data.targets 
  
# metadata 
print(steel_industry_energy_consumption.metadata) 
  
# variable information 
print(steel_industry_energy_consumption.variables) 

{'uci_id': 851, 'name': 'Steel Industry Energy Consumption', 'repository_url': 'https://archive.ics.uci.edu/dataset/851/steel+industry+energy+consumption', 'data_url': 'https://archive.ics.uci.edu/static/public/851/data.csv', 'abstract': 'The data is collected from a smart small-scale steel industry in South Korea.', 'area': 'Physics and Chemistry', 'tasks': ['Regression'], 'characteristics': ['Multivariate'], 'num_instances': 35040, 'num_features': 9, 'feature_types': ['Real', 'Categorical'], 'demographics': [], 'target_col': ['Load_Type'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 2021, 'last_updated': 'Mon Aug 14 2023', 'dataset_doi': '10.24432/C52G8C', 'creators': ['Sathishkumar V E', 'Changsun Shin', 'Yongyun Cho'], 'intro_paper': {'title': 'Efficient energy consumption prediction model for a data analytic-enabled industry building in a smart city', 'authors': 'Sathishkumar V E, Changsun Shin, Yongyun Cho', 'published

In [2]:
X.head()

Unnamed: 0,Usage_kWh,Lagging_Current_Reactive.Power_kVarh,Leading_Current_Reactive_Power_kVarh,CO2(tCO2),Lagging_Current_Power_Factor,Leading_Current_Power_Factor,NSM,WeekStatus,Day_of_week
0,3.17,2.95,0.0,0.0,73.21,100.0,900,Weekday,Monday
1,4.0,4.46,0.0,0.0,66.77,100.0,1800,Weekday,Monday
2,3.24,3.28,0.0,0.0,70.28,100.0,2700,Weekday,Monday
3,3.31,3.56,0.0,0.0,68.09,100.0,3600,Weekday,Monday
4,3.82,4.5,0.0,0.0,64.72,100.0,4500,Weekday,Monday


In [3]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score 
from math import sqrt
from sklearn.model_selection import train_test_split

In [4]:
X.dtypes

Usage_kWh                               float64
Lagging_Current_Reactive.Power_kVarh    float64
Leading_Current_Reactive_Power_kVarh    float64
CO2(tCO2)                               float64
Lagging_Current_Power_Factor            float64
Leading_Current_Power_Factor            float64
NSM                                       int64
WeekStatus                               object
Day_of_week                              object
dtype: object

In [5]:
days = {l:i for i, l in enumerate(X.Day_of_week.unique())}
week = {l:i for i, l in enumerate(X.WeekStatus.unique())}
X.loc[:, "Day_of_week_int"] = X.Day_of_week.apply(lambda x: days[x])
X.loc[:, "WeekStatus_int"] = X.WeekStatus.apply(lambda x: week[x])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.loc[:, "Day_of_week_int"] = X.Day_of_week.apply(lambda x: days[x])


In [6]:
X.head()

Unnamed: 0,Usage_kWh,Lagging_Current_Reactive.Power_kVarh,Leading_Current_Reactive_Power_kVarh,CO2(tCO2),Lagging_Current_Power_Factor,Leading_Current_Power_Factor,NSM,WeekStatus,Day_of_week,Day_of_week_int,WeekStatus_int
0,3.17,2.95,0.0,0.0,73.21,100.0,900,Weekday,Monday,0,0
1,4.0,4.46,0.0,0.0,66.77,100.0,1800,Weekday,Monday,0,0
2,3.24,3.28,0.0,0.0,70.28,100.0,2700,Weekday,Monday,0,0
3,3.31,3.56,0.0,0.0,68.09,100.0,3600,Weekday,Monday,0,0
4,3.82,4.5,0.0,0.0,64.72,100.0,4500,Weekday,Monday,0,0


In [7]:
y = X.Usage_kWh.values
feats = X.drop(["Usage_kWh", "WeekStatus", "Day_of_week"], axis=1).values


In [8]:
feats

array([[2.95e+00, 0.00e+00, 0.00e+00, ..., 9.00e+02, 0.00e+00, 0.00e+00],
       [4.46e+00, 0.00e+00, 0.00e+00, ..., 1.80e+03, 0.00e+00, 0.00e+00],
       [3.28e+00, 0.00e+00, 0.00e+00, ..., 2.70e+03, 0.00e+00, 0.00e+00],
       ...,
       [3.17e+00, 7.00e-02, 0.00e+00, ..., 8.46e+04, 0.00e+00, 0.00e+00],
       [3.06e+00, 1.10e-01, 0.00e+00, ..., 8.55e+04, 0.00e+00, 0.00e+00],
       [3.02e+00, 7.00e-02, 0.00e+00, ..., 0.00e+00, 0.00e+00, 0.00e+00]])

In [9]:
X_train, X_val, y_train, y_val = train_test_split(feats, y, test_size=0.33, shuffle=False)


In [17]:
# Normalize the data
scaler = MinMaxScaler()
X_train_norm = scaler.fit_transform(X_train)
X_val_norm = scaler.transform(X_val)


In [18]:
# for autoregression
X_train_norm = X_train_norm[:-1]
X_val_norm = X_val_norm[:-1]

In [12]:
# Define the RNN model
class RNN(nn.Module):
    def __init__(self, inputsize, hiddensize, numlayers, outputsize):
        super(RNN, self).__init__()
        self.hiddensize = hiddensize
        self.numlayers = numlayers
        self.rnn = nn.RNN(inputsize, hiddensize, numlayers, batch_first=True)
        self.fc = nn.Linear(hiddensize, outputsize)

    def forward(self, x, h0=None):
        if h0 is None:
            h0 = torch.zeros(self.numlayers, x.size(0), self.hiddensize, device=x.device)
        out, h1 = self.rnn(x, h0)
        out = self.fc(out)
        return out, h1

# Define the GRU model
class GRU(nn.Module):
    def __init__(self, inputsize, hiddensize, numlayers, outputsize):
        super(GRU, self).__init__()
        self.hiddensize = hiddensize
        self.numlayers = numlayers
        self.gru = nn.GRU(inputsize, hiddensize, numlayers, batch_first=True)
        self.fc = nn.Linear(hiddensize, outputsize)

    def forward(self, x, h0=None):
        if h0 is None:
            h0 = torch.zeros(self.numlayers, x.size(0), self.hiddensize, device=x.device)
        out, h1 = self.gru(x, h0)
        out = self.fc(out)
        return out, h1

# Define the LSTM model
class LSTM(nn.Module):
    def __init__(self, inputsize, hiddensize, numlayers, outputsize):
        super(LSTM, self).__init__()
        self.hiddensize = hiddensize
        self.numlayers = numlayers
        self.rnn = nn.LSTM(inputsize, hiddensize, numlayers, batch_first=True)
        self.fc = nn.Linear(hiddensize, outputsize)

    def forward(self, x, h0=None):
        if h0 is None:
            h0 = (torch.zeros(self.numlayers, x.size(0), self.hiddensize, device=x.device), 
                  torch.zeros(self.numlayers, x.size(0), self.hiddensize, device=x.device))
        out, h1 = self.rnn(x, h0)
        out = self.fc(out)
        return out, h1
        


In [13]:
np.arange(100).reshape(2, -1)

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
        48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
        66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
        82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97,
        98, 99]])

In [14]:
from tqdm.auto import tqdm

In [22]:
torch.from_numpy(X_train_norm).to('cuda')

tensor([[0.0304, 0.0000, 0.0000,  ..., 0.0105, 0.0000, 0.0000],
        [0.0460, 0.0000, 0.0000,  ..., 0.0211, 0.0000, 0.0000],
        [0.0338, 0.0000, 0.0000,  ..., 0.0316, 0.0000, 0.0000],
        ...,
        [0.0000, 0.4294, 0.0000,  ..., 0.5158, 1.0000, 1.0000],
        [0.0000, 0.4305, 0.0000,  ..., 0.5263, 1.0000, 1.0000],
        [0.0000, 0.4254, 0.0000,  ..., 0.5368, 1.0000, 1.0000]],
       device='cuda:0', dtype=torch.float64)

In [21]:
y_train.shape

(23476,)

In [23]:
# Train the models
inputsize = 9
outputsize = 1
numepochs = 1000
learningrate = 0.01
chunk_len=256
batch_size=64

device='cpu'

X_train_norm = torch.from_numpy(X_train_norm).to(device, dtype=torch.float)
y_train = torch.from_numpy(y_train).to(device, dtype=torch.float)
def train_model(model):
    model = model.to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learningrate)
    
    # Training RNN model
    chunk = (chunk_len*batch_size)
    pbar = tqdm(range(numepochs))
    for epoch in pbar:
        for i in range(0, X_train_norm.shape[0]//chunk):
            start_i = min(i*chunk, X_train_norm.shape[0] - chunk)
            batch_X = X_train_norm[start_i:(i+1)*chunk]
            batch_y = y_train[start_i: (i+1)*chunk + 1]
            batch = torch.cat((batch_X, batch_y[:-1].unsqueeze(dim=1)), dim=1).view(batch_size, chunk_len, -1)
            
            #.reshape(batch_size, chunk_len)
            # batch = torch.from_numpy(batch).to(device, dtype=torch.float)
            optimizer.zero_grad()
            outputs, _ = model(batch)
            loss = criterion(outputs.view(-1), batch_y[1:])
            loss.backward()
            optimizer.step()
        if epoch % 10 == 0:
            print(f'Epoch: {epoch}, Loss: {loss.item()}')
        pbar.set_description_str(f'Epoch: {epoch}, Loss: {loss.item()}')
    return model




In [24]:
rnn = train_model(RNN(inputsize, 64, 2, outputsize))


  0%|          | 0/1000 [00:00<?, ?it/s]

Epoch: 0, Loss: 2228.37451171875
Epoch: 10, Loss: 1780.520751953125
Epoch: 20, Loss: 1543.5274658203125
Epoch: 30, Loss: 1395.326416015625
Epoch: 40, Loss: 1321.4200439453125
Epoch: 50, Loss: 1293.878662109375
Epoch: 60, Loss: 1287.389404296875
Epoch: 70, Loss: 1286.4403076171875
Epoch: 80, Loss: 1142.7340087890625
Epoch: 90, Loss: 833.04150390625
Epoch: 100, Loss: 683.7636108398438
Epoch: 110, Loss: 558.510986328125
Epoch: 120, Loss: 461.48663330078125
Epoch: 130, Loss: 391.5372009277344
Epoch: 140, Loss: 341.51788330078125
Epoch: 150, Loss: 306.42974853515625
Epoch: 160, Loss: 280.3232421875
Epoch: 170, Loss: 259.5093078613281
Epoch: 180, Loss: 242.9205780029297
Epoch: 190, Loss: 231.028076171875
Epoch: 200, Loss: 218.9305419921875
Epoch: 210, Loss: 209.88900756835938
Epoch: 220, Loss: 201.35494995117188
Epoch: 230, Loss: 196.87649536132812
Epoch: 240, Loss: 190.66928100585938
Epoch: 250, Loss: 187.91702270507812
Epoch: 260, Loss: 182.65782165527344
Epoch: 270, Loss: 179.611831665039

In [52]:
gru = train_model(GRU(inputsize, 64, 2, outputsize))


  0%|          | 0/1000 [00:00<?, ?it/s]

Epoch: 0, Loss: 2218.261474609375
Epoch: 10, Loss: 1755.3887939453125
Epoch: 20, Loss: 1522.96435546875
Epoch: 30, Loss: 1381.787353515625
Epoch: 40, Loss: 1314.3994140625
Epoch: 50, Loss: 1291.0755615234375
Epoch: 60, Loss: 1276.792724609375
Epoch: 70, Loss: 903.2313232421875
Epoch: 80, Loss: 761.0872802734375
Epoch: 90, Loss: 626.4653930664062
Epoch: 100, Loss: 514.963623046875
Epoch: 110, Loss: 428.2420349121094
Epoch: 120, Loss: 366.5002136230469
Epoch: 130, Loss: 322.8092041015625
Epoch: 140, Loss: 294.16876220703125
Epoch: 150, Loss: 268.517822265625
Epoch: 160, Loss: 248.83485412597656
Epoch: 170, Loss: 233.9628143310547
Epoch: 180, Loss: 221.92825317382812
Epoch: 190, Loss: 211.91346740722656
Epoch: 200, Loss: 202.45068359375
Epoch: 210, Loss: 199.9518585205078
Epoch: 220, Loss: 190.53350830078125
Epoch: 230, Loss: 184.29721069335938
Epoch: 240, Loss: 179.14845275878906
Epoch: 250, Loss: 175.0459747314453
Epoch: 260, Loss: 175.33489990234375
Epoch: 270, Loss: 169.34002685546875

In [53]:
lstm = train_model(LSTM(inputsize, 64, 2, outputsize))


  0%|          | 0/1000 [00:00<?, ?it/s]

Epoch: 0, Loss: 2218.82470703125
Epoch: 10, Loss: 1812.7386474609375
Epoch: 20, Loss: 1569.880859375
Epoch: 30, Loss: 1410.1893310546875
Epoch: 40, Loss: 1327.9241943359375
Epoch: 50, Loss: 1296.0875244140625
Epoch: 60, Loss: 1287.8179931640625
Epoch: 70, Loss: 1216.3033447265625
Epoch: 80, Loss: 955.6558227539062
Epoch: 90, Loss: 761.4691162109375
Epoch: 100, Loss: 605.4696655273438
Epoch: 110, Loss: 487.2771911621094
Epoch: 120, Loss: 403.5963439941406
Epoch: 130, Loss: 344.14349365234375
Epoch: 140, Loss: 302.4382629394531
Epoch: 150, Loss: 272.538330078125
Epoch: 160, Loss: 250.92327880859375
Epoch: 170, Loss: 233.43772888183594
Epoch: 180, Loss: 221.0426788330078
Epoch: 190, Loss: 209.94107055664062
Epoch: 200, Loss: 201.4827880859375
Epoch: 210, Loss: 194.54417419433594
Epoch: 220, Loss: 188.5466766357422
Epoch: 230, Loss: 182.41143798828125
Epoch: 240, Loss: 179.7904510498047
Epoch: 250, Loss: 174.9194793701172
Epoch: 260, Loss: 172.50704956054688
Epoch: 270, Loss: 168.710845947

In [54]:
# valid the models
device='cpu'

X_val_norm = torch.from_numpy(X_val_norm).to(device, dtype=torch.float)
y_val = torch.from_numpy(y_val).to(device, dtype=torch.float)


In [55]:
@torch.no_grad()
def predict(model):
    model = model.to(device)
    model.eval()
    criterion = nn.MSELoss(reduction='sum')
   
    # Training RNN model
    batch_size=1
    chunk = (chunk_len*batch_size)
    total_loss = 0

    h0=None
    predicts = []
    for i in tqdm(range(0, X_val_norm.shape[0]//chunk+1)):
        #start_i = min(i*chunk, X_val_norm.shape[0] - chunk)
        start_i = i*chunk
        batch_X = X_val_norm[start_i:(i+1)*chunk]
        batch_y = y_val[start_i: (i+1)*chunk + 1]
        batch = torch.cat((batch_X, batch_y[:-1].unsqueeze(dim=1)), dim=1).view(batch_size, -1, batch_X.shape[1]+1)

        outputs, h0 = model(batch, h0)
        total_loss += criterion(outputs.view(-1), batch_y[1:]).item()
        predicts.extend(outputs.view(-1).cpu().tolist())
    print(f'Loss: {total_loss / X_val_norm.shape[0]}')
    return predicts

In [58]:
# Generate predictions

for name, model in zip(['rnn', 'gru', 'lstm'], [rnn, gru, lstm]):
    predictions = predict(model)
    mse = mean_squared_error(y_val[1:], predictions)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_val[1:], predictions)
    print(f"{name=}, {mse=}, {rmse=}, {r2=}")
    




  0%|          | 0/46 [00:00<?, ?it/s]

Loss: 120.69131114863714
name='rnn', mse=120.69131135447738, rmse=10.985959737523043, r2=0.8768350631292092


  0%|          | 0/46 [00:00<?, ?it/s]

Loss: 136.449274525143
name='gru', mse=136.44927360168705, rmse=11.681150354382357, r2=0.8607541339918203


  0%|          | 0/46 [00:00<?, ?it/s]

Loss: 104.39774079641586
name='lstm', mse=104.3977416585601, rmse=10.217521306978522, r2=0.893462577243323


In [None]:
# Вывод