In [202]:
from ucimlrepo import fetch_ucirepo 
  
# Download UCI ML Repository dataset #851 and keep the returned handle around.
steel_industry_energy_consumption = fetch_ucirepo(id=851)

# Feature table and target table (both pandas DataFrames).
X, y = (
    steel_industry_energy_consumption.data.features,
    steel_industry_energy_consumption.data.targets,
)

# Dataset-level metadata first, then the per-variable description, one per line.
print(
    steel_industry_energy_consumption.metadata,
    steel_industry_energy_consumption.variables,
    sep="\n",
)


{'uci_id': 851, 'name': 'Steel Industry Energy Consumption', 'repository_url': 'https://archive.ics.uci.edu/dataset/851/steel+industry+energy+consumption', 'data_url': 'https://archive.ics.uci.edu/static/public/851/data.csv', 'abstract': 'The data is collected from a smart small-scale steel industry in South Korea.', 'area': 'Physics and Chemistry', 'tasks': ['Regression'], 'characteristics': ['Multivariate'], 'num_instances': 35040, 'num_features': 9, 'feature_types': ['Real', 'Categorical'], 'demographics': [], 'target_col': ['Load_Type'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 2021, 'last_updated': 'Mon Aug 14 2023', 'dataset_doi': '10.24432/C52G8C', 'creators': ['Sathishkumar V E', 'Changsun Shin', 'Yongyun Cho'], 'intro_paper': {'title': 'Efficient energy consumption prediction model for a data analytic-enabled industry building in a smart city', 'authors': 'Sathishkumar V E, Changsun Shin, Yongyun Cho', 'published

In [203]:
import numpy as np
# Partition the feature columns by dtype: object-typed columns are handled as
# categorical, every numpy-numeric column as numerical.
categorical_columns = X.select_dtypes(include=['object']).columns
numerical_columns = X.select_dtypes(include=[np.number]).columns

In [204]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer
from category_encoders import TargetEncoder

# Read the untouched frames from the fetched dataset object instead of the
# notebook-level X / y. This cell rebinds X and y to their encoded versions, so
# taking them as input would feed already-transformed data back into the
# encoders whenever the cell is executed a second time.
features_raw = steel_industry_energy_consumption.data.features
targets_raw = steel_industry_energy_consumption.data.targets

# Integer-encode the class labels; np.ravel flattens the target frame to 1-D.
y = LabelEncoder().fit_transform(np.ravel(targets_raw))

# Target-encode the categorical columns and standardise the numerical ones.
columnTransformer = ColumnTransformer(
    transformers=[
        ('categorical', TargetEncoder(), categorical_columns),
        ('num', StandardScaler(), numerical_columns)]
)
# NOTE(review): the encoders are fitted on every row, i.e. before the
# train/test split performed later in the notebook, so test-set statistics and
# labels influence the features. Consider fitting on the training rows only.
X = columnTransformer.fit_transform(features_raw, y)

In [205]:
# Order-preserving hold-out: with shuffle=False the leading rows become the
# training set and the trailing rows the test set.
# NOTE(review): test_size=0.8 leaves only 20% of the rows for training --
# confirm this is intended and not an inverted train/test fraction.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.8,
    shuffle=False,
)

In [1]:
import torch
from torch import nn
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader

In [354]:
class MyDataset(Dataset):
    """Sliding-window view over a feature matrix and its per-row labels.

    Item ``i`` is ``(X[i:i+q], y[i+q-1])``: ``q`` consecutive feature rows
    together with the label of the last row of that window.

    Args:
        X: array-like of features; converted to a ``torch.float`` tensor and
            windowed along its first dimension.
        y: array-like of labels; converted to a ``torch.long`` tensor.
        q: window length (number of consecutive rows per item), at least 1.

    Raises:
        ValueError: if ``q`` is smaller than 1.
    """

    def __init__(self, X, y, q = 1):
        if q < 1:
            raise ValueError(f"window length q must be >= 1, got {q}")
        self.X = torch.tensor(X, dtype = torch.float)
        self.y = torch.tensor(y).long()
        self.q = q

    def __len__(self):
        """Number of complete windows; 0 when there are fewer than ``q`` rows."""
        # Clamp at zero: len() raises ValueError if __len__ returns a negative.
        return max(0, self.X.shape[0] - (self.q - 1))

    def __getitem__(self, index):
        """Return ``(window, label)`` for the window that starts at row ``index``.

        Negative indices count from the last window. An index outside
        ``[-len(self), len(self))`` raises ``IndexError``.
        """
        n_windows = len(self)
        if index < 0:
            # A raw negative start with a non-negative stop would slice out a
            # wrong (usually empty) window, so map it to its positive position.
            index = index + n_windows
        if not 0 <= index < n_windows:
            raise IndexError(f"window index out of range for {n_windows} windows")
        stop = index + self.q
        return (self.X[index:stop], self.y[stop - 1])

In [355]:
# Wrap each split in a 5-row sliding-window dataset and serve it in its
# original order (no shuffling) in batches of 64.
train_loader, test_loader = (
    DataLoader(MyDataset(split_X, split_y, 5), batch_size = 64, shuffle = False)
    for split_X, split_y in ((X_train, y_train), (X_test, y_test))
)

In [356]:
class RNN_Classifier(nn.Module):
    """Sequence classifier: an ``nn.RNN`` encoder followed by a linear head.

    Args:
        in_features: number of input features per time step.
        num_classes: number of logits produced by the head.
        hidden_size: size of the RNN hidden state.
    """

    def __init__(self, in_features, num_classes, hidden_size):
        super().__init__()
        # batch_first=True: batched inputs are laid out as (batch, time, features).
        self.encoder = nn.RNN(
            input_size = in_features,
            hidden_size = hidden_size,
            batch_first = True,
        )
        self.head = nn.Linear(in_features = hidden_size, out_features = num_classes)

    def forward(self, x):
        """Return class logits computed from the encoder's final hidden state."""
        # nn.RNN returns (per-step outputs, final hidden state); only the
        # latter is used here.
        final_hidden = self.encoder(x)[1]
        # The final hidden state is stacked per layer; -1 picks the last layer.
        last_layer_state = final_hidden[-1]
        return self.head(last_layer_state)

In [357]:
# Hyperparameters: number of output classes and RNN hidden-state width.
NUM_CLASSES, HIDDEN_SIZE = 3, 32

# One input feature per column of the transformed feature matrix.
model = RNN_Classifier(
    in_features = X.shape[1],
    num_classes = NUM_CLASSES,
    hidden_size = HIDDEN_SIZE,
)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params = model.parameters(), lr = 0.01, momentum = 0.9)

In [358]:
from tqdm.notebook import tqdm
num_epochs = 100
pbar = tqdm(range(1, num_epochs + 1))


def _run_epoch(loader, train):
    """Run one full pass over ``loader`` with the notebook's model.

    Args:
        loader: DataLoader yielding ``(X_batch, y_batch)`` pairs.
        train: when True the model is put in training mode and the optimizer
            is stepped on every batch; when False the model is put in eval
            mode and no gradients are tracked.

    Returns:
        ``(mean loss per sample, accuracy)`` over ``loader.dataset``.
    """
    model.train(train)
    total_loss, n_correct = 0.0, 0
    # Autograd is only needed while training; disabling it for evaluation
    # avoids building a graph for every test batch.
    with torch.set_grad_enabled(train):
        for X_batch, y_batch in loader:
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            if train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            # criterion returns the batch mean, so weight it by the batch size
            # to get a correct per-sample average when the last batch is short.
            total_loss += loss.item() * X_batch.shape[0]
            n_correct += (predictions.argmax(dim = 1) == y_batch).sum().item()
    n_samples = len(loader.dataset)
    return total_loss / n_samples, n_correct / n_samples


for epoch in pbar:
    train_loss, train_accuracy = _run_epoch(train_loader, train = True)
    test_loss, test_accuracy = _run_epoch(test_loader, train = False)
    pbar.set_postfix({'train loss': train_loss, 'test loss': test_loss, 'train accuracy' : train_accuracy, 'test accuracy' : test_accuracy})


  0%|          | 0/100 [00:00<?, ?it/s]

### Моя реализация RNN

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell executes and source edits take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import tests

In [3]:
# Run the RNN-cell check from the imported `tests` module (presumably it
# validates the hand-written RNN cell -- confirm against tests.py).
tests.test_RNNcell()

test_RNNCell ... OK


In [18]:
# Run the full-RNN check from the imported `tests` module (presumably it
# validates the hand-written multi-step RNN -- confirm against tests.py).
tests.test_RNN()

test_RNN ... OK


### Моя реализация LSTM

In [16]:
# Run the LSTM-cell check from the imported `tests` module (presumably it
# validates the hand-written LSTM cell -- confirm against tests.py).
tests.test_LSTMcell()

test_LSTMCell ... OK


In [37]:
# Run the full-LSTM check from the imported `tests` module (presumably it
# validates the hand-written multi-step LSTM -- confirm against tests.py).
tests.test_LSTM()

test_LSTM ... OK


### Моя реализация GRU

In [82]:
# Run the GRU-cell check from the imported `tests` module (presumably it
# validates the hand-written GRU cell -- confirm against tests.py).
# NOTE(review): this name is spelled `...Cell` while the RNN/LSTM calls in
# this notebook use `...cell`; consider unifying the names in tests.py.
tests.test_GRUCell()

test_GRUCell ... OK
