In [1]:
import pandas as pd
import pickle
import torch

from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [2]:
def load_dataset():
    df = pd.read_csv('c:/workspace/dillon.csv')

    # Remove data with NaN values
    df = df.dropna()
    
    # Remove irrelevant data
    X = df.drop(['date', 'hour'], axis=1)
    
    # Scale the X dataset
    ss = MinMaxScaler() 
    X = ss.fit_transform(X)
    with open('c:/workspace/ae_1d_min_max_scaler.pkl', 'wb') as f:
        pickle.dump(ss, f)
    
    return train_test_split(X, X, test_size=0.2)

x_train, x_test, _, _ = load_dataset()
x_train = torch.from_numpy(x_train).float().cuda()
x_test = torch.from_numpy(x_test).float().cuda()
y_test = x_test.cpu().numpy()

batch_size = 32

In [3]:
class AutoEncoder(torch.nn.Module):
    def __init__(self, num_features: int, l1_neurons: int):
        super().__init__()
        self.num_features = num_features
        self.scenario_1d(l1_neurons)

    def scenario_1d(self, l1: int):
        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(self.num_features, l1)
        ).cuda()

        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(l1, self.num_features),
            torch.nn.Sigmoid()
        ).cuda()
    
    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)

        return decoded
    
l1_neurons = 25
model = AutoEncoder(x_train.size()[1], l1_neurons)
optimizer = torch.optim.Adam(model.parameters(), 0.01)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
loss_function = torch.nn.MSELoss()
epochs = 64

In [4]:
def train(model, loss, optimizer, inputs, labels):
    optimizer.zero_grad()

    logits = model.forward(inputs)
    output = loss.forward(logits, labels)
    output.backward()
    optimizer.step()

    return output.item()

def predict(model, inputs):
    logits = model.forward(inputs)
    return logits.data

def train_1d():
    for epoch in range(epochs):
        cost = 0
        loader = torch.utils.data.DataLoader(dataset = x_train,
                                             batch_size = batch_size,
                                             shuffle = True)

        for batch in loader:
            cost += train(model, loss_function, optimizer, batch, batch)

        y_pred = predict(model, x_test)
        acc = r2_score(y_pred.cpu().numpy(), y_test)
        scheduler.step()

        print(f"Epoch: {epoch+1}, cost: {cost / len(loader):.4f}, acc: {acc:.3f}, lr: {scheduler.get_last_lr()[0]:.2e}")

train_1d()

Epoch: 1, cost: 0.0057, acc: 0.875, lr: 9.00e-03
Epoch: 2, cost: 0.0021, acc: 0.896, lr: 8.10e-03
Epoch: 3, cost: 0.0018, acc: 0.894, lr: 7.29e-03
Epoch: 4, cost: 0.0016, acc: 0.916, lr: 6.56e-03
Epoch: 5, cost: 0.0016, acc: 0.914, lr: 5.90e-03
Epoch: 6, cost: 0.0015, acc: 0.916, lr: 5.31e-03
Epoch: 7, cost: 0.0015, acc: 0.921, lr: 4.78e-03
Epoch: 8, cost: 0.0015, acc: 0.924, lr: 4.30e-03
Epoch: 9, cost: 0.0015, acc: 0.923, lr: 3.87e-03
Epoch: 10, cost: 0.0015, acc: 0.925, lr: 3.49e-03
Epoch: 11, cost: 0.0015, acc: 0.926, lr: 3.14e-03
Epoch: 12, cost: 0.0014, acc: 0.927, lr: 2.82e-03
Epoch: 13, cost: 0.0014, acc: 0.926, lr: 2.54e-03
Epoch: 14, cost: 0.0014, acc: 0.930, lr: 2.29e-03
Epoch: 15, cost: 0.0014, acc: 0.929, lr: 2.06e-03
Epoch: 16, cost: 0.0014, acc: 0.930, lr: 1.85e-03
Epoch: 17, cost: 0.0014, acc: 0.931, lr: 1.67e-03
Epoch: 18, cost: 0.0014, acc: 0.926, lr: 1.50e-03
Epoch: 19, cost: 0.0014, acc: 0.928, lr: 1.35e-03
Epoch: 20, cost: 0.0014, acc: 0.931, lr: 1.22e-03
Epoch: 21

In [5]:
torch.save(model.state_dict(), 'c:/workspace/ae_1d_fnn.pt')