In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
from torch.utils.data import Dataset, DataLoader
import time
import winsound


In [2]:
def pre_process(data, var, initial_charge=60.0, counts_per_unit=36000):
    """Derive the cumulative-charge column ``Q`` and return the selected columns.

    ``Q`` is computed as ``initial_charge + cumsum(I) / counts_per_unit``,
    where ``I`` is the current column of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw measurements; must contain a column named ``"I"``.
    var : list of str (or a single column label)
        Columns to return, in this order. May include ``"Q"``.
    initial_charge : float, default 60.0
        Value of ``Q`` before any current has been integrated.
        NOTE(review): presumably the cell capacity in Ah - confirm.
    counts_per_unit : float, default 36000
        Divisor applied to the running sum of ``I``.
        NOTE(review): presumably samples per hour (0.1 s sampling) - confirm.

    Returns
    -------
    pandas.DataFrame
        A new frame holding ``data[var]`` with ``Q`` available. The input
        frame is left untouched, so the raw data can be re-processed safely.
    """
    charge = initial_charge + data["I"].cumsum() / counts_per_unit
    # assign() builds a new frame instead of writing "Q" into the caller's object.
    return data.assign(Q=charge)[var]

def windows(data, window_size, stride):
    """Cut ``data`` into overlapping windows along its first axis.

    Parameters
    ----------
    data : array-like
        Anything ``np.array`` accepts (list, ndarray, DataFrame, ...).
    window_size : int
        Number of consecutive rows per window.
    stride : int
        Step between the start indices of consecutive windows.

    Returns
    -------
    tuple of numpy.ndarray
        One array per window, each with a leading axis of length 1, i.e.
        shape ``(1, window_size, ...)``.

    Notes
    -----
    Start indices are ``range(0, len(data) - window_size, stride)``, so the
    final full window (the one starting at ``len(data) - window_size``) is
    not produced.
    """
    x = np.array(data)
    # Use the converted array's length so plain sequences (no .shape) work too.
    length = x.shape[0]

    return tuple(
        x[np.newaxis, start:start + window_size]
        for start in range(0, length - window_size, stride)
    )


class Dataset(object):
    """Sliding-window dataset for one-step-ahead prediction.

    ``data`` is a 2-D array: column 0 is the target, the remaining columns
    are the input features. For every start index ``i`` in
    ``range(0, len(data) - window_size, stride)`` one sample is built:

    * ``x`` - feature columns of rows ``i .. i + window_size - 1``
    * ``y`` - target column of row ``i + window_size``

    Both arrays are then divided by maxima held in ``max_list``
    (``[per-feature max of x, max of y]``). A maximum of 0 produces
    inf/nan because no guard is applied.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array, target in column 0.
    window_size : int
        Number of rows in each input window.
    stride : int
        Step between consecutive window starts.
    scale : int or bool, default 0
        Falsy: compute the maxima from this data (training set).
        Truthy: reuse the maxima passed in ``max_list`` (validation/test).
    max_list : sequence, optional
        ``[feature_max, target_max]``; required when ``scale`` is truthy.

    Attributes
    ----------
    x : numpy.ndarray, shape ``(n_samples, window_size, n_features)``
    y : numpy.ndarray, shape ``(n_samples, 1)``
    max_list : the maxima used for normalisation.

    Raises
    ------
    ValueError
        If ``data`` is too short to yield a single sample, or if ``scale``
        is truthy but no ``max_list`` was given.

    Note: this class shadows ``torch.utils.data.Dataset`` imported at the
    top of the notebook; it only implements the ``__getitem__``/``__len__``
    protocol that ``DataLoader`` needs.
    """

    def __init__(self, data, window_size, stride, scale=0, max_list=None):

        starts = range(0, len(data) - window_size, stride)
        if len(starts) == 0:
            raise ValueError(
                "data has %d rows, too few for window_size=%d plus one target row"
                % (len(data), window_size)
            )

        # Each window keeps a leading axis of 1 so vstack stacks along samples.
        self.x = np.vstack([data[i:i + window_size, 1:][np.newaxis, :] for i in starts])
        # Target is the single row right after the window.
        self.y = np.vstack([data[i + window_size, 0] for i in starts])

        if scale:
            if max_list is None or len(max_list) == 0:
                raise ValueError("scale is set but no max_list was supplied")
            self.max_list = max_list
        else:
            self.max_y = np.max(self.y)
            # Reduce over time (axis 1) then over samples (axis 0): one max per feature.
            self.max_list = [np.max(np.max(self.x, axis=1), axis=0), self.max_y]

        self.x = np.divide(self.x, self.max_list[0])
        self.y = self.y / self.max_list[-1]

        print("x shape:", self.x.shape)
        print("y shape:", self.y.shape)

    def __getitem__(self, index):
        """Return the ``(x, y)`` pair at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Number of samples."""
        return self.y.shape[0]

    def __add__(self, other):
        """Concatenate with another dataset via ``torch.utils.data.ConcatDataset``."""
        # Imported locally: the notebook's import cell does not bring
        # ConcatDataset into scope.
        from torch.utils.data import ConcatDataset
        return ConcatDataset([self, other])
   
        
    
def mae(y_pred, y_true):
    """Mean absolute error between two tensors, averaged over all elements."""
    abs_err = (y_pred - y_true).abs()
    return abs_err.mean()

In [5]:
class LinearModel(nn.Module):
    """Single linear layer applied to the flattened input window.

    Every sample of ``input_dim * seq_len`` values is mapped to
    ``output_dim`` outputs by one ``nn.Linear``.
    """

    def __init__(self, input_dim, output_dim, seq_len):
        super().__init__()
        # Size of one flattened sample (features x time steps).
        self.n_features = input_dim * seq_len
        self.linear = nn.Linear(self.n_features, output_dim)

    def forward(self, x):
        """Flatten ``x`` to ``(-1, n_features)`` and apply the linear layer."""
        flat = x.view(-1, self.n_features)
        return self.linear(flat)
       
def count_parameters(model):
    """Return the total number of elements in ``model``'s trainable parameters."""
    total = 0
    for param in model.parameters():
        # Parameters with requires_grad=False are not counted.
        if param.requires_grad:
            total += param.numel()
    return total


# Columns kept for modelling. Order matters: the Dataset class treats
# column 0 ("U") as the target and the remaining columns as input features.
var = ["U", "T", "I", "Q"]

# Location of the measurement CSV. Set the BATTERY_CSV_PATH environment
# variable to run the notebook on another machine; the original local path
# is kept as the fallback.
path = os.environ.get(
    "BATTERY_CSV_PATH",
    "C:/Users/Sebastian/Documents/Data Analytics Master/Semester4-Thesis/Datasets/Battery-data/vw/DataLake/DataLake/0/EH_AgingStatus_0Y_Temp_11_rndLC_W.csv",
)

In [6]:
# Read the raw CSV, derive the Q column and keep only the columns listed in `var`.
data = pre_process(pd.read_csv(path), var)
data.head()

Unnamed: 0,U,T,I,Q
0,4.250048,11.0,0.0,60.0
1,4.250048,11.0,0.0,60.0
2,4.250048,11.0,0.0,60.0
3,4.250048,11.0,0.0,60.0
4,4.250048,11.0,0.0,60.0


In [8]:
# --- Reproducibility ---------------------------------------------------------
# Seed the RNGs that drive DataLoader shuffling and (later) weight initialisation
# so that Restart & Run All gives the same numbers.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# --- Windowing ---------------------------------------------------------------
stride = 1
window_size = 20
backcast = window_size   # number of past steps fed to the model
forecast = 1             # number of future steps predicted

# --- Model / optimisation sizes ----------------------------------------------
batch_size = 1024
seq_len = backcast
output_dim = forecast
n_layers = 1
hidden_dim = 20

# --- Chronological 60 / 20 / 20 split ----------------------------------------
train_split = int(len(data)*0.6)
val_split = int(len(data)*0.8)

data = np.array(data)

# Column 0 is the target; every other column is an input feature.
input_dim = data.shape[1] - 1

params = {'batch_size': batch_size,
          'shuffle': True,
          'num_workers': 0}

# Evaluation does not benefit from shuffling; keep a deterministic order there.
eval_params = dict(params, shuffle=False)

idx = np.arange(len(data))

# Do not shuffle the indices: the split must respect time order.
#np.random.shuffle(idx)

train_idx = idx[:train_split]
val_idx = idx[train_split:val_split]
test_idx = idx[val_split:]

# The training set computes the normalisation maxima ...
train_dataset = Dataset(data[train_idx], window_size, stride)
train_loader = DataLoader(train_dataset, **params)
train_iter = iter(train_loader)

# ... and the held-out sets reuse them (scale=1) so no statistics leak from them.
test_dataset = Dataset(data[test_idx], window_size, stride, scale = 1, max_list = train_dataset.max_list)
test_loader = DataLoader(test_dataset, **eval_params)
test_iter = iter(test_loader)

val_dataset = Dataset(data[val_idx], window_size, stride, scale = 1, max_list = train_dataset.max_list)
val_loader = DataLoader(val_dataset, **eval_params)
val_iter = iter(val_loader)


x shape: (58220, 20, 3)
y shape: (58220, 1)
x shape: (19394, 20, 3)
y shape: (19394, 1)
x shape: (19393, 20, 3)
y shape: (19393, 1)


In [13]:
model = LinearModel (input_dim,  output_dim, seq_len)

print("Model size:", count_parameters(model))

optimizer = optim.Adam(model.parameters(), lr=0.0001)
max_epochs = 10
val_loss_list = []
prev_loss = np.inf   # best (lowest) validation loss seen so far


start = time.time()


for i in range(max_epochs):

    # ---- training pass ------------------------------------------------------
    model.train()
    train_loss = 0.0
    for x_train, y_train in train_loader:

        # The DataLoader already yields tensors (float64, coming from NumPy).
        # as_tensor() avoids the copy-construct warning torch.tensor() raises
        # on tensor input; .float() casts to the model's float32.
        x_train = torch.as_tensor(x_train).float()
        y_train = torch.as_tensor(y_train).float()

        optimizer.zero_grad()
        y_pred = model(x_train)
        loss = mae(y_pred, y_train)
        loss.backward()
        optimizer.step()
        # mae() is a per-batch mean; weight by batch size to get a dataset mean.
        train_loss += loss.item()*x_train.shape[0]
    print("Train loss:", train_loss/len(train_dataset))

    # ---- validation pass ----------------------------------------------------
    model.eval()
    val_loss = 0.0
    with torch.no_grad():   # no graph needed when only measuring the loss
        for x_val, y_val in val_loader:
            x_val = torch.as_tensor(x_val).float()
            y_val = torch.as_tensor(y_val).float()
            y_pred = model(x_val)
            val_loss += mae(y_pred, y_val).item()*x_val.shape[0]
    # mae() already averages over the forecast dimension, so only the sample
    # count is divided out (same normalisation as the training loss).
    val_loss /= len(val_dataset)

    # Early stopping: quit at the first epoch whose validation loss gets worse.
    if(val_loss>prev_loss):
        break

    prev_loss = val_loss
    val_loss_list.append(val_loss)
    print(val_loss)


end = time.time()
print(end - start)

# Audible notification that training has finished (winsound is Windows-only).
frequency = 2500  # Set Frequency To 2500 Hertz
duration = 100  # Set Duration To 100 ms
winsound.Beep(frequency, duration)

Model size: 61




Train loss: 1.8051516497434108




1.7396658501248616
Train loss: 1.58595255349191
1.5667131400087437
Train loss: 1.3667540771532862
1.3936949862609154
Train loss: 1.1475388203401085
1.2207415331106461
Train loss: 0.9283289725655176
1.0477391466646673
Train loss: 0.7091422016536276
0.8746693973192601
Train loss: 0.491911578465065
0.7038184334481475
Train loss: 0.3063940550475463
0.5645106151935912
Train loss: 0.2223174901751463
0.48588518941653847
Train loss: 0.19583847105441654
0.4586644530102694
21.280006170272827
