In [2]:
# !git clone https://github.com/riteshsv/timeseries.git

In [4]:
import numpy as np
import os

In [5]:
import pandas as pd

In [6]:
import torch
from torch import nn     
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader

In [7]:
from sklearn.preprocessing import MinMaxScaler

In [8]:
def preparedata_time_window(input_data,window):
    out_data = []
    # rearrange the input in form of output based on 3 observations in past
    for i in range(len(input_data)):
        if (i+window+1) <= len(input_data):
            out_data.append((input_data[i:i+window],input_data[i+window:i+(window+1)]))
            
    return out_data

In [9]:
class StandardizeInput():
  def __init__(self):
    self.scaler = MinMaxScaler(feature_range=(-1,1)) 
  def __call__(self,data):
    return self.scaler.fit_transform(data)

# scaler2 = MinMaxScaler(feature_range=(-1,1))
# scaled_input2 = scaler2.fit_transform(data.reshape(-1,1))


In [10]:
class UnivariateTsDataset(Dataset):
    """Univariate time serices dataset."""

    def __init__(self, csv_file, target:str=None, transform=None, window=1,features:list=None,n_features=1,scaler=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            window (integer): Number of time steps in the past
            features (list) : list of features to be used, default is all features
            n_features (integer) : number of features to be used. default is 1, min number of features required
        """
        self.transform = transform
        self.target = target
        self.window = window
        self.features = features
        self.scaler=scaler
        if len(self.features) > 0:
          self.n_features = len(self.features)
        else:
          self.n_features = n_features
        self.rawdata = pd.read_csv(csv_file)
        self.data = self.preprocess(self.rawdata)
        
    
    def preprocess(self,input_df):
      if self.features != None:
        input_data = input_df[self.features]
      else:
        input_data = input_df
      # scaling
      if self.scaler != None:
        input_data = self.scaler(input_data)
      # convert to tensors
      input_data_tensor = torch.tensor(input_data,dtype=torch.float32,device=device).view(-1)
      out_data = []
      # rearrange the input in form of output based on 3 observations in past
      for i in range(len(input_data_tensor)):
        if (i+self.window+1) <= len(input_data_tensor):
          out_data.append((input_data_tensor[i:i+self.window],input_data_tensor[i+self.window:i+(self.window+1)]))
            
      return out_data
    
    def train_val_test_split(self,train_size=0.8,test_size=.20,val_size=0.0):
      train_len = int(train_size*len(self.data))
      train_set = torch.utils.data.Subset(self,np.arange(train_len))
      val_len = int(len(self.data)*val_size)
      if val_size > 0:
        val_set = torch.utils.data.Subset(self,np.arange(train_len,(train_len+val_len)))
      test_set = torch.utils.data.Subset(self,np.arange((train_len+val_len),len(self.data)))
      if val_size > 0:
        return train_set,val_set,test_set
      return train_set,test_set

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        sample = self.data[idx]

        return sample

In [42]:
class LSTMSimple(nn.Module):
    def __init__(self, input_dim,hidden_dim,out_size,num_layers=1,batch_size=1,device=None):
        super(LSTMSimple,self).__init__()
        self.batch_size = batch_size
        self.lstm = nn.LSTM(input_dim, hidden_dim,num_layers=num_layers)
        self.out = nn.Linear(hidden_dim,out_size)
        self.hidden_dim = hidden_dim
        self.n_layers = num_layers
        if device == None:
          if torch.cuda.is_available():
            self.device = torch.device('cuda')
          else:
            self.device = torch.device('cpu')
        else:
          self.device = device
        self.hidden = (torch.zeros(1*self.n_layers,self.batch_size,self.hidden_dim,device=self.device),torch.zeros(1*self.n_layers,self.batch_size,self.hidden_dim,device=self.device))
    def forward(self,x):
      self.hidden = (torch.zeros(1*self.n_layers,self.batch_size,self.hidden_dim,device=self.device),torch.zeros(1*self.n_layers,self.batch_size,self.hidden_dim,device=self.device))
      out,self.hidden = self.lstm(x.view(len(x[1]),self.batch_size,-1),self.hidden)
      preds = self.out(out[-1])
      return preds
    def predict(self,n_preds,past_val,batch_size=1):
      out = []
      with torch.no_grad():
        input_ten = torch.tensor(past_val,dtype=torch.float32,device=self.device)
        self.eval()
        for p in range(n_preds):
          input_ten = input_ten[-len(past_val):].view(-1,len(past_val))
          self.hidden = (torch.zeros(1*self.n_layers,batch_size,self.hidden_dim,device=self.device),
                         torch.zeros(1*self.n_layers,batch_size,self.hidden_dim,device=self.device))
          preds = model(input_ten)
          input_ten = torch.cat((input_ten.view(-1),preds),0)
          out.append(preds)
      return out
    # def predict(n_preds,input_ten):
    #   out = []
    #   with torch.no_grad():
    #     for p in range(n_preds):
    #       input_ten = input_ten[-6:].view(-1,6)
    #       # model.hidden = (torch.zeros(2,1,64,device=device),torch.zeros(2,1,50,device=device))
    #       preds = model(input_ten)
    #       input_ten = torch.cat((input_ten.view(-1),preds),0)
    #       actual_pred = dataset.scaler.scaler.inverse_transform(np.array(preds.view(-1).cpu()).reshape(-1,1))
    #       out.append(actual_pred)
    #   print(actual_pred)
    #   return out

In [46]:
class Trainer():
  def __init__(self,model,train_data,loss_fn,optimizer,val_data=None,epochs=1):
    self.model = model
    self.epochs = epochs
    self.train_data = train_data
    self.val_data = val_data
    self.loss_fn = loss_fn
    self.optimizer = optimizer
    self.predictions = []
  def train(self):
    for epoch in range(self.epochs):
      self.model.train()
      train_loss = 0.0  
      for x,t in self.train_data:
        self.model.zero_grad()
        preds = model(x)
        self.predictions.append(preds)
        loss = self.loss_fn(preds,t)
        loss.backward()
        self.optimizer.step()
        train_loss += loss.item()
      train_loss = train_loss/len(self.train_data)
      if self.val_data != None:
        val_loss = self.__validation()
      if (epoch+1)%10 == 0:
        print(f"Epoch: {epoch+1} loss: {train_loss} validation loss: {val_loss}")
  def __validation(self):
    self.model.eval()  
    val_loss = 0.0
    with torch.no_grad():
      for x,t in self.val_data:
        preds = self.model(x)
        self.predictions.append(preds)
        loss = self.loss_fn(preds,t)
        val_loss += loss.item()
    return val_loss/len(self.val_data)

In [13]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)
print(type(device))

cuda
<class 'torch.device'>


In [30]:
m = LSTMSimple(1,20,1,1,5,device=device)
m.to(device)

LSTMSimple(
  (lstm): LSTM(1, 20)
  (out): Linear(in_features=20, out_features=1, bias=True)
)

In [31]:
i = torch.randn(5,5,1,device=device)
o = m(i)

In [32]:
dataset = UnivariateTsDataset('/content/timeseries/ULTRACEMCO.NS.csv',window=6,features=['Close'],n_features=1,scaler=StandardizeInput())

In [33]:
train_set,val_set,test_set = dataset.train_val_test_split(0.60,0.20,0.20)


In [34]:
batch_size = 4
train_loader = torch.utils.data.DataLoader(train_set,batch_size=batch_size,shuffle=False,drop_last=True)
val_loader = torch.utils.data.DataLoader(val_set,batch_size=batch_size,shuffle=False,drop_last=True)
test_loader = torch.utils.data.DataLoader(test_set,batch_size=batch_size,shuffle=False,drop_last=True)


In [None]:
for x,y in train_loader:
  print(x.shape)
  print(x.view(len(x[1]),4,-1).size(-1))
  print(x.view(len(x[1]),4,-1).shape)

In [16]:
for x,y in train_loader:
  print(y.shape)
  break

torch.Size([4, 1])


In [22]:
df = pd.read_csv('/content/timeseries/ULTRACEMCO.NS.csv')
# df.columns=['sales']


In [57]:
model = LSTMSimple(1,64,1,num_layers=6,batch_size=batch_size,device=device)
model.to(device)

LSTMSimple(
  (lstm): LSTM(1, 64, num_layers=6)
  (out): Linear(in_features=64, out_features=1, bias=True)
)

In [58]:
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(),lr=0.0001)


Perform a train test split

In [59]:
trainer = Trainer(model,train_loader,loss_fn,optimizer,val_loader,100)

In [60]:
trainer.train()

Epoch: 10 loss: 0.25761794504716007 validation loss: 0.18683143676025793
Epoch: 20 loss: 0.04980965848032307 validation loss: 0.0474492686917074
Epoch: 30 loss: 0.043023821865467146 validation loss: 0.03496761374496297
Epoch: 40 loss: 0.03906092885558286 validation loss: 0.03002475177224066
Epoch: 50 loss: 0.03622914295152037 validation loss: 0.027987165851906564
Epoch: 60 loss: 0.0341764639920762 validation loss: 0.027718110868590884
Epoch: 70 loss: 0.032652720158997305 validation loss: 0.028284863743465394
Epoch: 80 loss: 0.03148775927709519 validation loss: 0.029142254066149082
Epoch: 90 loss: 0.03057323801880961 validation loss: 0.03002117034823944
Epoch: 100 loss: 0.02983857723625584 validation loss: 0.0307955798052717


In [61]:
def test(test_data):
   model.eval()  
   test_loss = 0.0
   preds = []
   with torch.no_grad():
     for x,t in test_data:
       pred = model(x)
       loss = loss_fn(pred,t)
       test_loss += loss.item()
       preds.append(pred)
     print(test_loss/len(test_data))
   return preds

In [62]:
p = test(test_loader)

0.03108120463245238


In [None]:
p_a = []
for x in p:
  p_a.append(dataset.scaler.scaler.inverse_transform(np.array(x.view(-1).cpu()).reshape(-1,1)))

In [72]:
def predict(n_preds,past_val):
  out = []
  with torch.no_grad():
    input_ten = torch.tensor(past_val,dtype=torch.float32,device=device)
    model.eval()
    for p in range(n_preds):
      input_ten = input_ten[-len(past_val):].view(len(past_val),-1)
      print(input_ten.size())
      # model.hidden = (torch.zeros(2,1,64,device=device),torch.zeros(2,1,50,device=device))
      preds = model(input_ten)
      print(preds)
      input_ten = torch.cat((input_ten.view(-1),preds),0)
      actual_pred = dataset.scaler.scaler.inverse_transform(np.array(preds.view(-1).cpu()).reshape(-1,1))
      out.append(actual_pred)
      print(actual_pred)
  return out

In [71]:
# def predict(self,n_preds,past_val,batch_size=1):
#       out = []
#       with torch.no_grad():
#         input_ten = torch.tensor(past_val,dtype=torch.float32,device=self.device)
#         self.eval()
#         for p in range(n_preds):
#           input_ten = input_ten[-len(past_val):].view(-1,len(past_val))
#           self.hidden = (torch.zeros(1*self.n_layers,batch_size,self.hidden_dim,device=self.device),
#                          torch.zeros(1*self.n_layers,batch_size,self.hidden_dim,device=self.device))
#           preds = model(input_ten)
#           input_ten = torch.cat((input_ten.view(-1),preds),0)
#           out.append(preds)
#       return out

In [73]:
actual = predict(10,dataset.rawdata['Close'].iloc[-10:].to_numpy().reshape(1,-1))

torch.Size([1, 10])


IndexError: ignored

In [76]:
a = dataset.rawdata['Close'].iloc[-10:].to_numpy().reshape(1,-1).shape

In [78]:
b= torch.tensor(a[-len(a):].view(len(a),-1)

AttributeError: ignored

In [None]:
ap = [x.reshape(1)[0] for x in actual]
ap


NameError: ignored

In [None]:
df = pd.read_csv('/content/timeseries/ULTRACEMCO.NS.csv')

In [None]:
df2 = pd.DataFrame(pd.date_range('2022-11-17',periods=10).date)
df2.columns = ['Date']
df2['ap'] = ap

In [None]:
df3 = pd.concat([df,df2],axis=0)

In [None]:
df4 = df3[-30:]


In [None]:
df4['Date'] = pd.to_datetime(df4['Date'])

In [None]:
from matplotlib import pyplot as plt
import seaborn as sns

In [None]:
sns.lineplot(x='Date',y='Close',data=df4)
sns.lineplot(x='Date',y='ap',data=df4)
plt.ylabel("Price")
plt.xticks(rotation = 20)
plt.show()

In [None]:
torch.cuda.empty_cache()

# New Section

In [None]:
dataset.rawdata['Close'].iloc[-6:]

In [None]:
len(train_loader)

In [None]:
x = torch.randn(4, 3, 4)
lin = nn.Linear(4, 10)
out = lin(x)
print(out.shape)
torch.Size([2, 3, 10])