In [1]:
!git clone https://github.com/riteshsv/timeseries.git

Cloning into 'timeseries'...
remote: Enumerating objects: 30, done.[K
remote: Counting objects: 100% (30/30), done.[K
remote: Compressing objects: 100% (28/28), done.[K
remote: Total 30 (delta 14), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (30/30), done.


In [2]:
import numpy as np
import os

In [3]:
import pandas as pd

In [4]:
import torch
from torch import nn     
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader

In [5]:
from sklearn.preprocessing import MinMaxScaler

In [6]:
def preparedata_time_window(input_data,window):
    out_data = []
    # rearrange the input in form of output based on 3 observations in past
    for i in range(len(input_data)):
        if (i+window+1) <= len(input_data):
            out_data.append((input_data[i:i+window],input_data[i+window:i+(window+1)]))
            
    return out_data

In [7]:
class StandardizeInput():
  def __init__(self):
    self.scaler = MinMaxScaler(feature_range=(-1,1)) 
  def __call__(self,data):
    return self.scaler.fit_transform(data)

# scaler2 = MinMaxScaler(feature_range=(-1,1))
# scaled_input2 = scaler2.fit_transform(data.reshape(-1,1))


In [8]:
class UnivariateTsDataset(Dataset):
    """Univariate time serices dataset."""

    def __init__(self, csv_file, target:str=None, transform=None, window=1,features:list=None,n_features=1,scaler=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            window (integer): Number of time steps in the past
            features (list) : list of features to be used, default is all features
            n_features (integer) : number of features to be used. default is 1, min number of features required
        """
        self.transform = transform
        self.target = target
        self.window = window
        self.features = features
        self.scaler=scaler
        if len(self.features) > 0:
          self.n_features = len(self.features)
        else:
          self.n_features = n_features
        self.rawdata = pd.read_csv(csv_file)
        self.data = self.preprocess(self.rawdata)
        
    
    def preprocess(self,input_df):
      if self.features != None:
        input_data = input_df[self.features]
      else:
        input_data = input_df
      # scaling
      if self.scaler != None:
        input_data = self.scaler(input_data)
      # convert to tensors
      input_data_tensor = torch.tensor(input_data,dtype=torch.float32,device=device).view(-1)
      out_data = []
      # rearrange the input in form of output based on 3 observations in past
      for i in range(len(input_data_tensor)):
        if (i+self.window+1) <= len(input_data_tensor):
          out_data.append((input_data_tensor[i:i+self.window],input_data_tensor[i+self.window:i+(self.window+1)]))
            
      return out_data
    
    def train_val_test_split(self,train_size=0.8,test_size=.20,val_size=0.0):
      train_len = int(train_size*len(self.data))
      train_set = torch.utils.data.Subset(self,np.arange(train_len))
      val_len = int(len(self.data)*val_size)
      if val_size > 0:
        val_set = torch.utils.data.Subset(self,np.arange(train_len,(train_len+val_len)))
      test_set = torch.utils.data.Subset(self,np.arange((train_len+val_len),len(self.data)))
      if val_size > 0:
        return train_set,val_set,test_set
      return train_set,test_set

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        sample = self.data[idx]

        return sample

In [9]:
class LSTMSimple(nn.Module):
    def __init__(self, input_dim,hidden_dim,out_size,num_layers=1,batch_size=1,device=None):
        super(LSTMSimple,self).__init__()
        self.batch_size = batch_size
        self.lstm = nn.LSTM(input_dim, hidden_dim,num_layers=num_layers)
        self.out = nn.Linear(hidden_dim,out_size)
        self.hidden_dim = hidden_dim
        self.n_layers = num_layers
        if device == None:
          if torch.cuda.is_available():
            self.device = torch.device('cuda')
          else:
            self.device = torch.device('cpu')
        else:
          self.device = device
        self.hidden = (torch.zeros(1*self.n_layers,self.batch_size,self.hidden_dim,device=self.device),torch.zeros(1*self.n_layers,self.batch_size,self.hidden_dim,device=self.device))
    def forward(self,x):
      self.hidden = (torch.zeros(1*self.n_layers,self.batch_size,self.hidden_dim,device=self.device),torch.zeros(1*self.n_layers,self.batch_size,self.hidden_dim,device=self.device))
      out,self.hidden = self.lstm(x.view(len(x[1]),self.batch_size,-1),self.hidden)
      preds = self.out(out[-1])
      return preds
    def predict(self,n_preds,past_val,batch_size=1):
      out = []
      with torch.no_grad():
        input_ten = torch.tensor(past_val,dtype=torch.float32,device=self.device)
        self.eval()
        for p in range(n_preds):
          input_ten = input_ten[-len(past_val):].view(-1,len(past_val))
          self.hidden = (torch.zeros(1*self.n_layers,batch_size,self.hidden_dim,device=self.device),
                         torch.zeros(1*self.n_layers,batch_size,self.hidden_dim,device=self.device))
          preds = model(input_ten)
          input_ten = torch.cat((input_ten.view(-1),preds),0)
          out.append(preds)
      return out
    # def predict(n_preds,input_ten):
    #   out = []
    #   with torch.no_grad():
    #     for p in range(n_preds):
    #       input_ten = input_ten[-6:].view(-1,6)
    #       # model.hidden = (torch.zeros(2,1,64,device=device),torch.zeros(2,1,50,device=device))
    #       preds = model(input_ten)
    #       input_ten = torch.cat((input_ten.view(-1),preds),0)
    #       actual_pred = dataset.scaler.scaler.inverse_transform(np.array(preds.view(-1).cpu()).reshape(-1,1))
    #       out.append(actual_pred)
    #   print(actual_pred)
    #   return out

In [10]:
class Trainer():
  def __init__(self,model,train_data,loss_fn,optimizer,val_data=None,epochs=1):
    self.model = model
    self.epochs = epochs
    self.train_data = train_data
    self.val_data = val_data
    self.loss_fn = loss_fn
    self.optimizer = optimizer
    self.predictions = []
  def train(self):
    for epoch in range(self.epochs):
      self.model.train()
      train_loss = 0.0  
      for x,t in self.train_data:
        self.model.zero_grad()
        preds = model(x)
        self.predictions.append(preds)
        loss = self.loss_fn(preds,t)
        loss.backward()
        self.optimizer.step()
        train_loss += loss.item()
      train_loss = train_loss/len(self.train_data)
      if self.val_data != None:
        val_loss = self.__validation()
      if (epoch+1)%10 == 0:
        print(f"Epoch: {epoch+1} loss: {train_loss} validation loss: {val_loss}")
  def __validation(self):
    self.model.eval()  
    val_loss = 0.0
    with torch.no_grad():
      for x,t in self.val_data:
        preds = self.model(x)
        self.predictions.append(preds)
        loss = self.loss_fn(preds,t)
        val_loss += loss.item()
    return val_loss/len(self.val_data)

In [11]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)
print(type(device))

cuda
<class 'torch.device'>


In [12]:
m = LSTMSimple(1,20,1,1,5,device=device)
m.to(device)

LSTMSimple(
  (lstm): LSTM(1, 20)
  (out): Linear(in_features=20, out_features=1, bias=True)
)

In [13]:
i = torch.randn(5,5,1,device=device)
o = m(i)

In [14]:
dataset = UnivariateTsDataset('/content/timeseries/ULTRACEMCO.NS.csv',window=6,features=['Close'],n_features=1,scaler=StandardizeInput())

In [15]:
train_set,val_set,test_set = dataset.train_val_test_split(0.60,0.20,0.20)


In [16]:
batch_size = 4
train_loader = torch.utils.data.DataLoader(train_set,batch_size=batch_size,shuffle=False,drop_last=True)
val_loader = torch.utils.data.DataLoader(val_set,batch_size=batch_size,shuffle=False,drop_last=True)
test_loader = torch.utils.data.DataLoader(test_set,batch_size=batch_size,shuffle=False,drop_last=True)


In [41]:
for x,y in train_loader:
  print(x.shape)
  break
  print(x.view(len(x[1]),4,-1).size(-1))
  print(x.view(len(x[1]),4,-1).shape)

torch.Size([4, 6])


In [18]:
for x,y in train_loader:
  print(y.shape)
  break

torch.Size([4, 1])


In [19]:
df = pd.read_csv('/content/timeseries/ULTRACEMCO.NS.csv')
# df.columns=['sales']


In [20]:
model = LSTMSimple(1,64,1,num_layers=6,batch_size=batch_size,device=device)
model.to(device)

LSTMSimple(
  (lstm): LSTM(1, 64, num_layers=6)
  (out): Linear(in_features=64, out_features=1, bias=True)
)

In [21]:
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(),lr=0.0001)


Perform a train test split

In [22]:
trainer = Trainer(model,train_loader,loss_fn,optimizer,val_loader,100)

In [23]:
trainer.train()

Epoch: 10 loss: 0.24232666410454032 validation loss: 0.1794354443748792
Epoch: 20 loss: 0.05348757805121648 validation loss: 0.05647265026345849
Epoch: 30 loss: 0.044243403277101204 validation loss: 0.03820899044755303
Epoch: 40 loss: 0.04009845449925504 validation loss: 0.03195512791959724
Epoch: 50 loss: 0.03691489284715822 validation loss: 0.028994610227528028
Epoch: 60 loss: 0.03454267478729081 validation loss: 0.028436059709444333
Epoch: 70 loss: 0.03279267502431241 validation loss: 0.029157445234886836
Epoch: 80 loss: 0.03149212479345604 validation loss: 0.030320770558319055
Epoch: 90 loss: 0.030514820616291318 validation loss: 0.0314699832573145
Epoch: 100 loss: 0.029769608212518506 validation loss: 0.0324059906124603


In [24]:
def test(test_data):
   model.eval()  
   test_loss = 0.0
   preds = []
   with torch.no_grad():
     for x,t in test_data:
       pred = model(x)
       loss = loss_fn(pred,t)
       test_loss += loss.item()
       preds.append(pred)
     print(test_loss/len(test_data))
   return preds

In [40]:
import os
os.makedirs('/content/timeseries/checkpoints/',exist_ok=True)


In [39]:
torch.save(model.state_dict(),f"/content/timeseries/checkpoints/lstmchkpt_1")

In [25]:
p = test(test_loader)

0.03163832312566228


In [26]:
p_a = []
for x in p:
  p_a.append(dataset.scaler.scaler.inverse_transform(np.array(x.view(-1).cpu()).reshape(-1,1)))

In [97]:
def predict(n_preds,past_val):
  """
  Args:
    past_val:nparray of shape [batch_size,seq_length]
    n_preds:int number of forward times steps for which prediction is required
  """
  out = []
  with torch.no_grad():
    input_ten = torch.tensor(past_val,dtype=torch.float32,device=device)
    model.eval()
    for p in range(n_preds):
      input_ten = input_ten[:,-len(past_val[1]):].view(-1,len(past_val[1]))
      print(input_ten.size())
      preds = model(input_ten)
      print(preds)
      print(preds.size())
      input_ten = torch.cat((input_ten,preds),1)
      actual_pred = dataset.scaler.scaler.inverse_transform(preds.cpu().numpy())
      out.append(actual_pred)
      print(actual_pred)
  return out

In [28]:
# def predict(self,n_preds,past_val,batch_size=1):
#       out = []
#       with torch.no_grad():
#         input_ten = torch.tensor(past_val,dtype=torch.float32,device=self.device)
#         self.eval()
#         for p in range(n_preds):
#           input_ten = input_ten[-len(past_val):].view(-1,len(past_val))
#           self.hidden = (torch.zeros(1*self.n_layers,batch_size,self.hidden_dim,device=self.device),
#                          torch.zeros(1*self.n_layers,batch_size,self.hidden_dim,device=self.device))
#           preds = model(input_ten)
#           input_ten = torch.cat((input_ten.view(-1),preds),0)
#           out.append(preds)
#       return out

In [49]:
pred_data=dataset.rawdata['Close'].iloc[-40:].to_numpy().reshape(4,-1)
pred_data.shape

(4, 10)

In [98]:
actual = predict(10,pred_data)

torch.Size([4, 10])
tensor([[1.4473],
        [1.4473],
        [1.4473],
        [1.4473]], device='cuda:0')
torch.Size([4, 1])
[[8487.434]
 [8487.433]
 [8487.431]
 [8487.431]]
torch.Size([4, 10])


RuntimeError: ignored

In [42]:
a = dataset.rawdata['Close'].iloc[-10:].to_numpy().reshape(1,-1).shape

In [59]:
t1 = torch.randn(4,10)
t2 = torch.randn(4,1)


In [84]:
t3 = torch.cat((t1,t2),1)

In [63]:
print(t2)
print(t2.size())
t4 = t2.view(-1)
print(t4.size())
t4

tensor([[-2.5128e+00],
        [-7.8202e-01],
        [ 4.1493e-01],
        [-2.3969e-03]])
torch.Size([4, 1])
torch.Size([4])


tensor([-2.5128e+00, -7.8202e-01,  4.1493e-01, -2.3969e-03])

In [96]:
t7 = t1[:,-len(t1[0]):].view(-1,len(t1[0]))
t7

tensor([[-1.2124,  0.4552,  1.1292, -0.3701, -0.7648, -0.8840, -0.8880,  0.4540,
          0.3168, -0.1377],
        [-1.3522, -1.4382, -2.3599, -2.3989, -1.1829, -0.5666, -1.9389, -0.2141,
          0.2763,  1.8017],
        [ 2.1473, -0.4176,  0.7828, -1.2582,  1.3128, -1.0634,  1.0524, -0.0430,
         -1.3005, -0.5499],
        [-1.4483,  0.7166,  0.7265,  0.8216,  0.3388, -0.0641,  0.4313, -0.7465,
          2.4237,  0.5834]])

In [85]:
t3

tensor([[-1.2124e+00,  4.5516e-01,  1.1292e+00, -3.7006e-01, -7.6477e-01,
         -8.8405e-01, -8.8804e-01,  4.5398e-01,  3.1685e-01, -1.3770e-01,
         -2.5128e+00],
        [-1.3522e+00, -1.4382e+00, -2.3599e+00, -2.3989e+00, -1.1829e+00,
         -5.6661e-01, -1.9389e+00, -2.1409e-01,  2.7630e-01,  1.8017e+00,
         -7.8202e-01],
        [ 2.1473e+00, -4.1756e-01,  7.8280e-01, -1.2582e+00,  1.3128e+00,
         -1.0634e+00,  1.0524e+00, -4.2977e-02, -1.3005e+00, -5.4986e-01,
          4.1493e-01],
        [-1.4483e+00,  7.1657e-01,  7.2648e-01,  8.2164e-01,  3.3879e-01,
         -6.4062e-02,  4.3131e-01, -7.4650e-01,  2.4237e+00,  5.8338e-01,
         -2.3969e-03]])

In [93]:
t3[:,-9:]

tensor([[ 1.1292e+00, -3.7006e-01, -7.6477e-01, -8.8405e-01, -8.8804e-01,
          4.5398e-01,  3.1685e-01, -1.3770e-01, -2.5128e+00],
        [-2.3599e+00, -2.3989e+00, -1.1829e+00, -5.6661e-01, -1.9389e+00,
         -2.1409e-01,  2.7630e-01,  1.8017e+00, -7.8202e-01],
        [ 7.8280e-01, -1.2582e+00,  1.3128e+00, -1.0634e+00,  1.0524e+00,
         -4.2977e-02, -1.3005e+00, -5.4986e-01,  4.1493e-01],
        [ 7.2648e-01,  8.2164e-01,  3.3879e-01, -6.4062e-02,  4.3131e-01,
         -7.4650e-01,  2.4237e+00,  5.8338e-01, -2.3969e-03]])

In [65]:
pr = dataset.scaler.scaler.inverse_transform(t2.cpu().numpy())

In [66]:
pr

array([[3131.2292],
       [5472.1255],
       [7091.0664],
       [6526.608 ]], dtype=float32)

In [None]:
b= torch.tensor(a[-len(a):].view(len(a),-1)

In [None]:
ap = [x.reshape(1)[0] for x in actual]
ap


In [None]:
df = pd.read_csv('/content/timeseries/ULTRACEMCO.NS.csv')

In [None]:
df2 = pd.DataFrame(pd.date_range('2022-11-17',periods=10).date)
df2.columns = ['Date']
df2['ap'] = ap

In [None]:
df3 = pd.concat([df,df2],axis=0)

In [None]:
df4 = df3[-30:]


In [None]:
df4['Date'] = pd.to_datetime(df4['Date'])

In [None]:
from matplotlib import pyplot as plt
import seaborn as sns

In [None]:
sns.lineplot(x='Date',y='Close',data=df4)
sns.lineplot(x='Date',y='ap',data=df4)
plt.ylabel("Price")
plt.xticks(rotation = 20)
plt.show()

In [None]:
torch.cuda.empty_cache()

# New Section

In [None]:
dataset.rawdata['Close'].iloc[-6:]

In [None]:
len(train_loader)

In [None]:
x = torch.randn(4, 3, 4)
lin = nn.Linear(4, 10)
out = lin(x)
print(out.shape)
torch.Size([2, 3, 10])