In [3]:
import numpy as np
import os

In [4]:
import pandas as pd

In [5]:
import torch
from torch import nn     
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader

In [6]:
from sklearn.preprocessing import MinMaxScaler

In [7]:
def preparedata_time_window(input_data,window):
    out_data = []
    # rearrange the input in form of output based on 3 observations in past
    for i in range(len(input_data)):
        if (i+window+1) <= len(input_data):
            out_data.append((input_data[i:i+window],input_data[i+window:i+(window+1)]))
            
    return out_data

In [8]:
class StandardizeInput():
  def __init__(self):
    self.scaler = MinMaxScaler(feature_range=(-1,1)) 
  def __call__(self,data):
    return self.scaler.fit_transform(data)

# scaler2 = MinMaxScaler(feature_range=(-1,1))
# scaled_input2 = scaler2.fit_transform(data.reshape(-1,1))


In [9]:
class UnivariateTsDataset(Dataset):
    """Univariate time serices dataset."""

    def __init__(self, csv_file, target:str=None, transform=None, window=1,features:list=None,n_features=1,scaler=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            window (integer): Number of time steps in the past
            features (list) : list of features to be used, default is all features
            n_features (integer) : number of features to be used. default is 1, min number of features required
        """
        self.transform = transform
        self.target = target
        self.window = window
        self.features = features
        self.scaler=scaler
        if len(self.features) > 0:
          self.n_features = len(self.features)
        else:
          self.n_features = n_features
        self.rawdata = pd.read_csv(csv_file)
        self.data = self.preprocess(self.rawdata)
        
    
    def preprocess(self,input_df):
      if self.features != None:
        input_data = input_df[self.features]
      else:
        input_data = input_df
      # scaling
      if self.scaler != None:
        input_data = self.scaler(input_data)
      # convert to tensors
      input_data_tensor = torch.tensor(input_data,dtype=torch.float32,device=device).view(-1)
      out_data = []
      # rearrange the input in form of output based on 3 observations in past
      for i in range(len(input_data_tensor)):
        if (i+self.window+1) <= len(input_data_tensor):
          out_data.append((input_data_tensor[i:i+self.window],input_data_tensor[i+self.window:i+(self.window+1)]))
            
      return out_data
    
    def train_val_test_split(self,train_size=0.8,test_size=.20,val_size=0.0):
      train_len = int(train_size*len(self.data))
      train_set = torch.utils.data.Subset(self,np.arange(train_len))
      val_len = int(len(self.data)*val_size)
      if val_size > 0:
        val_set = torch.utils.data.Subset(self,np.arange(train_len,(train_len+val_len)))
      test_set = torch.utils.data.Subset(self,np.arange((train_len+val_len),len(self.data)))
      if val_size > 0:
        return train_set,val_set,test_set
      return train_set,test_set

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        sample = self.data[idx]

        return sample

In [305]:
class LSTMSimple(nn.Module):
    def __init__(self, input_dim,hidden_dim,out_size,num_layers=1,batch_size=1,device=None):
        super(LSTMSimple,self).__init__()
        self.batch_size = batch_size
        self.lstm = nn.LSTM(input_dim, hidden_dim,num_layers=num_layers)
        self.out = nn.Linear(hidden_dim,out_size)
        self.hidden_dim = hidden_dim
        self.n_layers = num_layers
        if device == None:
          if torch.cuda.is_available():
            self.device = torch.device('cuda')
          else:
            self.device = torch.device('cpu')
        else:
          self.device = device
        self.hidden = (torch.zeros(1*self.n_layers,self.batch_size,self.hidden_dim,device=self.device),torch.zeros(1*self.n_layers,self.batch_size,self.hidden_dim,device=self.device))
    def forward(self,x):
      self.hidden = (torch.zeros(1*self.n_layers,self.batch_size,self.hidden_dim,device=self.device),torch.zeros(1*self.n_layers,self.batch_size,self.hidden_dim,device=self.device))
      out,self.hidden = self.lstm(x.view(len(x[1]),self.batch_size,-1),self.hidden)
      preds = self.out(out[-1])
      return preds[0]
    def predict(self,n_preds,past_val,batch_size=1):
      out = []
      with torch.no_grad():
        input_ten = torch.tensor(past_val,dtype=torch.float32,device=self.device)
        self.eval()
        for p in range(n_preds):
          input_ten = input_ten[-len(past_val):].view(-1,len(past_val))
          self.hidden = (torch.zeros(1*self.n_layers,batch_size,self.hidden_dim,device=self.device),
                         torch.zeros(1*self.n_layers,batch_size,self.hidden_dim,device=self.device))
          preds = model(input_ten)
          input_ten = torch.cat((input_ten.view(-1),preds),0)
          out.append(preds)
      return out
    # def predict(n_preds,input_ten):
    #   out = []
    #   with torch.no_grad():
    #     for p in range(n_preds):
    #       input_ten = input_ten[-6:].view(-1,6)
    #       # model.hidden = (torch.zeros(2,1,64,device=device),torch.zeros(2,1,50,device=device))
    #       preds = model(input_ten)
    #       input_ten = torch.cat((input_ten.view(-1),preds),0)
    #       actual_pred = dataset.scaler.scaler.inverse_transform(np.array(preds.view(-1).cpu()).reshape(-1,1))
    #       out.append(actual_pred)
    #   print(actual_pred)
    #   return out

In [82]:
class Trainer():
  def __init__(self,model,train_data,loss_fn,optimizer,val_data=None,epochs=1):
    self.model = model
    self.epochs = epochs
    self.train_data = train_data
    self.val_data = val_data
    self.loss_fn = loss_fn
    self.optimizer = optimizer
    self.predictions = []
  def train(self):
    for epoch in range(self.epochs):
      self.model.train()
      train_loss = 0.0  
      for x,t in self.train_data:
        self.model.zero_grad()
        preds = model(x)
        self.predictions.append(preds)
        loss = self.loss_fn(preds,t)
        loss.backward()
        self.optimizer.step()
        train_loss += loss.item()
      train_loss = train_loss/len(self.train_data)
      if self.val_data != None:
        val_loss = self.__validation()
      if (epoch+1)%10 == 0:
        print(f"Epoch: {epoch+1} loss: {train_loss} validation loss: {val_loss}")
  def __validation(self):
    self.model.eval()  
    val_loss = 0.0
    with torch.no_grad():
      for x,t in self.val_data:
        preds = self.model(x)
        self.predictions.append(preds)
        loss = self.loss_fn(preds,t)
        val_loss += loss.item()
    return val_loss/len(self.val_data)

In [12]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)
print(type(device))

cuda
<class 'torch.device'>


In [252]:
# !git clone https://github.com/riteshsv/timeseries.git

In [276]:
dataset = UnivariateTsDataset('/content/timeseries/ULTRACEMCO.NS.csv',window=6,features=['Close'],n_features=1,scaler=StandardizeInput())

In [277]:
train_set,val_set,test_set = dataset.train_val_test_split(0.60,0.20,0.20)


In [291]:
batch_size = 4
train_loader = torch.utils.data.DataLoader(train_set,batch_size=batch_size,shuffle=False,drop_last=True)
val_loader = torch.utils.data.DataLoader(val_set,batch_size=batch_size,shuffle=False,drop_last=True)
test_loader = torch.utils.data.DataLoader(test_set,batch_size=batch_size,shuffle=False,drop_last=True)


In [None]:
for x,y in train_loader:
  print(x.shape)
  print(x.view(len(x[1]),4,-1).size(-1))
  print(x.view(len(x[1]),4,-1).shape)

In [118]:
df = pd.read_csv('/content/timeseries/ULTRACEMCO.NS.csv')
# df.columns=['sales']
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-11-16,8120.0,8139.0,7856.299805,7882.399902,7837.40918,277983
1,2021-11-17,7845.0,7948.950195,7801.0,7857.700195,7812.850586,183623
2,2021-11-18,7899.0,7930.0,7715.049805,7767.700195,7723.364258,196805
3,2021-11-22,7799.0,7808.600098,7550.600098,7653.799805,7610.11377,383629
4,2021-11-23,7620.0,7714.700195,7540.700195,7657.950195,7614.240723,475377


In [294]:
model = LSTMSimple(1,64,1,num_layers=3,batch_size=batch_size,device=device)
model.to(device)

LSTMSimple(
  (lstm): LSTM(1, 64, num_layers=3)
  (out): Linear(in_features=64, out_features=1, bias=True)
)

In [295]:
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(),lr=0.001)


Perform a train test split

In [296]:
trainer = Trainer(model,train_loader,loss_fn,optimizer,val_loader,100)

In [297]:
trainer.train()

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch: 10 loss: 0.055458933179680675 validation loss: 0.02213104098943101
Epoch: 20 loss: 0.04445620105575977 validation loss: 0.016910217024512047
Epoch: 30 loss: 0.03511473368982681 validation loss: 0.014108667341740025
Epoch: 40 loss: 0.026704333282800183 validation loss: 0.011836867764941417
Epoch: 50 loss: 0.02048549820513775 validation loss: 0.008779170942337563
Epoch: 60 loss: 0.015788751634924363 validation loss: 0.005641722302243579
Epoch: 70 loss: 0.014025601015115777 validation loss: 0.00449212690849284
Epoch: 80 loss: 0.013448600644349225 validation loss: 0.004388152413109007
Epoch: 90 loss: 0.013440370129602443 validation loss: 0.004502017304427379
Epoch: 100 loss: 0.01368926310290893 validation loss: 0.004378821133286692


In [298]:
def test(test_data):
   model.eval()  
   test_loss = 0.0
   preds = []
   with torch.no_grad():
     for x,t in test_data:
       pred = model(x)
       loss = loss_fn(pred,t)
       test_loss += loss.item()
       preds.append(pred)
     print(test_loss/len(test_data))
   return preds

In [299]:
p = test(test_loader)

0.013856425367218131


In [300]:
p_a = []
for x in p:
  p_a.append(dataset.scaler.scaler.inverse_transform(np.array(x.view(-1).cpu()).reshape(-1,1)))

In [303]:
def predict(n_preds,past_val):
  out = []
  with torch.no_grad():
    input_ten = torch.tensor(past_val,dtype=torch.float32,device=device)
    model.eval()
    for p in range(n_preds):
      input_ten = input_ten[-len(past_val):].view(-1,len(past_val))
      # model.hidden = (torch.zeros(2,1,64,device=device),torch.zeros(2,1,50,device=device))
      preds = model(input_ten)
      input_ten = torch.cat((input_ten.view(-1),preds),0)
      actual_pred = dataset.scaler.scaler.inverse_transform(np.array(preds.view(-1).cpu()).reshape(-1,1))
      out.append(actual_pred)
      print(actual_pred)
  return out

In [304]:
actual = predict(10,dataset.rawdata['Close'].iloc[-1:].to_nparray().reshape(-1,1))

IndexError: ignored

In [310]:
dataset.rawdata['Close'].iloc[-10:].to_numpy().reshape(1,-1).shape

(1, 10)

In [None]:
ap = [x.reshape(1)[0] for x in actual]
ap


In [None]:
df = pd.read_csv('/content/timeseries/ULTRACEMCO.NS.csv')

In [None]:
df2 = pd.DataFrame(pd.date_range('2022-11-17',periods=10).date)
df2.columns = ['Date']
df2['ap'] = ap

In [None]:
df3 = pd.concat([df,df2],axis=0)

In [None]:
df4 = df3[-30:]


In [None]:
df4['Date'] = pd.to_datetime(df4['Date'])

In [None]:
from matplotlib import pyplot as plt
import seaborn as sns

In [None]:
sns.lineplot(x='Date',y='Close',data=df4)
sns.lineplot(x='Date',y='ap',data=df4)
plt.ylabel("Price")
plt.xticks(rotation = 20)
plt.show()

In [None]:
torch.cuda.empty_cache()

# New Section

In [None]:
dataset.rawdata['Close'].iloc[-6:]

In [None]:
len(train_loader)