In [3]:
import numpy as np
import os

In [4]:
def preparedata_time_window(input_data,window):
    out_data = []
    # rearrange the input in form of output based on 3 observations in past
    for i in range(len(input_data)):
        if (i+window+1) <= len(input_data):
            out_data.append((input_data[i:i+window],input_data[i+window:i+(window+1)]))
            
    return out_data

In [5]:
import pandas as pd

In [6]:
import torch
from torch import nn     
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader

In [7]:
# df = pd.read_csv('https://raw.githubusercontent.com/riteshsv/simplelstm/main/tsdata.csv')
# df.columns=['sales']
# df.head()

In [9]:
df = pd.read_csv('/content/timeseries/ULTRACEMCO.NS.csv')
# df.columns=['sales']
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-11-16,8120.0,8139.0,7856.299805,7882.399902,7837.40918,277983
1,2021-11-17,7845.0,7948.950195,7801.0,7857.700195,7812.850586,183623
2,2021-11-18,7899.0,7930.0,7715.049805,7767.700195,7723.364258,196805
3,2021-11-22,7799.0,7808.600098,7550.600098,7653.799805,7610.11377,383629
4,2021-11-23,7620.0,7714.700195,7540.700195,7657.950195,7614.240723,475377


In [10]:
df['Date'].min()

'2021-11-16'

In [11]:
from sklearn.preprocessing import MinMaxScaler

In [12]:
data = df['Close'].values.astype(float)

In [13]:
data.shape

(249,)

13.777777777777779

In [13]:
# X = np.array(X)
# y = np.array(y)

In [14]:
# scaler = MinMaxScaler(feature_range=(-1,1))
# scaled_input = scaler.fit_transform( np.array(input_data).reshape(-1,1))

In [15]:
# train_data = preparedata_time_window(torch.tensor(scaled_input,dtype=torch.float32).view(-1),3)

In [56]:
class LSTMSimple(nn.Module):
    def __init__(self, input_dim,hidden_dim,out_size,num_layers=1,batch_size=1):
        super(LSTMSimple,self).__init__()
        self.batch_size = batch_size
        self.lstm = nn.LSTM(input_dim, hidden_dim)
        self.out = nn.Linear(hidden_dim,out_size)
        self.hidden = (torch.zeros(1*num_layers,batch_size,hidden_dim),torch.zeros(1*num_layers,batch_size,hidden_dim))
    def forward(self,x):
      self.hidden = (torch.zeros(1,1,50,device=device),torch.zeros(1,1,50,device=device))
      out,self.hidden = self.lstm(x.view(len(x),self.batch_size,-1),self.hidden)
      preds = self.out(out.view(len(x),-1))
      return preds[0]

In [59]:
class Trainer():
  def __init__(self,model,train_data,loss_fn,optimizer,val_data=None,epochs=1):
    self.model = model
    self.epochs = epochs
    self.train_data = train_data
    self.val_data = val_data
    self.loss_fn = loss_fn,
    self.optimizer = optimizer
  def train(self):
    for epoch in self.epochs:
      self.model.train()
      train_loss = 0.0  
      for x,t in self.train_data:
        self.model.zero_grad()
        preds = model(x)
        loss = self.loss_fn(preds,t)
        loss.backward()
        self.optimizer.step()
        train_loss += loss.item()
      train_loss = train_loss/len(self.train_data)
      if self.val_data != None:
        val_loss = self.__validation()
      print(f"Epoch: {epoch} loss: {train_loss} validation loss: {val_loss}")
  def __validation(self):
    self.eval()  
    val_loss = 0.0
    with torch.no_grad():
      for x,t in self.val_data:
        preds = self.model(x)
        loss = self.loss_fn(preds,t)
        val_loss += loss.item()
    return val_loss/len(self.val_data)

In [15]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [16]:
model = LSTMSimple(1,50,1)
model.to(device)

LSTMSimple(
  (lstm): LSTM(1, 50)
  (out): Linear(in_features=50, out_features=1, bias=True)
)

In [17]:
loss_fn = nn.MSELoss()
optimizer = optim.SGD(model.parameters(),lr=1e-10)

In [20]:
def train(train_data):
    for epoch in range(100):
        for x,t in train_data:
            model.zero_grad()
            model.hidden = (torch.zeros(1,1,50,device=device),torch.zeros(1,1,50,device=device))

            preds = model(x)
            loss = loss_fn(preds,t)
            loss.backward()
            optimizer.step()
        if epoch%25 == 0:
            print(f"epoch: {epoch} loss:{loss.item()}")
        

        

In [35]:
class StandardizeInput():
  def __init__(self):
    self.scaler = MinMaxScaler(feature_range=(-1,1)) 
  def __call__(self,data):
    return self.scaler.fit_transform(data)

# scaler2 = MinMaxScaler(feature_range=(-1,1))
# scaled_input2 = scaler2.fit_transform(data.reshape(-1,1))


Perform a train test split

In [34]:
# test_size = 50
# train_data = preparedata_time_window(torch.tensor(scaled_input2[:-test_size],dtype=torch.float32,device=device).view(-1),14)
# test_data = preparedata_time_window(torch.tensor(scaled_input2[-test_size:],dtype=torch.float32,device=device).view(-1),14)

In [42]:
class UnivariateTsDataset(Dataset):
    """Univariate time serices dataset."""

    def __init__(self, csv_file, target:str=None, transform=None, window=1,features:list=None,n_features=1,scaler=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            window (integer): Number of time steps in the past
            features (list) : list of features to be used, default is all features
            n_features (integer) : number of features to be used. default is 1, min number of features required
        """
        self.transform = transform
        self.target = target
        self.window = window
        self.features = features
        self.scaler=scaler
        if len(self.features) > 0:
          self.n_features = len(self.features)
        else:
          self.n_features = n_features
        self.data = self.preprocess(pd.read_csv(csv_file))
    
    def preprocess(self,input_df):
      if self.features != None:
        input_data = input_df[self.features]
      else:
        input_data = input_df
      # scaling
      if self.scaler != None:
        input_data = self.scaler(input_data)
      # convert to tensors
      input_data_tensor = torch.tensor(input_data,dtype=torch.float32,device=device).view(-1)
      out_data = []
      # rearrange the input in form of output based on 3 observations in past
      for i in range(len(input_data_tensor)):
        if (i+self.window+1) <= len(input_data_tensor):
          out_data.append((input_data_tensor[i:i+self.window],input_data_tensor[i+self.window:i+(self.window+1)]))
            
      return out_data
    
    def train_val_test_split(self,train_size=0.8,test_size=.20,val_size=0.0):
      train_len = int(train_size*len(self.data))
      train_set = torch.utils.data.Subset(self,np.arange(train_len))
      val_len = int(len(self.data)*val_size)
      if val_size > 0:
        val_set = torch.utils.data.Subset(self,np.arange(train_len,(train_len+val_len)))
      test_set = torch.utils.data.Subset(self,np.arange((train_len+val_len),len(self.data)))
      if val_size > 0:
        return train_set,val_set,test_set
      return train_set,test_set

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        sample = self.data[idx]

        return sample

In [43]:
dataset = UnivariateTsDataset('/content/timeseries/ULTRACEMCO.NS.csv',window=14,features=['Close'],n_features=1,scaler=StandardizeInput())

In [44]:
train_set,val_set,test_set = dataset.train_val_test_split(0.60,0.20,0.20)


In [49]:
train_loader = torch.utils.data.DataLoader(train_set,batch_size=1,shuffle=False)
val_loader = torch.utils.data.DataLoader(val_set,batch_size=1,shuffle=False)
test_loader = torch.utils.data.DataLoader(test_set,batch_size=1,shuffle=False)


In [None]:
train(train_data2)

epoch: 0 loss:0.09602939337491989
epoch: 25 loss:0.09602939337491989
epoch: 50 loss:0.09602939337491989
epoch: 75 loss:0.09602939337491989


In [None]:
with torch.no_grad():
  input_ten = torch.tensor(scaled_input2[-14:],dtype=torch.float32,device=device).view(-1)
  model.hidden = (torch.zeros(1,1,50,device=device),torch.zeros(1,1,50,device=device))
  preds = model(input_ten)
  print(preds)
  actual_pred = scaler2.inverse_transform(np.array(preds.view(-1).cpu()).reshape(-1,1))

print(actual_pred)


tensor([-0.0080], device='cuda:0')
[[6519.0356]]


In [None]:
from matplotlib import pyplot as plt

In [None]:
plt.title('Stock Price')
plt.ylabel('Price')
plt.grid(True)
plt.autoscale(axis='x', tight=True)
plt.plot(flight_data['passengers'])
plt.plot(x,actual_predictions)
plt.show()

In [8]:
!git clone https://github.com/riteshsv/timeseries.git

Cloning into 'timeseries'...
remote: Enumerating objects: 9, done.[K
remote: Counting objects: 100% (9/9), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 9 (delta 0), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (9/9), done.


# New Section