In [2]:
import sys
import os


current_dir = os.getcwd()

parent_dir = os.path.dirname(current_dir)

sys.path.append(parent_dir)

In [3]:
import yfinance as yf
import pandas as pd
from Data.Slider import Slider
from Data import Data_prep
from models.LSTM import LSTM
from torch import nn
import torch
from tqdm import tqdm

In [4]:

TICKER = 'Goldbees.ns'
START = '2009-01-02'
END = '2026-02-02'

data = yf.download(tickers = TICKER,
                   start = START,
                   end = END)
data

[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,GOLDBEES.NS,GOLDBEES.NS,GOLDBEES.NS,GOLDBEES.NS,GOLDBEES.NS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2009-01-02,13.370000,13.430000,13.300000,0.000000,1043400
2009-01-05,13.155500,13.320000,13.150000,0.000000,1294700
2009-01-06,13.040000,13.200000,13.020100,0.000000,1236000
2009-01-07,13.215000,13.250000,13.060000,0.000000,1109000
2009-01-09,13.168900,13.200000,13.060000,0.000000,723900
...,...,...,...,...,...
2026-01-23,127.419998,132.479996,126.000000,129.100006,141941218
2026-01-27,131.449997,134.000000,129.029999,130.550003,139657546
2026-01-28,135.820007,137.100006,132.000000,132.000000,184193702
2026-01-29,146.529999,148.139999,141.639999,143.000000,271824296


In [5]:
df = pd.DataFrame(data)
df.set_index

<bound method DataFrame.set_index of Price            Close        High         Low        Open      Volume
Ticker     GOLDBEES.NS GOLDBEES.NS GOLDBEES.NS GOLDBEES.NS GOLDBEES.NS
Date                                                                  
2009-01-02   13.370000   13.430000   13.300000    0.000000     1043400
2009-01-05   13.155500   13.320000   13.150000    0.000000     1294700
2009-01-06   13.040000   13.200000   13.020100    0.000000     1236000
2009-01-07   13.215000   13.250000   13.060000    0.000000     1109000
2009-01-09   13.168900   13.200000   13.060000    0.000000      723900
...                ...         ...         ...         ...         ...
2026-01-23  127.419998  132.479996  126.000000  129.100006   141941218
2026-01-27  131.449997  134.000000  129.029999  130.550003   139657546
2026-01-28  135.820007  137.100006  132.000000  132.000000   184193702
2026-01-29  146.529999  148.139999  141.639999  143.000000   271824296
2026-01-30  131.119995  142.800003  127.

In [6]:
df.columns

MultiIndex([( 'Close', 'GOLDBEES.NS'),
            (  'High', 'GOLDBEES.NS'),
            (   'Low', 'GOLDBEES.NS'),
            (  'Open', 'GOLDBEES.NS'),
            ('Volume', 'GOLDBEES.NS')],
           names=['Price', 'Ticker'])

In [7]:
feat_cols = ['Open','High','Low','Volume']
feartures = df[feat_cols]
feartures = feartures.values

label_col = ['Close']
label = df[label_col]
label = label.values

In [8]:
train_size = int(len(feartures)*0.8)
X_train = feartures[:train_size]
X_test = feartures[train_size:]
Y_train = label[:train_size]
Y_test = label[train_size:]
print(X_train.shape,Y_train.shape)
print(X_test.shape,Y_test.shape)

(3367, 4) (3367, 1)
(842, 4) (842, 1)


In [9]:
from sklearn.preprocessing import MinMaxScaler

process_feat = MinMaxScaler(feature_range=(0,1))
process_targ = MinMaxScaler(feature_range=(0,1))

X_train = process_feat.fit_transform(X_train)
X_test = process_feat.fit_transform(X_test)

Y_train = process_targ.fit_transform(Y_train)
Y_test = process_targ.fit_transform(Y_test)

print(X_train[0],X_test[0],Y_train[0],Y_test[0])


[0.         0.25121376 0.26714483 0.00626884] [0.05787478 0.01108796 0.05420995 0.00693024] [0.26691172] [0.01227585]


In [10]:
slidertr = Slider(feature = X_train,
                labels = Y_train,
                length = 60)

sliderts = Slider(feature = X_test,
                labels = Y_test,
                length = 60)

x_trainf,y_trainf = slidertr.slider()
x_testf,y_testf = sliderts.slider()

In [11]:
#train data prep and load
x_t,y_t = Data_prep.convertNumpyToTensors(x_trainf,y_trainf)
train_dataset = Data_prep.createTensorDataset(x_t,y_t)
train_data_load = Data_prep.loadData(dataset=train_dataset,
                                               batch=32,
                                               num_worker=0)

In [12]:
#test data pred and load
x_te,y_te = Data_prep.convertNumpyToTensors(x_testf,y_testf)
test_dataset = Data_prep.createTensorDataset(x_te,y_te)
test_data_load = Data_prep.loadData(dataset=test_dataset,
                                               batch=32,
                                               num_worker=0,
                                               shuffle=False)

In [13]:
x,y = next(iter(train_data_load))
print(f'Shape of features {x.shape}')
print(f'Shape of target {y.shape}')

Shape of features torch.Size([32, 60, 4])
Shape of target torch.Size([32, 1])


In [14]:
device = 'cuda' if(torch.cuda.is_available()) else 'cpu'

In [15]:
#model
INPUT_SIZE = 4
HIDDEN_UNITS = 64
OUT_FEATURES = 1
model = LSTM(in_size=INPUT_SIZE,
             hidden_units=HIDDEN_UNITS,
             out_features=OUT_FEATURES).to(device)

In [16]:
#loss funtiona and Optimizer
loss_fn = nn.MSELoss()
optimizer =torch.optim.Adam(params=model.parameters(),
                            lr = 1e-4)

In [19]:
#training and testing loop

def traintest(model:nn.Module,
          device:torch.device,
          Epoch:int,
          loss_fn:nn.MSELoss,
          optimizer:torch.optim.Adam,
          traindataloader:torch.utils.data.DataLoader,
          testdataloader: torch.utils.data.DataLoader,
          interval:int = 10):
    
  for epoch in range(Epoch):
    model.train()
    train_running_loss = 0.0
    test_running_loss = 0.0

    loop = iter(traindataloader, desc = f'Epoch: {epoch+1}/{Epoch}')

    for x,y in tqdm(loop):
      train_pred = model(x)

      train_loss = loss_fn(train_pred,y) 
      train_running_loss += train_loss.item()

      optimizer.zero_grad()
      train_loss.backward()     
      optimizer.step()

    total_train_loss = train_running_loss/len(traindataloader)

    model.eval()
    with torch.inference_mode():
      
      for x,y in testdataloader:
        test_pred = model(x)

        test_loss = loss_fn(test_pred,y)
        test_running_loss += test_loss.item()

      total_test_loss = test_running_loss/len(testdataloader)

    if epoch%interval == 0:
      print(f'|Train Loss: {total_train_loss : 0.4f} | Test Loss: {total_test_loss: 0.4f} |')
    

In [None]:
#training and testing
EPOCH=50
INTERVAL = 5
traintest(model=model,
          device=device,
          Epoch=EPOCH,
          loss_fn=loss_fn,
          optimizer=optimizer,
          traindataloader=train_data_load,
          testdataloader=test_data_load,
          interval=INTERVAL
          )