In [1]:
import numpy as np
import random
import pandas as pd
import datetime
import math, time
import itertools
from operator import itemgetter
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from math import sqrt
import torch
import torch.nn as nn
from torch.autograd import Variable
from tqdm import tqdm
import json
import matplotlib.pyplot as plt
import os

In [2]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [3]:
dpath = "/content/drive/My Drive/LAB/data/"
files = os.listdir(dpath)
files.sort()
print(files)

['ADANIPORTS.csv', 'ASIANPAINT.csv', 'AXISBANK.csv', 'BAJAJ-AUTO.csv', 'BAJAJFINSV.csv', 'BAJFINANCE.csv', 'BHARTIARTL.csv', 'BPCL.csv', 'BRITANNIA.csv', 'CIPLA.csv', 'COALINDIA.csv', 'DRREDDY.csv', 'EICHERMOT.csv', 'GAIL.csv', 'GRASIM.csv', 'HCLTECH.csv', 'HDFC.csv', 'HDFCBANK.csv', 'HEROMOTOCO.csv', 'HINDALCO.csv', 'HINDUNILVR.csv', 'ICICIBANK.csv', 'INDUSINDBK.csv', 'INFRATEL.csv', 'INFY.csv', 'IOC.csv', 'ITC.csv', 'JSWSTEEL.csv', 'KOTAKBANK.csv', 'LT.csv', 'MARUTI.csv', 'MM.csv', 'NESTLEIND.csv', 'NTPC.csv', 'ONGC.csv', 'POWERGRID.csv', 'RELIANCE.csv', 'SBIN.csv', 'SHREECEM.csv', 'SUNPHARMA.csv', 'TATAMOTORS.csv', 'TATASTEEL.csv', 'TCS.csv', 'TECHM.csv', 'TITAN.csv', 'ULTRACEMCO.csv', 'UPL.csv', 'VEDL.csv', 'WIPRO.csv', 'ZEEL.csv']


In [None]:

df=pd.read_csv(dpath + files[0]).set_index('Date')
df = df[["Close"]]
print (type(df))

In [5]:
df.head()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2007-11-27,962.9
2007-11-28,893.9
2007-11-29,884.2
2007-11-30,921.55
2007-12-03,969.3


In [6]:
scaler = MinMaxScaler(feature_range=(-1, 1))
df['Close'] = scaler.fit_transform(df['Close'].values.reshape(-1,1))

In [7]:
df.head()
# print (type(df))

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2007-11-27,-0.640313
2007-11-28,-0.675519
2007-11-29,-0.680468
2007-11-30,-0.661411
2007-12-03,-0.637048


In [8]:
def load_data(stock, look_back=5):
    data_raw = stock.values #used with pandas DataFrames & Series to access the underlying NumPy array of the data
    data = []

    for index in range(len(data_raw) - look_back):
        data.append(data_raw[index: index + look_back])

    # print(len(data))
    # print(data[0])

    data = np.array(data);
    test_set_size = int(np.round(0.2*data.shape[0]));
    train_set_size = data.shape[0] - (test_set_size);

    x_train = data[:train_set_size,:-1,:]
    y_train = data[:train_set_size,-1,:]

    x_test = data[train_set_size:,:-1]
    y_test = data[train_set_size:,-1,:]

    return [x_train, y_train, x_test, y_test]

In [9]:
#test data it is working fine or not
x_train, y_train, x_test, y_test = load_data(df)

In [10]:
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

# we have nd array in x_train , y_train , x_test , y_test

(2654, 4, 1)
(2654, 1)
(663, 4, 1)
(663, 1)


In [11]:
x_train = torch.from_numpy(x_train).type(torch.Tensor)
x_test = torch.from_numpy(x_test).type(torch.Tensor)
y_train = torch.from_numpy(y_train).type(torch.Tensor)
y_test = torch.from_numpy(y_test).type(torch.Tensor)

In [12]:
print(type(x_train))

<class 'torch.Tensor'>


## MODEL

In [13]:
print(x_train.shape)
print(y_train.shape)

torch.Size([2654, 4, 1])
torch.Size([2654, 1])


In [14]:
import torch
import torch.nn as nn

class StockPriceTransformer(nn.Module):
    def __init__(self, d_model=64, nhead=4, num_encoder_layers=2, dim_feedforward=128, dropout=0.1):
        super(StockPriceTransformer, self).__init__()

        self.d_model = d_model
        self.pos_encoder = nn.Parameter(torch.zeros(1, 4, d_model))  # Sequence length is 4

        # Directly project input to d_model dimensions
        self.input_proj = nn.Linear(1, d_model)

        encoder_layers = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_encoder_layers)

        self.fc_out = nn.Linear(d_model, 1)  # Output dimension is 1 (predicting the next day's price)

    def forward(self, src):
        src = self.input_proj(src) * torch.sqrt(torch.tensor(self.d_model, dtype=torch.float32))
        src = src + self.pos_encoder
        output = self.transformer_encoder(src)
        output = self.fc_out(output[:, -1, :])  # Take the output corresponding to the last day
        return output

# Model instantiation
d_model = 64
nhead = 4
num_encoder_layers = 2
dim_feedforward = 128
dropout = 0.1

model = StockPriceTransformer(d_model, nhead, num_encoder_layers, dim_feedforward, dropout)




In [15]:
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

In [16]:
# input_dim = 11
# output_dim = 1
# num_layers = 2
# dropout = 0.1
# d_model = 64
# nhead = 4

# model = TransformerModel(input_dim=input_dim, d_model=d_model, num_layers=num_layers, nhead=nhead, dropout=dropout, output_dim=output_dim)

# Set directory paths
dpath = '/content/drive/My Drive/LAB/data/'  # Path to your data directory
mpath = '/content/drive/My Drive/LAB/output_transformer/'  # Path to save model

# Initialize dictionaries to store MAE and MSE
mae = dict()
mse = dict()

# Get sorted list of files
files = os.listdir(dpath)
files.sort()

# Model Training Parameters
num_epochs = 3  # Adjust as needed

# Iterate over each file and train the model
for file in tqdm(files):
    try:
        df=pd.read_csv(dpath + file).set_index('Date')
        df = df[["Close"]]

        scaler = MinMaxScaler(feature_range=(-1, 1))
        df['Close'] = scaler.fit_transform(df['Close'].values.reshape(-1,1))

        x_train, y_train, x_test, y_test = load_data(df)

        # Convert data to tensors
        x_train = torch.tensor(x_train, dtype=torch.float32)
        x_test = torch.tensor(x_test, dtype=torch.float32)
        y_train = torch.tensor(y_train, dtype=torch.float32)
        y_test = torch.tensor(y_test, dtype=torch.float32)

        # Define loss and optimizer
        loss_fn = nn.MSELoss()
        optimiser = torch.optim.Adam(model.parameters(), lr=0.01)
        hist = np.zeros(num_epochs)

        # Training loop
        for t in range(num_epochs):
            model.train()
            y_train_pred = model(x_train)

            loss = loss_fn(y_train_pred, y_train)
            print(f"{file}: Epoch {t}, MSE: {loss.item()}", end='\r')
            hist[t] = loss.item()

            # Zero out gradient
            optimiser.zero_grad()

            # Backward pass and optimize
            loss.backward()
            optimiser.step()

        # Test the model
        model.eval()
        with torch.no_grad():
            y_test_pred = model(x_test)

        # Invert predictions and ground truth
        y_train_pred = scaler.inverse_transform(y_train_pred.detach().numpy())
        y_train = scaler.inverse_transform(y_train.detach().numpy())
        y_test_pred = scaler.inverse_transform(y_test_pred.detach().numpy())
        y_test = scaler.inverse_transform(y_test.detach().numpy())

        # Calculate MAE and MSE
        abse = mean_absolute_error(y_test, y_test_pred)
        mse_value = mean_squared_error(y_test, y_test_pred)

        # Store the errors in respective dictionaries
        mae[file] = abse
        mse[file] = mse_value

    except Exception as e:
        mae[file] = f"ERROR: {str(e)}"
        mse[file] = f"ERROR: {str(e)}"

# Save the model state
torch.save(model.state_dict(), os.path.join(mpath, 'model_state.pth'))

for key in mae:
    if isinstance(mae[key], np.float32):
        mae[key] = float(mae[key])
    if isinstance(mse[key], np.float32):
        mse[key] = float(mse[key])

# Save the MAE and MSE dictionaries to files
with open(os.path.join(mpath, 'mae_log.json'), 'w') as f:
    json.dump(mae, f)

with open(os.path.join(mpath, 'mse_log.json'), 'w') as f:
    json.dump(mse, f)

  0%|          | 0/50 [00:00<?, ?it/s]



  2%|▏         | 1/50 [00:29<24:11, 29.62s/it]



  4%|▍         | 2/50 [01:46<46:05, 57.62s/it]



  6%|▌         | 3/50 [03:05<52:41, 67.27s/it]



  8%|▊         | 4/50 [03:33<39:40, 51.76s/it]



 10%|█         | 5/50 [04:07<33:56, 45.27s/it]



 12%|█▏        | 6/50 [05:23<40:53, 55.76s/it]



 14%|█▍        | 7/50 [06:20<40:14, 56.15s/it]



 16%|█▌        | 8/50 [07:34<43:20, 61.91s/it]



 18%|█▊        | 9/50 [08:39<43:00, 62.93s/it]



 20%|██        | 10/50 [09:48<43:11, 64.79s/it]



 22%|██▏       | 11/50 [10:05<32:38, 50.21s/it]



 24%|██▍       | 12/50 [11:13<35:14, 55.64s/it]



 26%|██▌       | 13/50 [12:22<36:47, 59.66s/it]



 28%|██▊       | 14/50 [13:24<36:04, 60.14s/it]



 30%|███       | 15/50 [14:35<36:58, 63.40s/it]



 32%|███▏      | 16/50 [15:47<37:25, 66.06s/it]



 34%|███▍      | 17/50 [16:58<37:11, 67.63s/it]



 36%|███▌      | 18/50 [18:19<38:06, 71.47s/it]



 38%|███▊      | 19/50 [19:25<36:12, 70.09s/it]



 40%|████      | 20/50 [20:36<35:11, 70.38s/it]



 42%|████▏     | 21/50 [21:50<34:32, 71.47s/it]



 42%|████▏     | 21/50 [23:00<31:46, 65.74s/it]


KeyboardInterrupt: 

# Inference

In [None]:
import numpy as np
import random
import pandas as pd

import datetime
import math, time
import itertools
import datetime
from operator import itemgetter
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from math import sqrt
import torch
import torch.nn as nn
from torch.autograd import Variable

import matplotlib.pyplot as plt
import os

In [None]:
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_percentage_error as mape
from tqdm import tqdm

In [None]:
# # Model Parameters
# input_dim = 11
# d_model = 64
# num_layers = 2
# nhead = 4
# dropout = 0.1
# output_dim = 1

# # Initialize the model
# model = TransformerModel(input_dim=input_dim, d_model=d_model, num_layers=num_layers, nhead=nhead, dropout=dropout, output_dim=output_dim)

# Load the trained model
model.load_state_dict(torch.load('/content/drive/My Drive/LAB/output_transformer/model_state.pth'))
model.eval()

log = {}

# Function to perform inference on a single file
def infer(file):
    dpath = "/content/drive/My Drive/LAB/data/"
    mpath = '/content/drive/My Drive/LAB/output_transformer/'

    df=pd.read_csv(dpath + file).set_index('Date')
    df = df[["Close"]]

    scaler = MinMaxScaler(feature_range=(-1, 1))
    df['Close'] = scaler.fit_transform(df['Close'].values.reshape(-1,1))

    x_train, y_train, x_test, y_test = load_data(df)

    x_train = torch.from_numpy(x_train).type(torch.Tensor)
    x_test = torch.from_numpy(x_test).type(torch.Tensor)
    y_train = torch.from_numpy(y_train).type(torch.Tensor)
    y_test = torch.from_numpy(y_test).type(torch.Tensor)

    with torch.no_grad():
        y_train_pred = model(x_train)
        y_test_pred = model(x_test)

    # Inverting the predictions and ground truth values back to the original scale
    y_train_pred = scaler.inverse_transform(y_train_pred.detach().numpy())
    y_train = scaler.inverse_transform(y_train.detach().numpy())
    y_test_pred = scaler.inverse_transform(y_test_pred.detach().numpy())
    y_test = scaler.inverse_transform(y_test.detach().numpy())

    # Plotting the results
    plt.figure(figsize=(15, 6))
    plt.plot(df.index[-len(y_test):], y_test, color='red', label='Real Stock Price')
    plt.plot(df.index[-len(y_test):], y_test_pred, color='blue', label='Predicted Stock Price')
    plt.title(f'{file} Stock Price Prediction')
    plt.xlabel('Time')
    plt.ylabel('Stock Price')
    plt.legend()
    plt.savefig(mpath + file.split('.')[0] + '_transformer.png')
    plt.show()


    mae_value = mean_absolute_error(y_test, y_test_pred)
    mse_value = mean_squared_error(y_test, y_test_pred)

    mae[file] = mae_value
    mse[file] = mse_value



In [None]:
from tqdm import tqdm

# Assuming 'files' is a list of filenames
for file in tqdm(files):
    try:
        result = infer(file)
        log.update(result)  # Store results from inference
    except Exception as e:
        print(f"An error occurred with file {file}: {str(e)}")
        continue

# Optionally, save the log to a file for later analysis
with open('/content/drive/My Drive/LAB/output_transformer/inference_log.json', 'w') as f:
    json.dump(log, f)


In [None]:
import json
with open('/content/drive/My Drive/LAB/output_transformer/mae_log.json', 'w') as f:
    json.dump(mae, f)

with open('/content/drive/My Drive/LAB/output_transformer/mse_log.json', 'w') as f:
    json.dump(mse, f)