<a href="https://colab.research.google.com/github/nirajband/FMML_Projects_and-Labs/blob/main/Transformer1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
# Install PyTorch
!pip install torch torchvision




In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, Dataset


In [24]:
# Load the dataset
df = pd.read_csv('/content/yahoo_stock.csv')

# Convert date columns to datetime if necessary
for col in df.columns:
    if 'date' in col.lower():
        df[col] = pd.to_datetime(df[col], errors='coerce')

# Drop rows with NaN values
df.dropna(inplace=True)

# Convert all non-numeric columns to numeric (e.g., dates to ordinal)
for col in df.columns:
    if df[col].dtype == 'datetime64[ns]':
        df[col] = df[col].map(pd.Timestamp.toordinal)

# Ensure all columns are numeric
df = df.apply(pd.to_numeric, errors='coerce')

# Drop any remaining NaN values
df.dropna(inplace=True)

# Assuming the last column is the target variable and the rest are features
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_test = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)


In [25]:
class StockDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = StockDataset(X_train, y_train)
test_dataset = StockDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [26]:
class TransformerModel(nn.Module):
    def __init__(self, input_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout=0.1):
        super(TransformerModel, self).__init__()
        self.transformer = nn.Transformer(d_model, nhead, num_encoder_layers, num_encoder_layers, dim_feedforward, dropout)
        self.fc_in = nn.Linear(input_dim, d_model)
        self.fc_out = nn.Linear(d_model, 1)

    def forward(self, src):
        src = self.fc_in(src)
        src = src.unsqueeze(1)  # Add a sequence dimension
        output = self.transformer(src, src)
        output = output.mean(dim=1)  # Pool the sequence dimension
        output = self.fc_out(output)
        return output

input_dim = X_train.shape[1]
d_model = 64
nhead = 4
num_encoder_layers = 2
dim_feedforward = 128

model = TransformerModel(input_dim, d_model, nhead, num_encoder_layers, dim_feedforward)




In [27]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [29]:
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/10], Loss: 5487681.5000
Epoch [2/10], Loss: 6741459.0000
Epoch [3/10], Loss: 5560884.0000
Epoch [4/10], Loss: 5446133.0000
Epoch [5/10], Loss: 5776695.0000
Epoch [6/10], Loss: 5686371.0000
Epoch [7/10], Loss: 5108828.0000
Epoch [8/10], Loss: 4937902.0000
Epoch [9/10], Loss: 4587239.5000
Epoch [10/10], Loss: 4190870.5000


In [30]:
model.eval()
with torch.no_grad():
    test_loss = 0
    for inputs, targets in test_loader:
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item()

    test_loss /= len(test_loader)
    print(f'Test Loss: {test_loss:.4f}')


Test Loss: 4952951.0000
