In [1]:
import torch
print(torch.__version__)

2.5.1


In [2]:
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("StockPred").getOrCreate()
print(spark.version)

4.0.0


In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_absolute_error, r2_score

# -----------------------
# Load CSV
# -----------------------
df = pd.read_csv("D:\\stock\\Market.csv")

# Drop non-numeric columns (e.g., Date)
df = df.drop(columns=["Date"], errors="ignore")
df = df.apply(pd.to_numeric, errors="coerce").fillna(0)
from sklearn.preprocessing import MinMaxScaler

# Scale all numeric columns between 0 and 1
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X = scaler_X.fit_transform(df.drop(columns=["Close"]).values)
y = scaler_y.fit_transform(df[["Close"]].values)  # keep 2D for scaler


dataset = TensorDataset(X, y)


In [23]:
class StockNN(nn.Module):
    def __init__(self, input_dim):
        super(StockNN, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )
    def forward(self, x):
        return self.layers(x)

In [24]:
def train_model(model, dataloader, optimizer, criterion, epochs):
    for epoch in range(epochs):
        all_preds, all_targets = [], []

        for xb, yb in dataloader:
            preds = model(xb)
            loss = criterion(preds, yb)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            all_preds.append(preds.detach())
            all_targets.append(yb.detach())

        all_preds = torch.cat(all_preds).numpy()
        all_targets = torch.cat(all_targets).numpy()

        mae = mean_absolute_error(all_targets, all_preds)
        r2 = r2_score(all_targets, all_preds)

        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}, MAE: {mae:.4f}, R²: {r2:.4f}")

In [26]:
# Model 1
print("\n--- Model 1: Divide Epochs ---")
model1 = StockNN(input_dim=X.shape[1])
optimizer1 = optim.Adam(model1.parameters(), lr=0.001)
criterion = nn.MSELoss()

dataloader_full = DataLoader(dataset, batch_size=32, shuffle=True)

for block in range(10):   # 10 blocks × 10 epochs = 100 epochs
    print(f"\nBlock {block+1}")
    train_model(model1, dataloader_full, optimizer1, criterion, epochs=10)


--- Model 1: Divide Epochs ---

Block 1
Epoch 1/10, Loss: 384261832048640.0000, MAE: 438362.5938, R²: -718550.9375
Epoch 2/10, Loss: 11700794.0000, MAE: 151970.2188, R²: -200255.0781
Epoch 3/10, Loss: 20803096576.0000, MAE: 46160.4219, R²: -8114.5039
Epoch 4/10, Loss: 59223444.0000, MAE: 29275.3242, R²: -3975.9580
Epoch 5/10, Loss: 25637284.0000, MAE: 4621.8945, R²: -4.7245
Epoch 6/10, Loss: 20301504.0000, MAE: 4684.1943, R²: -2.9557
Epoch 7/10, Loss: 72180640.0000, MAE: 3720.4336, R²: 0.3764
Epoch 8/10, Loss: 5488175.0000, MAE: 3690.5142, R²: 0.3841
Epoch 9/10, Loss: 160804992.0000, MAE: 4023.4597, R²: -0.3683
Epoch 10/10, Loss: 35582104.0000, MAE: 4065.6819, R²: -6.2156

Block 2
Epoch 1/10, Loss: 45536612.0000, MAE: 4006.3345, R²: -2.3984
Epoch 2/10, Loss: 30588850.0000, MAE: 4188.7954, R²: -0.7309
Epoch 3/10, Loss: 28720232.0000, MAE: 3808.0859, R²: -0.2549
Epoch 4/10, Loss: 3524144.2500, MAE: 3863.5383, R²: 0.3572
Epoch 5/10, Loss: 19921358.0000, MAE: 3797.4646, R²: 0.1950
Epoch 6

In [27]:
 #Model 2
model2 = StockNN(input_dim=X.shape[1])
optimizer2 = optim.Adam(model2.parameters(), lr=0.001)
criterion = nn.MSELoss()

subset_size = len(dataset) // 10
subsets = [Subset(dataset, range(i*subset_size, (i+1)*subset_size)) for i in range(10)]

for i, subset in enumerate(subsets):
    print(f"\nSubset {i+1}")
    dataloader_subset = DataLoader(subset, batch_size=32, shuffle=True)
    train_model(model2, dataloader_subset, optimizer2, criterion, epochs=10)


Subset 1
Epoch 1/10, Loss: 3163849.5000, MAE: 387213.8438, R²: -2033783.2500
Epoch 2/10, Loss: 27373920.0000, MAE: 1654.6405, R²: -2.8943
Epoch 3/10, Loss: 10425511.0000, MAE: 1081.1443, R²: -0.1835
Epoch 4/10, Loss: 572826.2500, MAE: 1358.3533, R²: -1.9950
Epoch 5/10, Loss: 328453248.0000, MAE: 2091.5535, R²: -5.6224
Epoch 6/10, Loss: 11435121.0000, MAE: 4641.8291, R²: -69.6009
Epoch 7/10, Loss: 35415818240.0000, MAE: 79061.9844, R²: -24697.9609
Epoch 8/10, Loss: 38090.3750, MAE: 553890.4375, R²: -1344335.5000
Epoch 9/10, Loss: 6130262.0000, MAE: 852.4103, R²: 0.3952
Epoch 10/10, Loss: 1772995.3750, MAE: 1071.1125, R²: -0.1834

Subset 2
Epoch 1/10, Loss: 783524.1250, MAE: 1206.9003, R²: 0.7137
Epoch 2/10, Loss: 4464794.0000, MAE: 1610.5055, R²: 0.3622
Epoch 3/10, Loss: 5717619.5000, MAE: 1529.2286, R²: 0.4523
Epoch 4/10, Loss: 6106918.0000, MAE: 1434.5635, R²: 0.5845
Epoch 5/10, Loss: 12999557.0000, MAE: 1834.1028, R²: 0.2898
Epoch 6/10, Loss: 113191888.0000, MAE: 4680.4556, R²: -7.9