In [12]:
import pandas as pd
import numpy as np
from datetime import datetime
import jdatetime
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    mean_squared_error, mean_absolute_error, 
    r2_score, mean_absolute_percentage_error
)
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader
import plotly.express as px
import plotly.graph_objects as go
from tqdm import tqdm
import torch
from torch import nn
import torch.optim as optim

In [13]:
df = pd.read_csv('../data/processed/economics.csv')
df.head()

Unnamed: 0,Date,b1,b2,b3,b4,b5,b7,b8,b9,b10,b14,b17,b18,B-23,B-24
0,1360-01-01,2806.0,,918.0,4380.0,25.1,17.0,19.0,77.7,270.0,1078.5,,,,
1,1360-04-01,2437.0,,834.0,4552.0,20.8,18.0,19.0,81.3,270.0,1191.1,,,,
2,1360-07-01,2600.0,,720.0,4653.8,23.3,18.0,20.0,79.6,270.0,1246.2,,,,
3,1360-10-01,2355.0,,750.0,5236.1,18.4,20.0,19.0,80.8,270.0,1408.1,,,,
4,1361-01-01,2332.0,,850.0,5250.2,44.8,20.0,22.0,82.4,350.0,1324.9,0.5,0.5,0.233333,


In [14]:
# convert start_date and end_date to jdatetime
df['Date'] = df['Date'].apply(lambda x: jdatetime.datetime.strptime(x, '%Y-%m-%d'))
# convert start_date and end_date to gregorian
df['Date'] = df['Date'].apply(lambda x: x.togregorian())
df.head()

Unnamed: 0,Date,b1,b2,b3,b4,b5,b7,b8,b9,b10,b14,b17,b18,B-23,B-24
0,1981-03-21,2806.0,,918.0,4380.0,25.1,17.0,19.0,77.7,270.0,1078.5,,,,
1,1981-06-22,2437.0,,834.0,4552.0,20.8,18.0,19.0,81.3,270.0,1191.1,,,,
2,1981-09-23,2600.0,,720.0,4653.8,23.3,18.0,20.0,79.6,270.0,1246.2,,,,
3,1981-12-22,2355.0,,750.0,5236.1,18.4,20.0,19.0,80.8,270.0,1408.1,,,,
4,1982-03-21,2332.0,,850.0,5250.2,44.8,20.0,22.0,82.4,350.0,1324.9,0.5,0.5,0.233333,


In [15]:
# Date to Unix timestamp
df['Date'] = pd.to_datetime(df['Date']).astype(int) / 10**9
df.head()

Unnamed: 0,Date,b1,b2,b3,b4,b5,b7,b8,b9,b10,b14,b17,b18,B-23,B-24
0,353980800.0,2806.0,,918.0,4380.0,25.1,17.0,19.0,77.7,270.0,1078.5,,,,
1,362016000.0,2437.0,,834.0,4552.0,20.8,18.0,19.0,81.3,270.0,1191.1,,,,
2,370051200.0,2600.0,,720.0,4653.8,23.3,18.0,20.0,79.6,270.0,1246.2,,,,
3,377827200.0,2355.0,,750.0,5236.1,18.4,20.0,19.0,80.8,270.0,1408.1,,,,
4,385516800.0,2332.0,,850.0,5250.2,44.8,20.0,22.0,82.4,350.0,1324.9,0.5,0.5,0.233333,


In [16]:
df.dtypes

Date    float64
b1      float64
b2      float64
b3      float64
b4      float64
b5      float64
b7      float64
b8      float64
b9      float64
b10     float64
b14     float64
b17     float64
b18     float64
B-23    float64
B-24    float64
dtype: object

In [17]:
# back fill b8 col
df['b8'] = df['b8'].fillna(method='ffill')
# get nan proportion
df.isna().sum() / len(df)

Date    0.000000
b1      0.046512
b2      0.255814
b3      0.046512
b4      0.046512
b5      0.046512
b7      0.046512
b8      0.000000
b9      0.046512
b10     0.046512
b14     0.046512
b17     0.069767
b18     0.069767
B-23    0.069767
B-24    0.279070
dtype: float64

In [18]:
# normalize the data
scaler = MinMaxScaler(feature_range=(-1, 1))

df_cols = df.columns

df = scaler.fit_transform(df)

df = pd.DataFrame(df, columns=df_cols)
df.head()

Unnamed: 0,Date,b1,b2,b3,b4,b5,b7,b8,b9,b10,b14,b17,b18,B-23,B-24
0,-1.0,-0.664623,,-0.965079,-1.0,-0.999941,-1.0,-1.0,-0.99938,-1.0,-1.0,,,,
1,-0.988089,-0.745403,,-0.979894,-0.999993,-0.999978,-0.999971,-1.0,-0.999211,-1.0,-0.999739,,,,
2,-0.976178,-0.70972,,-1.0,-0.999989,-0.999956,-0.999971,-0.999977,-0.999291,-1.0,-0.999611,,,,
3,-0.964652,-0.763354,,-0.994709,-0.999965,-0.999998,-0.999912,-1.0,-0.999234,-1.0,-0.999236,,,,
4,-0.953253,-0.768389,,-0.977072,-0.999964,-0.999772,-0.999912,-0.999932,-0.999159,-0.999422,-0.999429,-1.0,-1.0,-0.999861,


In [19]:
# split data to train and test
# x_train, x_test, y_train, y_test = train_test_split(
#     df[[c for c in df if c not in ['y']]], df['y'], test_size=0.2, random_state=42
# )
x_train, x_test, y_train, y_test = train_test_split(
    df['Date'], df['b8'], test_size=0.2, random_state=42
)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(137,) (35,) (137,) (35,)


In [20]:
# Assuming your x_train and x_test are pandas Series, reshape them to (-1, 1) and then normalize
x_train_normalized = scaler.fit_transform(x_train.values.reshape(-1, 1))
x_test_normalized = scaler.transform(x_test.values.reshape(-1, 1))

# Reshape data from [samples, features] to [samples, timesteps, features] 
# For time series problems, you might need to convert your 2D data to 3D data
# Here we take 1 as timestep, you might need to adjust based on your data and problem
x_train_final = x_train_normalized.reshape(-1, 1, 1)
x_test_final = x_test_normalized.reshape(-1, 1, 1)

# Convert data to tensors
x_train_tensor = torch.from_numpy(x_train_final).float()
y_train_tensor = torch.from_numpy(y_train.values).float()
x_test_tensor = torch.from_numpy(x_test_final).float()
y_test_tensor = torch.from_numpy(y_test.values).float()

# Create DataLoader objects
train_data = TensorDataset(x_train_tensor, y_train_tensor)
train_loader = DataLoader(train_data, batch_size=32)
test_data = TensorDataset(x_test_tensor, y_test_tensor)
test_loader = DataLoader(test_data, batch_size=32)

In [21]:
class Model(nn.Module):
    def __init__(self, feature_size, num_layers, dropout=0.1):
        super(Model, self).__init__()
        self.lstm = nn.LSTM(feature_size, feature_size, num_layers=num_layers, batch_first=True)
        transformer_layer = nn.TransformerEncoderLayer(d_model=feature_size, nhead=1, dropout=dropout)
        self.transformer = nn.TransformerEncoder(transformer_layer, num_layers=num_layers)

    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.transformer(x)
        return x

In [22]:
# Initialize your model
model = Model(feature_size=1, num_layers=10)

# Use Mean Squared Error Loss for regression problems
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Training loop
for epoch in range(num_epochs):
    model.train() 
    for batch in train_loader:
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimizations
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')

# Evaluation loop
model.eval()
total_loss = 0
predictions = []
actuals = []
epoch_losses = []
with torch.no_grad():
    for batch in test_loader:
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        total_loss += loss.item()
        predictions.extend(outputs.squeeze().detach().cpu().numpy())
        actuals.extend(labels.squeeze().detach().cpu().numpy())
    epoch_losses.append(loss.item())
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')
print(f'Test Loss: {total_loss/len(test_loader)}')

# Convert lists to numpy arrays
predictions = np.array(predictions)
actuals = np.array(actuals)

# Compute metrics
mse = mean_squared_error(actuals, predictions)
mae = mean_absolute_error(actuals, predictions)
r2 = r2_score(actuals, predictions)

print(f'Mean Squared Error: {mse}')
print(f'Mean Absolute Error: {mae}')
print(f'R^2 Score: {r2}')

Epoch 1/10, Loss: 0.8828027844429016


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 2/10, Loss: 0.8153678774833679
Epoch 3/10, Loss: 0.7534667253494263
Epoch 4/10, Loss: 0.6974374055862427
Epoch 5/10, Loss: 0.6472606062889099
Epoch 6/10, Loss: 0.602830708026886
Epoch 7/10, Loss: 0.5639131665229797
Epoch 8/10, Loss: 0.5303089618682861
Epoch 9/10, Loss: 0.5015811920166016
Epoch 10/10, Loss: 0.4773508608341217
Epoch 10/10, Loss: 0.25898027420043945
Test Loss: 0.3018205165863037
Mean Squared Error: 0.3373176157474518
Mean Absolute Error: 0.5216992497444153
R^2 Score: -0.48711456328444713


  return F.mse_loss(input, target, reduction=self.reduction)
