In [None]:
import os
import shutil
from pathlib import Path
from yahooquery import Ticker

# Ticker symbol to download; also used as the CSV file name.
STOCK = "aapl"
# Working directory layout: <DATASET_PATH>/us/<STOCK>.csv
DATASET_PATH = Path(".qlib")
REG_DIR = DATASET_PATH / "us"
STOCK_PATH = REG_DIR / f"{STOCK}.csv"

# Start from an empty dataset directory so stale files from a previous run
# cannot leak into this one.
if DATASET_PATH.exists():
    shutil.rmtree(DATASET_PATH)
REG_DIR.mkdir(parents=True)

# Use the STOCK constant rather than repeating the symbol, so changing STOCK
# changes both the downloaded ticker and the output file name together.
aapl = Ticker(STOCK)
price = aapl.history(period="max")

price.to_csv(STOCK_PATH)

In [None]:
# Run the `dump_all` command of scripts/dump_bin.py on the CSV files in .qlib/us,
# writing to .qlib/qlib_data (the directory later passed to qlib.init as
# provider_uri) and including only the listed fields.
# NOTE(review): the script path is relative, so this presumably must be run from
# a directory that contains scripts/dump_bin.py — confirm.
!python scripts/dump_bin.py dump_all --csv_path .qlib/us --qlib_dir .qlib/qlib_data --include_fields open,close,high,low,volume,adjclose

In [None]:
import qlib
from qlib.data import D
from qlib.constant import REG_CN, REG_US

start_date = "2012-01-01"
end_date = "2022-12-31"

# Column name -> Qlib expression. Insertion order matters: it fixes the column
# order of `dataset`, and later cells pick the target column by position.
fields_dict = {
    "Open": "$open",
    "Close": "$close",
    "Open_Close_Diff": "$open-$close",
    "Adj_Close": "$adjclose",
    "High": "$high",
    "Low": "$low",
    "Factor": "$adjclose/$close",
    "Volume": "$volume",
    "High_Low_Diff": "$high-$low",
}

# Rolling averages of open and close: simple (Mean), exponential (EMA) and
# weighted (WMA), each over 3/5/10/20-step windows.
for price_label, price_field in (("Open", "$open"), ("Close", "$close")):
    for name_suffix, operator in (("MA", "Mean"), ("EMA", "EMA"), ("WMA", "WMA")):
        for window in (3, 5, 10, 20):
            fields_dict[f"{price_label}_{name_suffix}{window}"] = (
                f"{operator}({price_field}, {window})"
            )

# MACD-style expressions, all built on the same normalised EMA difference.
dif_expr = "(EMA($close, 12) - EMA($close, 26))/$close"
fields_dict["DIF"] = dif_expr
# The DEA expression string ends with a space; the rename mapping below is built
# from the very same string, so the two stay in sync.
fields_dict["DEA"] = f"EMA({dif_expr}, 9) "
fields_dict["MACD"] = f"{dif_expr} - EMA({dif_expr}, 9)/$close"
# One-step difference of the adjusted close.
fields_dict["Residual"] = "$adjclose-Ref($adjclose,1)"

provider_uri = ".qlib/qlib_data"  # target_dir
qlib.init(provider_uri=provider_uri, region=REG_US)

instruments = ["AAPL"]
fields = fields_dict.values()

# D.features labels its columns with the expression strings; map them back to
# the readable names used as keys in fields_dict.
expression_to_name = {expression: name for name, expression in fields_dict.items()}
raw_features = D.features(
    instruments, fields, start_time=start_date, end_time=end_date, freq="day"
)
dataset = raw_features.rename(columns=expression_to_name)

print(len(dataset))
dataset.isna().sum()

In [None]:
# Keep only the AAPL rows and drop the instrument level from the index.
# The nlevels guard makes this cell safe to re-run: once the instrument level is
# gone, a second `.loc["AAPL"]` would raise a KeyError.
# NOTE(review): assumes the frame returned by D.features has the instrument as
# the outer index level — confirm.
if dataset.index.nlevels > 1:
    dataset = dataset.loc["AAPL"]

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Set the figure size, then draw the feature correlation matrix as a heatmap.
fig, ax = plt.subplots(figsize=(10, 10))
correlations = dataset.corr()
sns.heatmap(correlations, cmap="coolwarm", ax=ax)
fig.tight_layout()
plt.show()

In [None]:
from sklearn.model_selection import train_test_split

# Fraction of rows assigned to the training set.
ratio = 0.8

# shuffle=False keeps the rows in their existing order, so the test set is the
# tail of the series rather than a random sample.
train_data, test_data = train_test_split(
    dataset,
    shuffle=False,
    train_size=ratio,
)
(len(train_data), len(test_data))

In [None]:
import numpy as np
import torch

# Create sequences and labels for training data.
sequence_length = 60  # Number of time steps to look back

# Look the target column up by name instead of relying on a positional magic
# number, so reordering the feature dictionary cannot silently change the label.
target_idx = list(train_data.columns).index("Adj_Close")

# Convert once to a NumPy array; slicing the DataFrame on every iteration is
# much slower and yields the same values.
train_values = train_data.to_numpy()
n_train_windows = len(train_values) - sequence_length
if n_train_windows <= 0:
    raise ValueError(
        f"Need more than {sequence_length} training rows, got {len(train_values)}"
    )

# X: window [i, i + sequence_length); y: the target column of the same window
# shifted one step ahead, i.e. at every step the label is the next value.
X_train = np.array(
    [train_values[i : i + sequence_length] for i in range(n_train_windows)]
)
y_train = np.array(
    [
        train_values[i + 1 : i + sequence_length + 1, target_idx]
        for i in range(n_train_windows)
    ]
)
# Add a trailing feature axis: (windows, sequence_length, 1).
y_train = np.expand_dims(y_train, axis=-1)
X_train.shape, y_train.shape

In [None]:
# Create sequences and labels for testing data.
sequence_length = 30  # Number of time steps to look back

# The label must be the same quantity the model is trained on ("Adj_Close").
# Taking the last column here would select "Residual" instead, making the test
# loss incomparable with the training loss.
target_idx = list(test_data.columns).index("Adj_Close")

# Convert once to a NumPy array instead of slicing the DataFrame per window.
test_values = test_data.to_numpy()
n_test_windows = len(test_values) - sequence_length
if n_test_windows <= 0:
    raise ValueError(
        f"Need more than {sequence_length} test rows, got {len(test_values)}"
    )

# X: window [i, i + sequence_length); y: the target column of the same window
# shifted one step ahead.
X_test = np.array(
    [test_values[i : i + sequence_length] for i in range(n_test_windows)]
)
y_test = np.array(
    [
        test_values[i + 1 : i + sequence_length + 1, target_idx]
        for i in range(n_test_windows)
    ]
)
# Add a trailing feature axis: (windows, sequence_length, 1).
y_test = np.expand_dims(y_test, axis=-1)
X_test.shape, y_test.shape

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Scale every feature to [0, 1]. The scaler sees one row per (window, step),
# hence the reshape to 2-D and back.
train_x_scaler = MinMaxScaler((0, 1))
# Targets are deliberately left unscaled, so this scaler is never fitted; it is
# kept only so the name stays defined for other cells.
train_y_scaler = MinMaxScaler((0, 1))
scaled_X_train = train_x_scaler.fit_transform(
    X_train.reshape(-1, X_train.shape[-1])
).reshape(X_train.shape)
scaled_y_train = y_train

# Apply the statistics learned from the training data to the test data.
# Fitting a second scaler on the test set would map the test inputs onto a
# different scale from the one the model is trained on (and would use
# information from the evaluation period). Test values may therefore fall
# outside [0, 1], which is expected.
test_x_scaler = train_x_scaler
# Never fitted, for the same reason as train_y_scaler.
test_y_scaler = MinMaxScaler((0, 1))
scaled_X_test = test_x_scaler.transform(
    X_test.reshape(-1, X_test.shape[-1])
).reshape(X_test.shape)
scaled_y_test = y_test

scaled_X_train.shape, scaled_y_train.shape, scaled_X_test.shape, scaled_y_test.shape

In [None]:
# Convert the scaled NumPy arrays into float32 tensors, rebinding the
# X_*/y_* names to the tensor versions.
X_train, y_train, X_test, y_test = (
    torch.tensor(array, dtype=torch.float32)
    for array in (scaled_X_train, scaled_y_train, scaled_X_test, scaled_y_test)
)

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

In [None]:
import torch.nn as nn


class RNNModel(nn.Module):
    """Stacked LSTM with a dropout layer and a one-unit linear head.

    The head is applied at every time step, so the model maps an input of
    shape (batch, seq_len, input_size) to an output of shape
    (batch, seq_len, 1).

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each LSTM layer's hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Drop probability applied to the LSTM outputs before the
            linear head. It is not passed to the LSTM itself, so no dropout
            is applied between the stacked layers.
    """

    def __init__(self, input_size, hidden_size, num_layers,dropout=0.5):
        super().__init__()
        self.rnn = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.dropout = nn.Dropout(p=dropout)
        self.linear = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Return one prediction per time step of ``x``."""
        # The final (hidden, cell) state tuple is not needed; only the
        # per-step outputs of the top LSTM layer are used.
        hidden_states, _ = self.rnn(x)
        regularised = self.dropout(hidden_states)
        return self.linear(regularised)

In [None]:
# Network dimensions: one input feature per entry in fields_dict.
input_size = len(fields_dict)
hidden_size = 64
num_layers = 2
# Not passed to RNNModel, whose linear head always has a single output unit.
output_size = 1

# Build the model and move it to the selected device.
model = RNNModel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)
model = model.to(device)

# Mean squared error; torch.nn.L1Loss(reduction="mean") was the alternative
# considered here.
loss_fn = torch.nn.MSELoss(reduction="mean")

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
print(model)

In [None]:
batch_size = 16

# Training loader: batches are reshuffled on every pass over the data.
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=batch_size,
)

# Evaluation loader: batches keep the order of the test windows.
test_dataset = torch.utils.data.TensorDataset(X_test, y_test)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    shuffle=False,
    batch_size=batch_size,
)

In [None]:
num_epochs = 100
train_hist = []
test_hist = []


def _run_epoch(loader, training):
    """Run one pass over ``loader`` and return the mean per-batch loss.

    When ``training`` is true the model is put in train mode and the optimizer
    is stepped after every batch; otherwise the model is put in eval mode and
    gradients are disabled.
    """
    model.train(training)
    running_loss = 0.0
    with torch.set_grad_enabled(training):
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            batch_loss = loss_fn(model(inputs), targets)

            if training:
                optimizer.zero_grad()
                batch_loss.backward()
                optimizer.step()

            running_loss += batch_loss.item()
    # Average over batches (not over samples).
    return running_loss / len(loader)


# Training loop: one training pass followed by one evaluation pass per epoch.
for epoch in range(num_epochs):
    average_loss = _run_epoch(train_loader, training=True)
    train_hist.append(average_loss)

    average_test_loss = _run_epoch(test_loader, training=False)
    test_hist.append(average_test_loss)

    # Report every tenth epoch.
    if (epoch + 1) % 10 == 0:
        print(
            f"Epoch [{epoch+1}/{num_epochs}] - Training Loss: {average_loss:.4f}, Test Loss: {average_test_loss:.4f}"
        )

In [None]:
import matplotlib.pyplot as plt

# Epoch numbers 1..num_epochs on the x axis, one curve per loss history.
x = np.linspace(1, num_epochs, num_epochs)
for history, curve_label in ((train_hist, "Training loss"), (test_hist, "Test loss")):
    plt.plot(x, history, label=curve_label)
plt.legend()
plt.show()

In [None]:
# Define the number of future time steps to forecast
num_forecast_steps = 30

# Position of the forecast target inside each feature row.
target_idx = list(test_data.columns).index("Adj_Close")

# Convert to NumPy. The batch axis is kept on purpose: a bare squeeze() would
# also remove it when there is only a single test window, and the [-1] below
# would then pick a time step instead of a window.
sequence_to_plot = X_test.cpu().numpy()

# Start from a private copy of the most recent test window, shape
# (sequence_length, n_features), so X_test itself is never modified.
historical_data = sequence_to_plot[-1].copy()

# Initialize a list to store the forecasted values
forecasted_values = []

# Use the trained model to forecast future values
model.eval()
with torch.no_grad():
    for _ in range(num_forecast_steps):
        # Prepare the historical_data tensor with a leading batch axis of 1.
        historical_data_tensor = (
            torch.as_tensor(historical_data).unsqueeze(0).float().to(device)
        )

        # Labels are the inputs shifted one step ahead, so the output at the
        # LAST time step is the prediction for the first unseen step.
        predicted_value = model(historical_data_tensor)[0, -1, 0].item()
        forecasted_values.append(predicted_value)

        # Build the next input row. Only the target is predicted, so every
        # other feature is carried forward unchanged from the latest row; this
        # is an approximation that degrades as the horizon grows.
        next_row = historical_data[-1].copy()
        # The model outputs the target in original units while its inputs are
        # min-max scaled; apply the scaler's affine map for the target column
        # (x * scale_ + min_) before feeding the value back in.
        next_row[target_idx] = (
            predicted_value * test_x_scaler.scale_[target_idx]
            + test_x_scaler.min_[target_idx]
        )

        # Slide the window one step along the time axis. axis=0 is required:
        # without it np.roll flattens the array and shifts single elements,
        # mixing features across time steps.
        historical_data = np.roll(historical_data, shift=-1, axis=0)
        historical_data[-1] = next_row

# NOTE: the forecast covers steps after the end of the last test window. If a
# date axis for it is needed, generate future dates following
# test_data.index[-1] (e.g. with pd.date_range).

In [None]:
# set the size of the plot
plt.rcParams["figure.figsize"] = [14, 4]

# Earlier part of the test series, shown for context.
plt.plot(
    test_data.index[-100:-30],
    test_data["Adj_Close"].iloc[-100:-30],
    label="test_data",
    color="b",
)

# Actual values over the final 30 rows of the test series.
plt.plot(
    test_data.index[-30:],
    test_data["Adj_Close"].iloc[-30:],
    label="actual values",
    color="green",
)

# Forecasted values. The targets are never scaled (scaled_y_* are plain copies
# of y_*), so the model output is already in original units. test_y_scaler is
# never fitted, and calling inverse_transform on it would raise an error.
forecasted_cases = np.asarray(forecasted_values, dtype=float).flatten()
print(forecasted_cases)

# The forecast is overlaid on the last 30 dates for visual comparison.
plt.plot(
    test_data.index[-30:],
    forecasted_cases,
    label="forecasted values",
    color="red",
)

plt.xlabel("Time Step")
plt.ylabel("Value")
plt.legend()
plt.title("Time Series Forecasting")
plt.grid(True)
plt.show()