In [1]:
import pandas as pd
import numpy as np
import torch
from sklearn.preprocessing import StandardScaler
import torch.nn as nn
import torch.optim as optim

In [2]:
torch.cuda.is_available()

True

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"device{device}")

In [5]:
merged_df = pd.read_csv('date.csv') 

In [6]:
merged_df

Unnamed: 0,date,open,high,low,close,adjClose,volume,unadjustedVolume,change,changePercent,...,standardDeviation_60,sma_252,ema_252,wma_252,dema_252,tema_252,williams_252,rsi_252,adx_252,standardDeviation_252
0,2024-01-12,33.70,33.80,33.60,33.75,33.750000,5221622,5221622,0.05,0.14837,...,1.020919,35.925794,35.464153,35.180345,33.758654,33.572743,-73.033708,58.030311,11.549357,2.062031
1,2024-01-11,33.70,33.80,33.60,33.70,33.700000,6590499,6590499,0.00,0.00000,...,1.044641,35.927976,35.477811,35.197563,33.772382,33.585058,-73.595506,57.998561,11.561317,2.060017
2,2024-01-10,34.05,34.05,33.70,33.70,33.700000,10231832,10231832,-0.35,-1.03000,...,1.054935,35.930754,35.491977,35.215197,33.787124,33.598885,-73.595506,57.998561,11.573325,2.057482
3,2024-01-09,34.30,34.30,34.05,34.05,34.050000,6191243,6191243,-0.25,-0.72886,...,1.054960,35.932143,35.506256,35.232842,33.802097,33.613052,-69.662921,58.304875,11.584803,2.056093
4,2024-01-08,34.40,34.55,34.25,34.25,34.250000,5522713,5522713,-0.15,-0.43605,...,1.053184,35.933730,35.517860,35.247734,33.811725,33.619199,-67.415730,58.480667,11.594301,2.054793
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
731,2021-01-20,37.87,37.91,37.09,37.23,33.397839,38529978,38529978,-0.64,-1.69000,...,0.469706,37.966000,38.652031,37.760667,38.624533,38.597498,-92.307692,0.000000,0.000000,0.469706
732,2021-01-19,37.82,38.18,37.82,37.96,34.050301,13261660,13261660,0.14,0.37017,...,0.326343,38.150000,38.663362,38.026000,38.646975,38.630837,-74.803150,0.000000,0.000000,0.326343
733,2021-01-18,38.14,38.14,37.64,37.82,33.927963,27598255,27598255,-0.32,-0.83901,...,0.354902,38.213333,38.668967,38.070000,38.658054,38.647261,-85.826772,0.000000,0.000000,0.354902
734,2021-01-15,38.64,38.64,38.14,38.14,34.213417,33681520,33681520,-0.50,-1.29000,...,0.270000,38.410000,38.675731,38.320000,38.671496,38.667295,-100.000000,0.000000,0.000000,0.270000


In [7]:
# 選擇特徵
features = ['open', 'high', 'low', 'volume', 'vwap', 'sma_5', 'sma_20']
merged_df = merged_df[features + ['date', 'close']]

# 轉換日期
merged_df['date'] = pd.to_datetime(merged_df['date'])
merged_df.set_index('date', inplace=True)

# 資料標準化
scaler = StandardScaler()
scaled_data = scaler.fit_transform(merged_df)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_df['date'] = pd.to_datetime(merged_df['date'])


In [8]:
scaled_data

array([[-1.22001657, -1.24051179, -1.19287512, ..., -1.18040729,
        -1.11545972, -1.19845474],
       [-1.22001657, -1.24051179, -1.19287512, ..., -1.15469193,
        -1.10833013, -1.20908769],
       [-1.14528462, -1.18765413, -1.17123988, ..., -1.13326246,
        -1.10449111, -1.20908769],
       ...,
       [-0.27198834, -0.32290284, -0.31881121, ..., -0.25393982,
        -0.26740343, -0.33293267],
       [-0.16522841, -0.21718752, -0.21063499, ..., -0.21179519,
        -0.22426023, -0.26488179],
       [-0.10757804, -0.16010125, -0.11327638, ..., -0.15393562,
        -0.16502974, -0.15004594]])

In [9]:
# 創建時間序列資料
def create_sequences(data, seq_length):
    xs, ys = [], []
    for i in range(len(data)-seq_length):
        x = data[i:i+seq_length]
        y = data[i+seq_length, -1]  # 最後一列是 'close' 價格
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

seq_length = 30  # 例如使用過去30天的數據來預測未來股價
X, y = create_sequences(scaled_data, seq_length)

# 轉換為PyTorch的tensor
X_tensor = torch.from_numpy(X).float().to(device)
y_tensor = torch.from_numpy(y).float().to(device)

# 分割訓練集和測試集
train_size = int(len(X) * 0.8)
X_train, X_test = X_tensor[:train_size], X_tensor[train_size:]
y_train, y_test = y_tensor[:train_size], y_tensor[train_size:]

class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)
    
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out



In [10]:
input_size = len(features)+1  # 確保這與選擇的特徵數量一致
hidden_size = 50
num_layers = 5
model = LSTM(input_size, hidden_size, num_layers).to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    outputs = model(X_train)
    optimizer.zero_grad()
    loss = criterion(outputs, y_train.unsqueeze(1))
    loss.backward()
    optimizer.step()
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

model.eval()
with torch.no_grad():
    test_outputs = model(X_test)
    test_loss = criterion(test_outputs, y_test.unsqueeze(1))
    print(f'Test Loss: {test_loss.item():.4f}')

Epoch [10/100], Loss: 0.7736
Epoch [20/100], Loss: 0.4358
Epoch [30/100], Loss: 0.1844
Epoch [40/100], Loss: 0.0920
Epoch [50/100], Loss: 0.0535
Epoch [60/100], Loss: 0.0422
Epoch [70/100], Loss: 0.0361
Epoch [80/100], Loss: 0.0336
Epoch [90/100], Loss: 0.0323
Epoch [100/100], Loss: 0.0314
Test Loss: 0.2853
