In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import datetime as dt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import warnings

warnings.filterwarnings("ignore")
dataset = pd.read_csv('0050_20230427_20230602.csv')  # 讀取訓練集
dataset

FileNotFoundError: [Errno 2] No such file or directory: '0050_20230427_20230602.csv'

In [None]:
def calculate_rsi(data,window):
    # 計算價格變動
    delta = data.diff()

    # 去除第一個NaN值
    delta = delta[1:]

    # 分割正價變動和負價變動
    positive = delta.copy()
    negative = delta.copy()
    positive[positive < 0] = 0
    negative[negative > 0] = 0

    # 計算平均增長率
    avg_gain = positive.rolling(window).mean()
    avg_loss = abs(negative.rolling(window).mean())

    # 計算相對強度指數 (RSI)
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))

    return rsi

In [None]:
def calculate_macd(data, ema_short=12, ema_long=26, signal_period=9):
    # 計算快速移動平均（EMA12）
    ema12 = data.ewm(span=ema_short, adjust=False).mean()

    # 計算慢速移動平均（EMA26）
    ema26 = data.ewm(span=ema_long, adjust=False).mean()

    # 計算 DIF（快速移動平均減去慢速移動平均）
    dif = ema12 - ema26

    # 計算九日移動平均（MACD）
    macd = dif.ewm(span=signal_period, adjust=False).mean()

    return dif, macd

In [None]:
data = pd.DataFrame()
# 目前採用RSI & MACD兩種指標，計算出RSI & MACD的值後加入到原本data中
RSIWindow = 5 # 計算rsi的時間長度
data['Date'] = pd.to_datetime(dataset['Date'])
data['Open'] = dataset['Open']
data['Close'] = dataset['Close']
data['Year'] = data['Date'].dt.year
data['Month'] = data['Date'].dt.month
data['Day'] = data['Date'].dt.day
data['WeekDay']=data['Date'].dt.strftime('%A')
data['RSI'] = calculate_rsi(data['Close'],RSIWindow)
data['DIF'] = calculate_macd(data['Close'])[0]
data['MACD'] = calculate_macd(data['Close'])[1]
data['Pillar'] = data['DIF'] - data['MACD']

data

Unnamed: 0,Date,Open,Close,Year,Month,Day,WeekDay,RSI,DIF,MACD,Pillar
0,2023-04-27,116.050003,116.0,2023,4,27,Thursday,,0.0,0.0,0.0
1,2023-04-28,116.849998,117.400002,2023,4,28,Friday,,0.111681,0.022336,0.089345
2,2023-05-02,117.449997,117.699997,2023,5,2,Tuesday,,0.221839,0.062237,0.159602
3,2023-05-03,117.349998,117.199997,2023,5,3,Wednesday,,0.265731,0.102936,0.162795
4,2023-05-04,117.0,117.5,2023,5,4,Thursday,,0.321023,0.146553,0.17447
5,2023-05-05,117.699997,117.800003,2023,5,5,Friday,82.142877,0.384616,0.194166,0.19045
6,2023-05-08,118.5,118.550003,2023,5,8,Monday,76.744203,0.489886,0.25331,0.236576
7,2023-05-09,118.599998,118.900002,2023,5,9,Tuesday,77.272775,0.5947,0.321588,0.273112
8,2023-05-10,118.800003,117.800003,2023,5,10,Wednesday,60.714383,0.582292,0.373729,0.208564
9,2023-05-11,118.25,117.550003,2023,5,11,Thursday,50.909146,0.545993,0.408181,0.137811


In [None]:
# 會drop掉前幾行RSI為空值
data.dropna(inplace=True)
data.reset_index(drop=True,inplace=True)
data.to_csv('test_SP2.csv', index=False) # for sentiment analysis
data

Unnamed: 0,Date,Open,Close,Year,Month,Day,WeekDay,RSI,DIF,MACD,Pillar
0,2023-05-05,117.699997,117.800003,2023,5,5,Friday,82.142877,0.384616,0.194166,0.19045
1,2023-05-08,118.5,118.550003,2023,5,8,Monday,76.744203,0.489886,0.25331,0.236576
2,2023-05-09,118.599998,118.900002,2023,5,9,Tuesday,77.272775,0.5947,0.321588,0.273112
3,2023-05-10,118.800003,117.800003,2023,5,10,Wednesday,60.714383,0.582292,0.373729,0.208564
4,2023-05-11,118.25,117.550003,2023,5,11,Thursday,50.909146,0.545993,0.408181,0.137811
5,2023-05-12,117.150002,117.199997,2023,5,12,Friday,39.285617,0.48341,0.423227,0.060183
6,2023-05-15,116.949997,117.199997,2023,5,15,Monday,17.073071,0.428869,0.424355,0.004513
7,2023-05-16,117.900002,118.599998,2023,5,16,Tuesday,45.161251,0.492931,0.43807,0.05486
8,2023-05-17,118.650002,120.550003,2023,5,17,Wednesday,84.810019,0.69306,0.489068,0.203992
9,2023-05-18,121.800003,122.150002,2023,5,18,Thursday,93.396125,0.969594,0.585174,0.38442


In [None]:
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import OneHotEncoder


In [None]:
data = pd.read_csv('test_SP3.csv')
# data with further processing, correspondent to the sentiment data
data['Date'] = pd.to_datetime(data['Date'], format='%Y-%m-%d')
data

Unnamed: 0,Date,WeekDay_Friday,WeekDay_Monday,WeekDay_Thursday,WeekDay_Tuesday,WeekDay_Wednesday,RSI,DIF,MACD,Pillar,Open,RESULT
0,2023-05-05,1.0,0.0,0.0,0.0,0.0,82.142877,0.384616,0.194166,0.19045,117.699997,+
1,2023-05-08,0.0,1.0,0.0,0.0,0.0,76.744203,0.489886,0.25331,0.236576,118.5,-
2,2023-05-10,0.0,0.0,0.0,0.0,1.0,60.714383,0.582292,0.373729,0.208564,118.800003,-
3,2023-05-11,0.0,0.0,1.0,0.0,0.0,50.909146,0.545993,0.408181,0.137811,118.25,+
4,2023-05-12,1.0,0.0,0.0,0.0,0.0,39.285617,0.48341,0.423227,0.060183,117.150002,+
5,2023-05-16,0.0,0.0,0.0,1.0,0.0,45.161251,0.492931,0.43807,0.05486,117.900002,+
6,2023-05-18,0.0,0.0,1.0,0.0,0.0,93.396125,0.969594,0.585174,0.38442,121.800003,-
7,2023-05-23,0.0,0.0,0.0,1.0,0.0,92.857078,1.453091,0.961254,0.491837,122.300003,-
8,2023-05-24,0.0,0.0,0.0,0.0,1.0,68.420982,1.464407,1.061885,0.402523,121.650002,+
9,2023-05-26,1.0,0.0,0.0,0.0,0.0,82.857114,1.870172,1.305201,0.56497,125.0,x


In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [None]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Step 2: Split the data into features (X) and labels (Y)
X = data[[ 'WeekDay_Friday', 'WeekDay_Monday', 'WeekDay_Thursday',	'WeekDay_Tuesday',	'WeekDay_Wednesday',	'RSI', 'DIF',	'MACD',	'Pillar',	'Open']]
Y = data['RESULT']

# Step 3: Split the data into train and test sets based on the date
train_end_date = pd.to_datetime('2023-05-15')
train_df = data[data['Date'] >= train_end_date]
test_df = data[data['Date'] < train_end_date]

X_train = train_df[[ 'WeekDay_Friday', 'WeekDay_Monday', 'WeekDay_Thursday',	'WeekDay_Tuesday',	'WeekDay_Wednesday',	'RSI', 'DIF',	'MACD',	'Pillar',	'Open']]
Y_train = train_df['RESULT']
X_test = test_df[[ 'WeekDay_Friday', 'WeekDay_Monday', 'WeekDay_Thursday',	'WeekDay_Tuesday',	'WeekDay_Wednesday',	'RSI', 'DIF',	'MACD',	'Pillar',	'Open']]
Y_test = test_df['RESULT']

# Step 4: Normalize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 5: Reshape the features
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

# Step 6: Encode the categorical labels
label_encoder = LabelEncoder()
Y_train = label_encoder.fit_transform(Y_train)
Y_test = label_encoder.transform(Y_test)

# Step 7: Perform one-hot encoding on the categorical labels
num_classes = len(label_encoder.classes_)
Y_train = to_categorical(Y_train, num_classes=num_classes)
Y_test = to_categorical(Y_test, num_classes=num_classes)


# Step 8: Build and train the LSTM model
model = Sequential()
model.add(LSTM(units=64, input_shape=(1, X_train.shape[2])))
model.add(Dense(units=Y_train.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, Y_train, epochs=10, batch_size=32)

# Step 9: Get predictions on the test set
class_names = label_encoder.classes_
y_pred_prob = model.predict(X_test)
y_pred_labels = [class_names[label] for label in y_pred_prob.argmax(axis=1)]
Y_test_labels = [class_names[label] for label in Y_test.argmax(axis=1)]
loss, accuracy = model.evaluate(X_test, Y_test)
print('Test loss:', loss, 'Test accuracy:', accuracy)

print(y_pred_labels,'\n', Y_test_labels)
# Step 10: Generate Classification Report
report = classification_report(Y_test_labels, y_pred_labels)
print('Classification Report:\n', report)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 1.0811831951141357 Test accuracy: 0.4000000059604645
['+', '+', '+', '-', '+'] 
 ['+', '-', '-', '+', '+']
Classification Report:
               precision    recall  f1-score   support

           +       0.50      0.67      0.57         3
           -       0.00      0.00      0.00         2

    accuracy                           0.40         5
   macro avg       0.25      0.33      0.29         5
weighted avg       0.30      0.40      0.34         5



In [None]:
best_acc = 0
best_units = 0
for i in range(10,101):
  optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
  model = Sequential()
  model.add(LSTM(units=i, input_shape=(1, X_train.shape[2])))
  model.add(Dense(units=Y_train.shape[1], activation='softmax'))
  model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
  model.fit(X_train, Y_train, epochs=10, batch_size=32)

  y_pred_prob = model.predict(X_test)
  loss, cur_acc = model.evaluate(X_test, Y_test)
  if cur_acc > best_acc:
    best_acc = cur_acc
    best_units = i

print(best_acc, best_units)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
E

In [None]:
# 繪製RSI買賣時機圖
pltdat = data
fig, ax1 = plt.subplots()

ax1.plot(pltdat['Date'],pltdat['Close'],label='Close') # 繪製股價折線圖
buy_signals = pltdat[pltdat['RSI standard'] == 1] # 設定買點
sell_signals = pltdat[pltdat['RSI standard'] == -1] # 設定賣點
ax1.scatter(buy_signals['Date'], buy_signals['Close'], color='green', label='Buy')
ax1.scatter(sell_signals['Date'], sell_signals['Close'], color='red', label='Sell') # 將買賣點標示在圖上
ax2 = ax1.twinx()
ax2.plot(pltdat['Date'],pltdat['RSI'],label='RSI',color='orange') # 繪製RSI折線圖
ax2.axhline(y=standard[0], color='grey', linestyle='--')
ax2.axhline(y=standard[1], color='grey', linestyle='--')
ax2.set_ylabel('RSI')
ax2.set_ylim([0,110])

ax1.set_xlabel('Date')
ax1.set_ylabel('Close')
ax2.set_xlabel('Date')
ax2.set_ylabel('RSI')
handles1, labels1 = ax1.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
handles = handles1 + handles2
labels = labels1 + labels2
ax1.legend(handles, labels, loc='upper right')
ax1.tick_params(axis='x', labelrotation = 90)
ax1.set_title('Buy when RSI < %d, Sell when RSI > %d'%(standard[0],standard[1]))
plt.show()

In [None]:
# 繪製MACD買賣時間圖
pltdat = data
fig, ax1 = plt.subplots()

ax1.plot(pltdat['Date'],pltdat['Close'],label='Close') # 繪製股價折線圖
buy_signals = pltdat[pltdat['MACD standard'] == 1] # 設定買點
sell_signals = pltdat[pltdat['MACD standard'] == -1] # 設定賣點
ax1.scatter(buy_signals['Date'], buy_signals['Close'], color='green', label='Buy')
ax1.scatter(sell_signals['Date'], sell_signals['Close'], color='red', label='Sell') # 將買賣點標示在圖上
ax2 = ax1.twinx()
ax2.plot(pltdat[pltdat['Pillar']>0]['Date'],pltdat[pltdat['Pillar']>0]['Pillar'],label='OSC',color='green') # 繪製MACD折線圖
ax2.plot(pltdat[pltdat['Pillar']<0]['Date'],pltdat[pltdat['Pillar']<0]['Pillar'],color='red') # 繪製MACD折線圖
ax2.axhline(y=0, color='grey', linestyle='--')
ax2.set_ylim([-1,1])

ax2.set_ylabel('DIF-MACD')

ax1.set_xlabel('Date')
ax1.set_ylabel('Close')
ax2.set_xlabel('Date')
ax2.set_ylabel('DIF - MACD')
handles1, labels1 = ax1.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
handles = handles1 + handles2
labels = labels1 + labels2
ax1.legend(handles, labels, loc='upper right')
ax1.tick_params(axis='x', labelrotation = 90)
ax1.set_title('Buy when OSC turn from negative to positive, sell otherwise')
plt.show()