# LSTM-Based Directional Prediction of TSMC (2330)

This notebook implements an end-to-end LSTM model to predict whether TSMC's stock price will move up or down using daily data from 2024.

## 1. Import Libraries

In [94]:

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping


## 2. Load Dataset

In [95]:

df = pd.read_csv("TSMC_2330_2024.csv")
# Drop the first row as it appears to be ticker data, not actual data
df = df.iloc[1:].reset_index(drop=True)
# Convert columns to float
df[['Close', 'High', 'Low', 'Open', 'Volume']] = df[['Close', 'High', 'Low', 'Open', 'Volume']].astype(float)
# Add Date column assuming daily business days starting from 2024-01-01
df['Date'] = pd.date_range(start='2024-01-01', periods=len(df), freq='B')
df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values('Date').reset_index(drop=True)
df.head()


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2024-01-01,590.0,593.0,589.0,593.0,573.75,26059058.0
1,2024-01-02,584.0,585.0,576.0,578.0,559.23,37106763.0
2,2024-01-03,580.0,581.0,577.0,580.0,561.17,15309129.0
3,2024-01-04,578.0,580.0,574.0,576.0,557.3,18158971.0
4,2024-01-05,582.0,585.0,579.0,583.0,564.07,17761275.0


## 3. Feature Engineering

In [96]:

df['Return'] = df['Close'].pct_change()
df['MA5'] = df['Close'].rolling(window=5).mean()
df['MA10'] = df['Close'].rolling(window=10).mean()
df['Volatility'] = df['Close'].rolling(window=5).std()
df.dropna(inplace=True)


  df['Return'] = df['Close'].pct_change()


## 4. Label Creation (Up / Down)

In [97]:

df['Target'] = (df['Close'].shift(-1) > df['Close']).astype(int)
df.dropna(inplace=True)


## 5. Feature Scaling

In [98]:

features = ['Open', 'High', 'Low', 'Close', 'Volume',
            'Return', 'MA5', 'MA10', 'Volatility']

scaler = MinMaxScaler()
df[features] = scaler.fit_transform(df[features])


## 6. Sequence Construction

In [99]:

def create_sequences(data, target, lookback=10):
    X, y = [], []
    for i in range(lookback, len(data)):
        X.append(data[i-lookback:i])
        y.append(target[i])
    return np.array(X), np.array(y)

lookback = 10
X, y = create_sequences(df[features].values, df['Target'].values, lookback)


## 7. Train-Test Split

In [100]:

split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]


## 8. LSTM Model

In [101]:

model = Sequential([
    LSTM(50, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()


  super().__init__(**kwargs)


## 9. Model Training

In [102]:

early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

history = model.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=30,
    batch_size=16,
    callbacks=[early_stop],
    verbose=1
)


Epoch 1/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - accuracy: 0.5094 - loss: 0.6995 - val_accuracy: 0.5000 - val_loss: 0.7067
Epoch 2/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.5409 - loss: 0.6901 - val_accuracy: 0.5000 - val_loss: 0.7098
Epoch 3/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.5535 - loss: 0.6910 - val_accuracy: 0.5000 - val_loss: 0.7099
Epoch 4/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.5472 - loss: 0.6874 - val_accuracy: 0.5000 - val_loss: 0.7036
Epoch 5/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.5597 - loss: 0.6915 - val_accuracy: 0.5000 - val_loss: 0.7017
Epoch 6/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.5660 - loss: 0.6855 - val_accuracy: 0.5000 - val_loss: 0.7000
Epoch 7/30
[1m10/10[0m [32m━━━━

## 10. Evaluation

In [103]:

y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 352ms/step
Accuracy: 0.5555555555555556

Classification Report:
               precision    recall  f1-score   support

           0       0.56      1.00      0.71        25
           1       0.00      0.00      0.00        20

    accuracy                           0.56        45
   macro avg       0.28      0.50      0.36        45
weighted avg       0.31      0.56      0.40        45

Confusion Matrix:
 [[25  0]
 [20  0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## 11. Conclusion

This notebook demonstrates how LSTM models can be applied to financial time-series data for directional prediction tasks.