# TSM Trading Algorithm

### Imports

In [1]:
# Imports
import os
import requests
import pandas as pd
import numpy as np
import seaborn as sns
from pathlib import Path
import hvplot.pandas
import holoviews as hv
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from imblearn.over_sampling import RandomOverSampler
import yfinance as yf

### Data Collection:
All data was collected using Yahoo Finance

In [2]:
# Ticker symbol to analyse and the yfinance handle used to download its price history
symbol = 'TSM'
stock = yf.Ticker(symbol)
# Date range passed to stock.history() as its start/end arguments
start_date = '2000-01-01'
end_date = '2024-01-01'

In [3]:
# Download the daily price history for the configured date range.
# The options are collected in one place so they are easy to review and tweak.
history_options = {
    'interval': '1d',
    'start': start_date,
    'end': end_date,
    'actions': False,
    'auto_adjust': True,
}
stock_df = stock.history(**history_options)

# Summarise the columns, dtypes and non-null counts of the downloaded frame
stock_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 6037 entries, 2000-01-03 00:00:00-05:00 to 2023-12-29 00:00:00-05:00
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    6037 non-null   float64
 1   High    6037 non-null   float64
 2   Low     6037 non-null   float64
 3   Close   6037 non-null   float64
 4   Volume  6037 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 283.0 KB


### Data Preprocessing:
Clean and preprocess the collected data, and add any columns needed for the analysis.

In [4]:
# Add a column with the daily returns of TSM (percentage change of the close),
# drop the rows that contain missing values (the first row has no prior close
# to compare against) and discard the unused 'Volume' column.
stock_df = (
    stock_df
    .assign(TSM_Returns=stock_df['Close'].pct_change())
    .dropna()
    .drop(columns='Volume')
)
stock_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,TSM_Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000-01-04 00:00:00-05:00,9.126191,9.487386,9.102111,9.18639,0.006596
2000-01-05 00:00:00-05:00,9.270668,9.330867,9.186389,9.246589,0.006553
2000-01-06 00:00:00-05:00,9.029872,9.041911,8.668677,8.861314,-0.041667
2000-01-07 00:00:00-05:00,9.174348,9.246588,9.053949,9.198428,0.038043
2000-01-10 00:00:00-05:00,9.499425,9.571664,9.366987,9.571664,0.040576


#### Visualization of TSM Price (candlestick? or line)

### Indicators and signals generated

#### RSI

In [5]:
# Defining a function to calculate RSI Data
def calculate_rsi(data, window):
    """Compute a Relative Strength Index using simple rolling averages.

    Parameters
    ----------
    data : pandas.Series
        Price series the RSI is computed from.
    window : int
        Number of rows in each rolling average.

    Returns
    -------
    pandas.Series
        RSI values aligned with ``data``. The first ``window - 1`` entries are
        NaN because the rolling averages are not yet defined there.

    Notes
    -----
    * The first difference is NaN and is counted as "no change" (zero gain and
      zero loss) by the ``where`` calls below.
    * When the average loss is zero and the average gain is positive, the ratio
      is infinite and the RSI evaluates to 100. When both are zero, the
      result is NaN.
    """
    change = data.diff()

    # Keep only the upward moves (everything else becomes 0) ...
    upward_moves = change.where(change > 0, 0)
    # ... and only the downward moves, sign-flipped so that losses are positive.
    downward_moves = -change.where(change < 0, 0)

    average_gain = upward_moves.rolling(window=window).mean()
    average_loss = downward_moves.rolling(window=window).mean()

    relative_strength = average_gain / average_loss
    return 100 - (100 / (1 + relative_strength))

# Number of rows used for each RSI rolling average
window = 14

# Calculate the RSI from the 'Close' column and store it as a new 'RSI' column
rsi_series = calculate_rsi(data=stock_df['Close'], window=window)
stock_df['RSI'] = rsi_series

# Remove the rows that contain missing values (the RSI warm-up rows)
stock_df.dropna(inplace=True)
stock_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,TSM_Returns,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-01-24 00:00:00-05:00,10.859927,11.799033,10.835847,11.498037,0.092677,76.666618
2000-01-25 00:00:00-05:00,11.498034,12.665898,11.485995,12.617739,0.097382,81.456899
2000-01-26 00:00:00-05:00,12.95486,13.869887,12.087993,12.184311,-0.034351,75.206646
2000-01-27 00:00:00-05:00,11.630475,11.642514,11.124802,11.341519,-0.06917,69.731797
2000-01-28 00:00:00-05:00,11.076643,11.100722,10.414452,10.462611,-0.077495,59.25926


#### Stochastic Oscillator (5-period %K, 3-period %D)

In [6]:
# Function to calculate the stochastic oscillator (defaults: 5-period %K, 3-period %D)
def calculate_stochastic_oscillator(high, low, close, k_window=5, d_window=3):
    """Compute the %K and %D lines of a stochastic oscillator.

    Parameters
    ----------
    high, low, close : pandas.Series
        High, low and closing prices sharing the same index.
    k_window : int, default 5
        Look-back length for the rolling lowest low / highest high.
    d_window : int, default 3
        Length of the rolling mean applied to %K to obtain %D.

    Returns
    -------
    tuple of pandas.Series
        ``(k_percent, d_percent)``. %K is the close's position inside the
        rolling low-high range, scaled to 0-100. %D is the rolling mean of %K.
        Leading entries are NaN until the respective windows are full.
    """
    range_floor = low.rolling(window=k_window).min()
    range_ceiling = high.rolling(window=k_window).max()
    range_width = range_ceiling - range_floor

    k_percent = ((close - range_floor) / range_width) * 100
    d_percent = k_percent.rolling(window=d_window).mean()
    return k_percent, d_percent

# Calculate the stochastic oscillator from the 'High', 'Low' and 'Close' columns,
# using the function's default windows (k_window=5, d_window=3)
k_line, d_line = calculate_stochastic_oscillator(
    high=stock_df['High'],
    low=stock_df['Low'],
    close=stock_df['Close'],
)
stock_df['%K'] = k_line
stock_df['%D'] = d_line

# Remove the rows that contain missing values (the oscillator warm-up rows)
stock_df.dropna(inplace=True)

# Display the DataFrame with stochastic oscillator values
stock_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,TSM_Returns,RSI,%K,%D
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2000-02-01 00:00:00-05:00,10.113458,11.209082,10.077338,11.197042,0.120482,60.993979,34.319555,13.482336
2000-02-02 00:00:00-05:00,11.209078,11.654551,10.884003,11.160918,-0.003226,59.938795,73.376594,37.476623
2000-02-03 00:00:00-05:00,11.84719,13.291969,11.82311,13.21973,0.184466,67.614303,97.931038,68.542396
2000-02-04 00:00:00-05:00,13.31605,13.580925,12.762217,12.894655,-0.02459,62.271513,81.84715,84.384927
2000-02-07 00:00:00-05:00,12.894659,12.894659,12.617743,12.82242,-0.005602,63.599998,78.350627,86.042938


#### Signals Added

In [7]:
# Rule-based backtest: walk through the bars in order, trade on the indicator
# thresholds at that bar's close, and record the signal, the per-bar strategy
# return and the mark-to-market account value for every row.

# Initialize capital, percentage of capital per trade, and shares
initial_balance = 100000  # Starting cash; also the baseline for the profit/loss figure
capital = initial_balance  # Cash currently available
percentage_of_capital = 0.10  # Fraction of the available cash committed on each buy (10%)
shares = 0  # Number of shares held

# Initialize lists to store entry/exit signals, strategy returns, and account values
entry_exit_signals = []
strategy_returns = []
account_values = []

# Account value at the previous bar, used to turn account values into returns
previous_account_value = initial_balance

# Loop through the DataFrame to execute trades and record signals
for _date, row in stock_df.iterrows():
    close = row['Close']

    if row['RSI'] < 30 and row['%K'] < 20:  # Buy signal conditions
        # Whole shares only; the cash not spent stays in `capital`
        shares_to_buy = int((capital * percentage_of_capital) / close)
        shares += shares_to_buy
        capital -= shares_to_buy * close
        entry_exit_signals.append('Buy')
    elif row['%K'] > 80 and shares > 0:  # Sell signal conditions
        # Liquidate the whole position at this bar's close
        capital += shares * close
        shares = 0
        entry_exit_signals.append('Sell')
    else:
        entry_exit_signals.append('Hold')

    # Calculate the mark-to-market account value after this bar's trade (if any)
    account_value = capital + shares * close
    account_values.append(account_value)

    # Strategy return = relative change of the account value since the previous bar.
    # Previously a Sell always recorded 1.0 (the position was zeroed before it
    # was used), a Buy recorded trade size / remaining cash, and a Hold recorded
    # 0.0 even while an open position gained or lost value.
    if previous_account_value:
        strategy_returns.append(account_value / previous_account_value - 1)
    else:
        strategy_returns.append(0.0)
    previous_account_value = account_value

# Add entry/exit signals, strategy returns, and account values to the DataFrame
stock_df['Entry/Exit'] = entry_exit_signals
stock_df['Strategy_Return'] = strategy_returns
stock_df['Account_Value'] = account_values

# Calculate final profit/loss (any open position is valued at the last close)
final_balance = capital + shares * stock_df.iloc[-1]['Close']
profit_loss = final_balance - initial_balance

print("Final balance:", final_balance)
print("Profit/loss:", profit_loss)


Final balance: 149066.40730023384
Profit/loss: 49066.40730023384


In [8]:
# Preview the frame, now including the 'Entry/Exit', 'Strategy_Return' and 'Account_Value' columns
stock_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,TSM_Returns,RSI,%K,%D,Entry/Exit,Strategy_Return,Account_Value
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2000-02-01 00:00:00-05:00,10.113458,11.209082,10.077338,11.197042,0.120482,60.993979,34.319555,13.482336,Hold,0.0,100000.0
2000-02-02 00:00:00-05:00,11.209078,11.654551,10.884003,11.160918,-0.003226,59.938795,73.376594,37.476623,Hold,0.0,100000.0
2000-02-03 00:00:00-05:00,11.84719,13.291969,11.82311,13.21973,0.184466,67.614303,97.931038,68.542396,Hold,0.0,100000.0
2000-02-04 00:00:00-05:00,13.31605,13.580925,12.762217,12.894655,-0.02459,62.271513,81.84715,84.384927,Hold,0.0,100000.0
2000-02-07 00:00:00-05:00,12.894659,12.894659,12.617743,12.82242,-0.005602,63.599998,78.350627,86.042938,Hold,0.0,100000.0


#### Visualization of Entry & Exit Points

In [9]:
# NOTE(review): this whole cell is disabled. It reads stock_df['Signal'] and looks
# for +1/-1 changes, but no 'Signal' column is created in this notebook; the
# backtest loop writes an 'Entry/Exit' column holding 'Buy'/'Sell'/'Hold' labels.
# The entry/exit selection must be adapted to that column before re-enabling.

# Plot closing price
#closing_price_plot = stock_df['Close'].hvplot.line(x='Date', y='Close', label='Closing Price', color='grey', width=800, height=400)

# Calculate the points in time when the Signal value changes
#signal_changes = stock_df['Signal'].diff()

# Identify trade entry (1) and exit (-1) points
#entry_points = stock_df[signal_changes == 1]
#exit_points = stock_df[signal_changes == -1]
# Plot entry points
#entry_plot = entry_points.hvplot.scatter(x='Date', y='Close', marker='^', color='green', size=100, label='Entry Point')

# Plot exit points
#exit_plot = exit_points.hvplot.scatter(x='Date', y='Close', marker='v', color='red', size=100, label='Exit Point')

# Combine plots
#closing_price_plot * entry_plot * exit_plot

### Split the data into training and testing datasets.

In [10]:
# Define features and target variable:
# the indicator columns used as model inputs, and the label column to predict
features = ['RSI', '%K', '%D']
target = 'Entry/Exit'

In [11]:
# Build the features DataFrame X from the indicator columns listed in `features`.
# shift() moves every value down by one row, so each date carries the indicator
# values of the previous row; the first row becomes all-NaN and is dropped.
lagged_indicators = stock_df[features].shift()
X = lagged_indicators.dropna()

# Review the DataFrame
X.head()

Unnamed: 0_level_0,RSI,%K,%D
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-02-02 00:00:00-05:00,60.993979,34.319555,13.482336
2000-02-03 00:00:00-05:00,59.938795,73.376594,37.476623
2000-02-04 00:00:00-05:00,67.614303,97.931038,68.542396
2000-02-07 00:00:00-05:00,62.271513,81.84715,84.384927
2000-02-08 00:00:00-05:00,63.599998,78.350627,86.042938


In [12]:
# Create the target set by selecting the 'Entry/Exit' column (named in `target`) and assigning it to y
y = stock_df[target]

# Review the value counts
y.value_counts()

Entry/Exit
Hold    5661
Buy      261
Sell      95
Name: count, dtype: int64

In [13]:
# Select the start of the training period: the earliest date present in X
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2000-02-02 00:00:00-05:00


In [14]:
# Select the ending period for the training data: 216 months (18 years) after the earliest date in X
training_end = X.index.min() + DateOffset(months=216)

# Display the training end date
print(training_end)

2018-02-02 00:00:00-05:00


In [15]:
# Generate the X_train and y_train DataFrames.
# One shared label slice guarantees features and target cover the same dates.
training_window = slice(training_begin, training_end)
X_train = X.loc[training_window]
y_train = y.loc[training_window]

# Review the X_train DataFrame
X_train.head()

Unnamed: 0_level_0,RSI,%K,%D
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-02-02 00:00:00-05:00,60.993979,34.319555,13.482336
2000-02-03 00:00:00-05:00,59.938795,73.376594,37.476623
2000-02-04 00:00:00-05:00,67.614303,97.931038,68.542396
2000-02-07 00:00:00-05:00,62.271513,81.84715,84.384927
2000-02-08 00:00:00-05:00,63.599998,78.350627,86.042938


In [16]:
# Generate the X_test and y_test DataFrames.
# The test period starts one hour after the training end so that a row stamped
# exactly at `training_end` is not selected for both sets.
testing_begin = training_end + DateOffset(hours=1)
X_test = X.loc[testing_begin:]
y_test = y.loc[testing_begin:]

# Review the X_test DataFrame
X_test.head()

Unnamed: 0_level_0,RSI,%K,%D
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-02-05 00:00:00-05:00,67.58468,7.188274,46.93853
2018-02-06 00:00:00-05:00,53.614494,4.487166,26.767575
2018-02-07 00:00:00-05:00,52.371129,38.903449,16.859629
2018-02-08 00:00:00-05:00,36.509549,0.0,14.463538
2018-02-09 00:00:00-05:00,31.079894,0.0,12.967816


### Model Training

In [17]:
# NOTE(review): feature scaling is currently disabled. The oversampling and model
# cells below use X_train / X_test directly, so re-enabling this cell alone has
# no effect unless those cells are switched to X_train_scaled / X_test_scaled.

# Scale the features DataFrames

# Create a StandardScaler instance
#scaler = StandardScaler()

# Apply the scaler model to fit the X-train data
#X_scaler = scaler.fit(X_train)

# Transform the X_train and X_test DataFrames using the X_scaler
#X_train_scaled = X_scaler.transform(X_train)
#X_test_scaled = X_scaler.transform(X_test)

In [18]:
# Instantiate the RandomOverSampler instance (fixed random_state so the resampling is repeatable)
random_oversampler = RandomOverSampler(random_state=1)

# Resample the training data only; the test set is left untouched.
# The value counts of y show 'Hold' heavily outnumbering 'Buy' and 'Sell'.
X_resampled, y_resampled = random_oversampler.fit_resample(X_train, y_train)

Before running each model, restart the kernel and run all cells up to this point. Then run only the cell for the model you want to evaluate, skip the other model cells, and continue to the Backtesting section. The code for every model is included to show the work, but each model has to be run separately (only one model at a time) so that it starts from the same freshly prepared training data. Note that the feature-scaling cell above is currently commented out, so the models are trained on the unscaled, oversampled features.

#### Gradient Boosting Machines (GBM) Classifier

In [19]:
# Initialize the GradientBoostingClassifier.
# random_state is pinned (same seed as the oversampler) so that re-running the
# notebook reproduces the same fitted model and the same metrics.
gbm = GradientBoostingClassifier(random_state=1)

# Train the model on the oversampled training data
gbm.fit(X_resampled, y_resampled)

# Predict on the (untouched, not oversampled) test set
gbm_pred = gbm.predict(X_test)

# Overall accuracy; with 'Hold' dominating the test labels, the per-class
# figures in the classification report are the more informative numbers
accuracy = accuracy_score(y_test, gbm_pred)
print("Accuracy:", accuracy)
# Generate classification report
print("Classification Report:")
print(classification_report(y_test, gbm_pred))

Accuracy: 0.8162853297442799
Classification Report:
              precision    recall  f1-score   support

         Buy       0.28      0.74      0.41        76
        Hold       0.99      0.82      0.90      1387
        Sell       0.11      0.70      0.20        23

    accuracy                           0.82      1486
   macro avg       0.46      0.75      0.50      1486
weighted avg       0.94      0.82      0.86      1486



#### SVC Classifier

In [21]:
# From SVM, instantiate SVC classifier model instance
#svm_model = svm.SVC()
 
# Fit the model to the data using the training data
#svm_model = svm_model.fit(X_resampled, y_resampled)
 
# Use the testing data to make the model predictions
#svm_pred = svm_model.predict(X_test)

#accuracy = accuracy_score(y_test, svm_pred)
#print("Accuracy:", accuracy)
# Generate classification report
#print("Classification Report:")
#print(classification_report(y_test, svm_pred))

Accuracy: 0.6465661641541038
Classification Report:
              precision    recall  f1-score   support

         Buy       0.37      0.96      0.53       210
        Hold       0.99      0.62      0.76      2746
        Sell       0.03      0.83      0.06        29

    accuracy                           0.65      2985
   macro avg       0.46      0.80      0.45      2985
weighted avg       0.94      0.65      0.74      2985



#### SGD Classifier

In [21]:
# Create an SGDClassifier (default settings use the hinge loss, i.e. a linear SVM; pass loss='log_loss' for logistic regression)
#sgd_model = SGDClassifier()

# Fit the model on training data
#sgd_model.fit(X_resampled, y_resampled)

# Predict on the testing data
#sgd_pred = sgd_model.predict(X_test)

#accuracy = accuracy_score(y_test, sgd_pred)
#print("Accuracy:", accuracy)
# Generate classification report
#print("Classification Report:")
#print(classification_report(y_test, sgd_pred))

Accuracy: 0.5430485762144054
Classification Report:
              precision    recall  f1-score   support

         Buy       0.44      0.91      0.59       211
        Hold       0.98      0.51      0.67      2745
        Sell       0.02      0.79      0.04        29

    accuracy                           0.54      2985
   macro avg       0.48      0.74      0.44      2985
weighted avg       0.94      0.54      0.66      2985



#### LogisticRegression Model

In [20]:
# Initiate the model instance
#logistic_regression_model = LogisticRegression()

# Fit the model using the training data
#logistic_regression_model.fit(X_resampled, y_resampled)

# Use the testing dataset to generate the predictions for the new model
#lr_pred = logistic_regression_model.predict(X_test)

#accuracy = accuracy_score(y_test, lr_pred)
#print("Accuracy:", accuracy)
# Generate classification report
#print("Classification Report:")
#print(classification_report(y_test, lr_pred))

Accuracy: 0.6301507537688442
Classification Report:
              precision    recall  f1-score   support

         Buy       0.41      0.94      0.57       210
        Hold       0.99      0.61      0.75      2746
        Sell       0.03      0.72      0.05        29

    accuracy                           0.63      2985
   macro avg       0.47      0.76      0.46      2985
weighted avg       0.94      0.63      0.73      2985

