# TSM Trading Algorithm

### Imports

In [1]:
# Imports
import os
import requests
import pandas as pd
import numpy as np
import seaborn as sns
from pathlib import Path
import hvplot.pandas
import holoviews as hv
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
import yfinance as yf

### Functions??

### Data Collection:
All data was collected using Yahoo Finance

## Edit Dates?

In [2]:
symbol = 'TSM'
stock = yf.Ticker(symbol)
start_date = '2020-01-01'
end_date = '2024-01-01'

In [3]:
stock_df = stock.history(interval='1d',
                      start=start_date,
                      end=end_date,
                      actions=False,
                      auto_adjust=True,
)
stock_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1006 entries, 2020-01-02 00:00:00-05:00 to 2023-12-29 00:00:00-05:00
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    1006 non-null   float64
 1   High    1006 non-null   float64
 2   Low     1006 non-null   float64
 3   Close   1006 non-null   float64
 4   Volume  1006 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 47.2 KB


### Data Preprocessing:
Clean and preprocess the collected data. Addition of any necessary columns for data analysis.

In [4]:
# Adding columns for Daily Returns of TSM & DXY
stock_df['Actual Returns'] = stock_df['Close'].pct_change()
stock_df.dropna(inplace=True)

stock_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Actual Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-03 00:00:00-05:00,54.290811,54.300016,53.434605,53.453018,10546400,-0.032978
2020-01-06 00:00:00-05:00,53.02951,53.112369,52.596806,52.836174,8897200,-0.01154
2020-01-07 00:00:00-05:00,52.89141,53.950156,52.237748,53.692375,7444300,0.016205
2020-01-08 00:00:00-05:00,53.572695,54.30001,53.499044,54.088261,5381500,0.007373
2020-01-09 00:00:00-05:00,54.953673,54.972086,54.04223,54.530174,5112700,0.00817


### Indicators and signals generated

#### RSI

In [5]:
# Defining a function to calculate RSI Data
def calculate_rsi(data, window):
    delta = data.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

# Define the window size for RSI calculation
window = 20

# Calculate RSI for the 'TSM' column
stock_df['RSI'] = calculate_rsi(stock_df['Close'], window)
stock_df.dropna(inplace=True)
stock_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Actual Returns,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-01-31 00:00:00-05:00,51.031703,51.077735,49.34691,49.659931,13467200,-0.033333,37.141017
2020-02-03 00:00:00-05:00,50.037393,50.580577,49.844057,50.424068,9594300,0.015387,40.237332
2020-02-04 00:00:00-05:00,52.403467,53.066337,52.385057,52.95586,12746600,0.05021,50.343376
2020-02-05 00:00:00-05:00,53.489837,53.535872,52.633631,53.32412,6594400,0.006954,48.913054
2020-02-06 00:00:00-05:00,53.64635,53.655555,52.734904,53.416187,4236300,0.001727,47.980076


#### Stochastic Oscillator (14, 6, 6)

In [6]:
# Function to calculate stochastic oscillator (14, 6, 6)
def calculate_stochastic_oscillator(high, low, close, k_window=14, d_window=6):
    lowest_low = low.rolling(window=k_window).min()
    highest_high = high.rolling(window=k_window).max()
    k_percent = ((close - lowest_low) / (highest_high - lowest_low)) * 100
    d_percent = k_percent.rolling(window=d_window).mean()
    return k_percent, d_percent

# Assuming you have 'High', 'Low', and 'Close' columns in your DataFrame
# Calculate stochastic oscillator (14, 6, 6)
stock_df['%K'], stock_df['%D'] = calculate_stochastic_oscillator(stock_df['High'], stock_df['Low'], stock_df['Close'])
stock_df.dropna(inplace=True)
# Display the DataFrame with stochastic oscillator values
stock_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Actual Returns,RSI,%K,%D
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-02-27 00:00:00-05:00,49.899289,50.267547,49.043083,49.061497,11878900,-0.031619,41.935469,1.674242,21.194374
2020-02-28 00:00:00-05:00,47.275444,49.761203,46.953217,49.567867,13604500,0.010321,45.801212,32.457136,18.799009
2020-03-02 00:00:00-05:00,50.055806,51.565674,49.558657,51.538055,13797700,0.039747,54.318367,56.914202,23.976934
2020-03-03 00:00:00-05:00,51.832668,51.897113,49.751998,50.525345,14661800,-0.01965,50.230233,44.34285,28.982841
2020-03-04 00:00:00-05:00,51.52885,52.559979,51.142175,52.550774,9878200,0.040087,49.057411,69.885027,38.905307


#### Signals Added

In [7]:
# Assuming you have a DataFrame stocks_df with features including  'RSI', 'Stochastic Oscillator', etc.

# Initialize signal column
stock_df['Signal'] = 0

# Buy signal conditions: RSI < 30, %K < 20
buy_condition = (stock_df['RSI'] < 30) & (stock_df['%K'] < 20)
stock_df.loc[buy_condition, 'Signal'] = 1

# Sell signal conditions: RSI > 70, %K > 80, preceded by a buy signal
sell_condition = ((stock_df['RSI'] > 70) & (stock_df['%K'] > 80))
stock_df.loc[sell_condition, 'Signal'] = -1

stock_df['Signal'].value_counts()

Signal
 0    863
-1     56
 1     49
Name: count, dtype: int64

In [8]:
stock_df["Entry/Exit"] = stock_df["Signal"].diff()
stock_df["Entry/Exit"].value_counts()

Entry/Exit
 0.0    889
-1.0     39
 1.0     39
Name: count, dtype: int64

#### Visualization of Entry & Exit Points

In [9]:
# Plot closing price
closing_price_plot = stock_df['Close'].hvplot.line(x='Date', y='Close', label='Closing Price', color='grey', width=800, height=400)

# Calculate the points in time when the Signal value changes
signal_changes = stock_df['Signal'].diff()

# Identify trade entry (1) and exit (-1) points
entry_points = stock_df[signal_changes == 1]
exit_points = stock_df[signal_changes == -1]
# Plot entry points
entry_plot = entry_points.hvplot.scatter(x='Date', y='Close', marker='^', color='green', size=100, label='Entry Point')

# Plot exit points
exit_plot = exit_points.hvplot.scatter(x='Date', y='Close', marker='v', color='red', size=100, label='Exit Point')

# Combine plots
closing_price_plot * entry_plot * exit_plot

### Split the data into training and testing datasets.

In [10]:
# Define features and target variable
features = ['RSI', '%K', '%D']
target = 'Entry/Exit'

In [11]:
# Assign a copy of the 50 day MA & RSI columns to a features DataFrame called X
X = stock_df[features].shift().dropna()

# Review the DataFrame
X.head()

Unnamed: 0_level_0,RSI,%K,%D
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-02-28 00:00:00-05:00,41.935469,1.674242,21.194374
2020-03-02 00:00:00-05:00,45.801212,32.457136,18.799009
2020-03-03 00:00:00-05:00,54.318367,56.914202,23.976934
2020-03-04 00:00:00-05:00,50.230233,44.34285,28.982841
2020-03-05 00:00:00-05:00,49.057411,69.885027,38.905307


In [12]:
# Create the target set selecting the Signal column and assiging it to y
y = stock_df[target]

# Review the value counts
y.value_counts()

Entry/Exit
 0.0    889
-1.0     39
 1.0     39
Name: count, dtype: int64

In [13]:
# Select the start of the training period
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2020-02-28 00:00:00-05:00


In [14]:
# Select the ending period for the training data with an offset of [6, 12, 18, 24, 30, 36] months
training_end = X.index.min() + DateOffset(months=6)

# Display the training end date
print(training_end)

2020-08-28 00:00:00-04:00


In [15]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Review the X_train DataFrame
X_train.head()

Unnamed: 0_level_0,RSI,%K,%D
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-02-28 00:00:00-05:00,41.935469,1.674242,21.194374
2020-03-02 00:00:00-05:00,45.801212,32.457136,18.799009
2020-03-03 00:00:00-05:00,54.318367,56.914202,23.976934
2020-03-04 00:00:00-05:00,50.230233,44.34285,28.982841
2020-03-05 00:00:00-05:00,49.057411,69.885027,38.905307


In [16]:
# Generate the X_test and y_test DataFrames
X_test = X.loc[training_end+DateOffset(hours=1):]
y_test = y.loc[training_end+DateOffset(hours=1):]

# Review the X_test DataFrame
X_test.head()

Unnamed: 0_level_0,RSI,%K,%D
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-08-31 00:00:00-04:00,53.782326,70.458111,61.472403
2020-09-01 00:00:00-04:00,50.693835,51.658686,65.177753
2020-09-02 00:00:00-04:00,52.998181,87.99348,73.775149
2020-09-03 00:00:00-04:00,51.235118,91.976211,78.047871
2020-09-04 00:00:00-04:00,51.517977,77.265941,75.389711


### Model Training

In [17]:
# Scale the features DataFrames

# Create a StandardScaler instance
#scaler = StandardScaler()

# Apply the scaler model to fit the X-train data
#X_scaler = scaler.fit(X_train)

# Transform the X_train and X_test DataFrames using the X_scaler
#X_train_scaled = X_scaler.transform(X_train)
#X_test_scaled = X_scaler.transform(X_test)

In [18]:
# Import RandomOverSampler from imblearn
from imblearn.over_sampling import RandomOverSampler

# Instantiate the RandomOverSampler instance
random_oversampler = RandomOverSampler(random_state=1)

# Fit the data to the model
X_resampled, y_resampled = random_oversampler.fit_resample(X_train, y_train)

For each model please restart kernel and run all cells up to this point. To run the different models you need to skip over and continue running the model you would like. All code is featured to show the work but each model has to run separately. This way all data is scaled pre model training. Only run one model at a time then skip to the Backtesting Section

#### SVC Classifier

In [19]:
# From SVM, instantiate SVC classifier model instance
svm_model = svm.SVC()
 
# Fit the model to the data using the training data
svm_model = svm_model.fit(X_resampled, y_resampled)
 
# Use the testing data to make the model predictions
svm_pred = svm_model.predict(X_test)

accuracy = accuracy_score(y_test, svm_pred)
print("Accuracy:", accuracy)

Accuracy: 0.7699642431466031


In [20]:
# Generate classification report
print("Classification Report:")
print(classification_report(y_test, svm_pred))

Classification Report:
              precision    recall  f1-score   support

        -1.0       0.08      0.33      0.13        33
         0.0       0.94      0.80      0.87       773
         1.0       0.32      0.39      0.35        33

    accuracy                           0.77       839
   macro avg       0.45      0.51      0.45       839
weighted avg       0.88      0.77      0.82       839



#### SGD Classifier

In [21]:
# Create an SGDClassifier with logistic loss function
#sgd_model = SGDClassifier()

# Fit the model on training data
#sgd_model.fit(X_resampled, y_resampled)

# Predict on the testing data
#sgd_pred = sgd_model.predict(X_test)

#accuracy = accuracy_score(y_test, sgd_pred)
#print("Accuracy:", accuracy)
# Generate classification report
#print("Classification Report:")
#print(classification_report(y_test, sgd_pred))

#### Gradient Boosting Machines (GBM) Classifier

In [22]:
# Initialize the GradientBoostingClassifier
#gbm = GradientBoostingClassifier()

# Train the model
#gbm.fit(X_resampled, y_resampled)

# Predict on the test set
#gbm_pred = gbm.predict(X_test)

#accuracy = accuracy_score(y_test, gbm_pred)
#print("Accuracy:", accuracy)
# Generate classification report
#print("Classification Report:")
#print(classification_report(y_test, gbm_pred))

#### LogisticRegression Model

In [23]:
# Initiate the model instance
#logistic_regression_model = LogisticRegression()

# Fit the model using the training data
#logistic_regression_model = logistic_regression_model.fit(X_resampled, y_resampled)

# Use the testing dataset to generate the predictions for the new model
#lr_pred = logistic_regression_model.predict(X_test)

# Generate classification report
#print("Classification Report:")
#print(classification_report(y_test, lr_pred))

In [24]:
# Execute trades based on entry/exit points
capital = 100000  # Initial capital
percentage_of_capital = 0.03  # 3%
shares = 0  # Number of shares held

for index, row in stock_df.iterrows():
    if row['Entry/Exit'] == 1:  # Buy signal
        shares_to_buy = int((capital * percentage_of_capital) / row['Close'])
        shares += shares_to_buy
        capital -= shares_to_buy * row['Close']
    elif row['Entry/Exit'] == -1 and shares > 0:  # Sell signal
        capital += shares * row['Close']
        shares = 0

# Calculate final profit/loss
final_balance = capital + shares * stock_df.iloc[-1]['Close']
initial_balance = 100000
profit_loss = final_balance - initial_balance

print("Final balance:", final_balance)
print("Profit/loss:", profit_loss)


Final balance: 103134.08936691284
Profit/loss: 3134.089366912842
