In [113]:
# import class for financial data
import yfinance as yf

# import class for data manipulation and analysis
import pandas as pd
import numpy as np

# import class for data visualization
import matplotlib.pyplot as plt
import seaborn as sns

#import class for machine learning
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# import class for connection to broker 
from ib_insync import *
import nest_asyncio

# import class for supportive functions
import datetime as dt


In [114]:
current_date = dt.datetime.now().strftime('%Y-%m-%d')
ticker = 'NVDA'
df = yf.download(ticker, start = '2020-01-01', end = current_date)

[*********************100%***********************]  1 of 1 completed


In [115]:
df.columns = ['close', 'high', 'low', 'open', 'volume']
df

Unnamed: 0_level_0,close,high,low,open,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-02,5.972160,5.972160,5.892751,5.943284,237536000
2020-01-03,5.876572,5.920384,5.827532,5.852425,205384000
2020-01-06,5.901216,5.906443,5.757083,5.783221,262636000
2020-01-07,5.972659,6.018462,5.884536,5.929593,314856000
2020-01-08,5.983861,6.025184,5.928349,5.968427,277108000
...,...,...,...,...,...
2025-05-19,135.570007,135.869995,132.389999,132.389999,193154600
2025-05-20,134.380005,134.580002,132.619995,134.289993,161514200
2025-05-21,131.800003,137.399994,130.589996,133.059998,270608700
2025-05-22,132.830002,134.250000,131.550003,132.229996,187344000


In [116]:
%%markdown 
## **Feature selection and why:** 

### **5-day moving average:**
1. Captures short-term trends in stock prices.
2. Helps smooth out daily price fluctuations.

### **15-day moving average:**
1. Captures medium-term trends.
2. Reduces noise from daily price movements typically seen in the 3-day moving average.

### **50-day moving average:**
1. Captures long-term trends in stock prices.
2. Provides a broader perspective on stock performance.

### **5-day volatility:**
1. Measures the variability of stock prices over a 5-day period.
2. Indicates the risk associated with the stock.



## **Feature selection and why:** 

### **5-day moving average:**
1. Captures short-term trends in stock prices.
2. Helps smooth out daily price fluctuations.

### **15-day moving average:**
1. Captures medium-term trends.
2. Reduces noise from daily price movements typically seen in the 3-day moving average.

### **50-day moving average:**
1. Captures long-term trends in stock prices.
2. Provides a broader perspective on stock performance.

### **5-day volatility:**
1. Measures the variability of stock prices over a 5-day period.
2. Indicates the risk associated with the stock.



In [117]:
df['5MA_Returns'] = df['close'].rolling(window=5).mean().pct_change(periods=5)
df['15MA_Returns'] = df['close'].rolling(window=15).mean().pct_change(periods=15)
df['50MA_Returns'] = df['close'].rolling(window=50).mean().pct_change(periods=50)
df['5Volatility'] = df['close'].rolling(window=5).std()
df.dropna(inplace=True)
df

Unnamed: 0_level_0,close,high,low,open,volume,5MA_Returns,15MA_Returns,50MA_Returns,5Volatility
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-05-26,8.685747,9.148045,8.640165,9.123136,770780000,0.084221,0.142571,0.097290,0.131918
2020-05-27,8.493957,8.601061,7.967396,8.593340,1175892000,0.051545,0.150902,0.112287,0.201072
2020-05-28,8.455848,8.728343,8.348492,8.381371,734896000,0.011255,0.156003,0.123458,0.216071
2020-05-29,8.842918,8.842918,8.453852,8.523097,745256000,-0.003654,0.164813,0.138385,0.228282
2020-06-01,8.773925,8.808298,8.657604,8.800826,389876000,-0.020653,0.168523,0.151717,0.170218
...,...,...,...,...,...,...,...,...,...
2025-05-19,135.570007,135.869995,132.389999,132.389999,193154600,0.142014,0.149474,-0.141653,2.410673
2025-05-20,134.380005,134.580002,132.619995,134.289993,161514200,0.118392,0.157188,-0.133194,0.489825
2025-05-21,131.800003,137.399994,130.589996,133.059998,270608700,0.079850,0.162347,-0.125483,1.525951
2025-05-22,132.830002,134.250000,131.550003,132.229996,187344000,0.047253,0.177570,-0.119602,1.641502


In [118]:
df.describe()

Unnamed: 0,close,high,low,open,volume,5MA_Returns,15MA_Returns,50MA_Returns,5Volatility
count,1257.0,1257.0,1257.0,1257.0,1257.0,1257.0,1257.0,1257.0,1257.0
mean,47.450585,48.348827,46.45985,47.450767,425762300.0,0.012673,0.038493,0.141897,1.385725
std,43.119048,43.954301,42.221325,43.172075,181056700.0,0.057618,0.104982,0.20747,1.668273
min,8.455848,8.601061,7.967396,8.381371,97884000.0,-0.154879,-0.229242,-0.27993,0.041953
25%,15.346693,15.664222,15.09128,15.33996,290708000.0,-0.026497,-0.032071,-0.000879,0.373553
50%,24.67067,25.211655,23.965025,24.759764,403686000.0,0.014594,0.028001,0.146866,0.692823
75%,76.174973,78.545503,74.192142,74.99684,524818000.0,0.051517,0.109066,0.308947,1.643663
max,149.416245,153.115916,147.806407,153.015919,1543911000.0,0.252968,0.336621,0.597735,12.714196


In [119]:
df[['5MA_Returns', '15MA_Returns', '50MA_Returns', '5Volatility']].corr()

Unnamed: 0,5MA_Returns,15MA_Returns,50MA_Returns,5Volatility
5MA_Returns,1.0,0.310539,0.09602,-0.035512
15MA_Returns,0.310539,1.0,0.298758,-0.036306
50MA_Returns,0.09602,0.298758,1.0,-0.002936
5Volatility,-0.035512,-0.036306,-0.002936,1.0


In [120]:
def find_highest_correlation_pair(df):
    corr_matrix = df.corr()
    upper_triangle = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
    corr_series = upper_triangle.stack()
    max_corr_index = corr_series.abs().idxmax()
    return max_corr_index, corr_series[max_corr_index]

highest_corr_pair, highest_corr_value = find_highest_correlation_pair(df[['5MA_Returns', '15MA_Returns', '50MA_Returns', '5Volatility']])
print(f"Highest correlation pair: {highest_corr_pair} with correlation value: {highest_corr_value:.2f}")

Highest correlation pair: ('5MA_Returns', '15MA_Returns') with correlation value: 0.31


In [121]:
x_train, x_test, y_train, y_test = train_test_split(
    df[['5MA_Returns', '15MA_Returns', '50MA_Returns', '5Volatility']], 
    df['close'], 
    test_size=0.2, 
    random_state=44, 
    shuffle=True
    )

In [122]:
model = LinearRegression()
model.fit(x_train, y_train)

In [123]:
y_pred = model.predict(x_test)

In [124]:
#Testing the model
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
print('r2 score: ', r2, '\nmean square error: ', mse)

r2 score:  0.6440052625921913 
mean square error:  719.0078859802967


In [125]:
#TODO 6: Visulalize linear test results

In [126]:
#TODO 7: Create buy and sell signals based on predictions

In [127]:
#TODO 8: Visualize the buy and sell signals on the stock price chart

In [128]:
#TODO 9: Evaluate the model's performance using Sharpe Ratio, total return vs buy and hold strategy.

In [129]:
#TODO 10: Using the buy and sell signals to send orders to the broker