In [37]:
import sys
import pandas as pd
from openbb import obb
from datetime import datetime
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, Date, Float
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy import inspect
import yfinance as yf
import plotly
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode, iplot, plot
from plotly.subplots import make_subplots
import plotly.express as px
import ta
import numpy as np
import matplotlib.pyplot as plt
import json
import data_pipline
import save_data
import control_db

In [3]:
# check environment
print(sys.prefix)
print(sys.executable)

c:\Users\xingyu.liu\Documents\project\finance\.venv
c:\Users\xingyu.liu\Documents\project\finance\.venv\Scripts\python.exe


# Get the data using yfinance

In [4]:
# Download the data
symbol_list = ['NVDA','TSLA','MSFT','AMZN','AAPL','META','GOOG']

data_list = data_pipline.get_data(symbol_list)

Downloading data for NVDA


[*********************100%%**********************]  1 of 1 completed


Downloading data for TSLA


[*********************100%%**********************]  1 of 1 completed


Downloading data for MSFT


[*********************100%%**********************]  1 of 1 completed


Downloading data for AMZN


[*********************100%%**********************]  1 of 1 completed


Downloading data for AAPL


[*********************100%%**********************]  1 of 1 completed


Downloading data for META


[*********************100%%**********************]  1 of 1 completed


Downloading data for GOOG


[*********************100%%**********************]  1 of 1 completed

All symbol data download finish...





# Analysis

In [5]:
data_list.keys()

dict_keys(['stock_company_info', 'stock_actions', 'stock_quarterly_income_stmt', 'stock_quarterly_balance_sheet', 'stock_quarterly_cashflow', 'stock_recommendations_summary', 'stock_upgrades_downgrades', 'stock_get_earnings_dates', 'stock_news', 'stock_data_historical'])

In [6]:
df_historical_raw = data_list['stock_data_historical']
df_historical_raw.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits,RSI,...,EMA_0.5,SMA-3-3_shift_1d,SMA-20-20_shift_1d,signal,log_return_buy_n_hold,log_return_trend_follow,return_buy_n_hold,return_trend_follow,action,symbol
0,1999-01-22,0.4375,0.488281,0.388021,0.410156,0.376237,271468800,0.0,0.0,,...,0.376237,,,0,,,,,,NVDA
1,1999-01-25,0.442708,0.458333,0.410156,0.453125,0.415653,51048000,0.0,0.0,,...,0.395945,0.376237,0.376237,0,0.09963,0.0,1.104763,1.0,0.0,NVDA
2,1999-01-26,0.458333,0.467448,0.411458,0.417969,0.383404,34320000,0.0,0.0,,...,0.389674,0.395945,0.395945,0,-0.080761,-0.0,1.019049,1.0,0.0,NVDA
3,1999-01-27,0.419271,0.429688,0.395833,0.416667,0.38221,24436800,0.0,0.0,,...,0.385942,0.391764,0.391764,0,-0.00312,-0.0,1.015874,1.0,0.0,NVDA
4,1999-01-28,0.416667,0.419271,0.41276,0.415365,0.381015,22752000,0.0,0.0,,...,0.383479,0.393755,0.389376,1,-0.003129,-0.003129,1.0127,0.996875,1.0,NVDA


In [28]:
# parameters
symbol_list = ['MSFT','AAPL']
start_date = pd.to_datetime('2023-06-01')
end_date = pd.to_datetime('2024-05-31')
window = 3
alpha = 0.1


In [54]:
df_historical = df_historical_raw[['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Dividends', 'Stock Splits','symbol']]
df_historical = df_historical[df_historical['symbol'].isin(symbol_list)]
df_historical = df_historical[(df_historical_raw['Date']>=start_date) & (df_historical_raw['Date']<=end_date)]
df_historical.reset_index(inplace=True, drop=True)
df_historical


Boolean Series key will be reindexed to match DataFrame index.



Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits,symbol
0,2023-06-01,325.929993,333.529999,324.720001,332.579987,329.998535,26773900,0.0,0.0,MSFT
1,2023-06-02,334.250000,337.500000,332.549988,335.399994,332.796661,25864000,0.0,0.0,MSFT
2,2023-06-05,335.220001,338.559998,334.660004,335.940002,333.332458,21307100,0.0,0.0,MSFT
3,2023-06-06,335.329987,335.369995,332.170013,333.679993,331.089996,20396200,0.0,0.0,MSFT
4,2023-06-07,331.649994,334.489990,322.500000,323.380005,320.869934,40717100,0.0,0.0,MSFT
...,...,...,...,...,...,...,...,...,...,...
499,2024-05-24,188.820007,190.580002,188.039993,189.979996,189.979996,36294600,0.0,0.0,AAPL
500,2024-05-28,191.509995,193.000000,189.100006,189.990005,189.990005,52280100,0.0,0.0,AAPL
501,2024-05-29,189.610001,192.250000,189.509995,190.289993,190.289993,53068000,0.0,0.0,AAPL
502,2024-05-30,190.759995,192.179993,190.630005,191.289993,191.289993,49947900,0.0,0.0,AAPL


In [55]:
# Adjusted Close price trend over the past 1 year
adjclose_line = px.line(df_historical, x='Date', y='Adj Close', color='symbol')
adjclose_line.show()

## Add technical indicator (RSI, BB, MACD, SMA, EMA)


In [56]:
# RSI
for symbol in symbol_list:
    df_historical.loc[df_historical['symbol'] == symbol, 'RSI'] = ta.momentum.RSIIndicator(df_historical.loc[df_historical['symbol'] == symbol, 'Adj Close']).rsi()

In [57]:
fig = go.Figure()

# Add traces for 'Adj Close'
for category in df_historical['symbol'].unique():
    filtered_df = df_historical[df_historical['symbol'] == category]
    fig.add_trace(go.Scatter(
        x=filtered_df['Date'],
        y=filtered_df['Adj Close'],
        mode='lines',
        name=f'Adj Close - {category}',
        yaxis='y1'
    ))

# Add traces for 'RSI'
for category in df_historical['symbol'].unique():
    filtered_df = df_historical[df_historical['symbol'] == category]
    fig.add_trace(go.Scatter(
        x=filtered_df['Date'],
        y=filtered_df['RSI'],
        mode='lines',
        name=f'RSI - {category}',
        yaxis='y2'
    ))

# Update layout for dual y-axis
fig.update_layout(
    title='Adj Close and RSI over Time with Dual Y-Axis',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Adj Close'),
    yaxis2=dict(title='RSI', overlaying='y', side='right'),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
)

# Display the plot
fig.show()

In [58]:
# Bollinger Bands
for symbol in symbol_list:
    bbands = ta.volatility.BollingerBands(df_historical.loc[df_historical['symbol'] == symbol, 'Adj Close'])
    df_historical.loc[df_historical['symbol'] == symbol, 'BB_Upper'] = bbands.bollinger_hband()
    df_historical.loc[df_historical['symbol'] == symbol, 'BB_Lower'] = bbands.bollinger_lband()

In [59]:
fig = go.Figure()

# Add traces for 'Adj Close'
for category in df_historical['symbol'].unique():
    filtered_df = df_historical[df_historical['symbol'] == category]
    fig.add_trace(go.Scatter(
        x=filtered_df['Date'],
        y=filtered_df['Adj Close'],
        mode='lines',
        name=f'Adj Close - {category}',
        yaxis='y1'
    ))

# Add traces for 'BB'
for category in df_historical['symbol'].unique():
    filtered_df = df_historical[df_historical['symbol'] == category]
    fig.add_trace(go.Scatter(
        x=filtered_df['Date'],
        y=filtered_df['BB_Upper'],
        mode='lines',
        name=f'BB_Upper - {category}',
        yaxis='y2',
        line=dict(color='grey')
    ))

for category in df_historical['symbol'].unique():
    filtered_df = df_historical[df_historical['symbol'] == category]
    fig.add_trace(go.Scatter(
        x=filtered_df['Date'],
        y=filtered_df['BB_Lower'],
        mode='lines',
        name=f'BB_Lower - {category}',
        yaxis='y2',
        line=dict(color='grey')
    ))

# Update layout for dual y-axis
fig.update_layout(
    title='Adj Close and Bollinger Bands over Time with Dual Y-Axis',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Adj Close'),
    yaxis2=dict(title='Bollinger Bands', overlaying='y', side='right'),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
)

# Display the plot
fig.show()

In [60]:
# MACD
for symbol in symbol_list:
    macd = ta.trend.MACD(df_historical.loc[df_historical['symbol'] == symbol, 'Adj Close'])
    df_historical.loc[df_historical['symbol'] == symbol, 'MACD'] = macd.macd()
    df_historical.loc[df_historical['symbol'] == symbol, 'MACD_signal'] = macd.macd_signal()

In [72]:
fig = go.Figure()

# Add traces for 'Adj Close'
for category in df_historical['symbol'].unique():
    filtered_df = df_historical[df_historical['symbol'] == category]
    fig.add_trace(go.Scatter(
        x=filtered_df['Date'],
        y=filtered_df['Adj Close'],
        mode='lines',
        name=f'Adj Close - {category}',
        yaxis='y1'
    ))

# Add traces for 'MACD'
for category in df_historical['symbol'].unique():
    filtered_df = df_historical[df_historical['symbol'] == category]
    fig.add_trace(go.Scatter(
        x=filtered_df['Date'],
        y=filtered_df['MACD'],
        mode='lines',
        name=f'MACD - {category}',
        yaxis='y2'
    ))

for category in df_historical['symbol'].unique():
    filtered_df = df_historical[df_historical['symbol'] == category]
    fig.add_trace(go.Scatter(
        x=filtered_df['Date'],
        y=filtered_df['MACD_signal'],
        mode='lines',
        name=f'MACD_signal - {category}',
        yaxis='y2'
    ))

# Update layout for dual y-axis
fig.update_layout(
    title='Adj Close and MACD over Time with Dual Y-Axis',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Adj Close'),
    yaxis2=dict(title='MACD', overlaying='y', side='right'),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
)

# Display the plot
fig.show()

In [82]:
# SMA
SMA1_W = 10
SMA2_W = 60
SMA1 = "SMA_"+str(SMA1_W)
SMA2 = "SMA_"+str(SMA2_W)
for symbol in symbol_list:
    df_historical.loc[df_historical['symbol'] == symbol, SMA1] = df_historical.loc[df_historical['symbol'] == symbol, 'Adj Close'].rolling(SMA1_W,min_periods=1).mean()
    df_historical.loc[df_historical['symbol'] == symbol, SMA2] = df_historical.loc[df_historical['symbol'] == symbol, 'Adj Close'].rolling(SMA2_W,min_periods=1).mean()


In [88]:
fig = go.Figure()

# Add traces for 'Adj Close'
for category in df_historical['symbol'].unique():
    filtered_df = df_historical[df_historical['symbol'] == category]
    fig.add_trace(go.Scatter(
        x=filtered_df['Date'],
        y=filtered_df['Adj Close'],
        mode='lines',
        name=f'Adj Close - {category}',
        yaxis='y1'
    ))

# Add traces for 'SMA'
for category in df_historical['symbol'].unique():
    filtered_df = df_historical[df_historical['symbol'] == category]
    fig.add_trace(go.Scatter(
        x=filtered_df['Date'],
        y=filtered_df[SMA1],
        mode='lines',
        name=f'SMA{SMA1_W} - {category}',
        yaxis='y1'
    ))

for category in df_historical['symbol'].unique():
    filtered_df = df_historical[df_historical['symbol'] == category]
    fig.add_trace(go.Scatter(
        x=filtered_df['Date'],
        y=filtered_df[SMA2],
        mode='lines',
        name=f'SMA{SMA2_W} - {category}',
        yaxis='y1'
    ))

# Update layout
fig.update_layout(
    title='Adj Close and SMA over Time with Dual Y-Axis',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Adj Close and SMA'),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
)

# Display the plot
fig.show()

In [86]:
# EMA
EMA1_ALPHA = 0.1
EMA2_ALPHA = 0.5
EMA1 = "EMA_"+str(EMA1_ALPHA)
EMA2 = "EMA_"+str(EMA2_ALPHA)
for symbol in symbol_list:
    df_historical.loc[df_historical['symbol'] == symbol, EMA1] = df_historical.loc[df_historical['symbol'] == symbol, 'Adj Close'].ewm(alpha=EMA1_ALPHA, adjust=False).mean()
    df_historical.loc[df_historical['symbol'] == symbol, EMA2] = df_historical.loc[df_historical['symbol'] == symbol, 'Adj Close'].ewm(alpha=EMA2_ALPHA, adjust=False).mean()


In [89]:
fig = go.Figure()

# Add traces for 'Adj Close'
for category in df_historical['symbol'].unique():
    filtered_df = df_historical[df_historical['symbol'] == category]
    fig.add_trace(go.Scatter(
        x=filtered_df['Date'],
        y=filtered_df['Adj Close'],
        mode='lines',
        name=f'Adj Close - {category}',
        yaxis='y1'
    ))

# Add traces for 'SMA'
for category in df_historical['symbol'].unique():
    filtered_df = df_historical[df_historical['symbol'] == category]
    fig.add_trace(go.Scatter(
        x=filtered_df['Date'],
        y=filtered_df[EMA1],
        mode='lines',
        name=f'SMA{EMA1_ALPHA} - {category}',
        yaxis='y1'
    ))

for category in df_historical['symbol'].unique():
    filtered_df = df_historical[df_historical['symbol'] == category]
    fig.add_trace(go.Scatter(
        x=filtered_df['Date'],
        y=filtered_df[EMA2],
        mode='lines',
        name=f'SMA{EMA2_ALPHA} - {category}',
        yaxis='y1'
    ))

# Update layout
fig.update_layout(
    title='Adj Close and EMA over Time with Dual Y-Axis',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Adj Close and EMA'),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
)

# Display the plot
fig.show()

## Strategy Following Return Cal
1. Trending following
2. Buy and hold

In [None]:
# Shift to the future by one day so that everyday uses the information up to yesterday to make a trading decision for tmr

# Save raw data to AWS RDS and local CSV

In [4]:
# Check the instance status
status = control_db.check_status()
print(status)

Current Status: available
available


In [5]:
# Start the instance
if status != 'available':
    # start the instance
    control_db.start_instance()
    control_db.check_status()
else:
    print('Instance already started')

Instance already started


In [6]:
# Save to AWS RDS
status = control_db.check_status()
if status == 'available':
    save_data.save_to_rds(data_list)
else:
    print('Instance not ready: ', status)

Current Status: available
Data inserted into table 'stock_company_info' successfully.
Data inserted into table 'stock_actions' successfully.
Data inserted into table 'stock_quarterly_income_stmt' successfully.
Data inserted into table 'stock_quarterly_balance_sheet' successfully.
Data inserted into table 'stock_quarterly_cashflow' successfully.
Data inserted into table 'stock_recommendations_summary' successfully.
Data inserted into table 'stock_upgrades_downgrades' successfully.
Data inserted into table 'stock_get_earnings_dates' successfully.
Data inserted into table 'stock_news' successfully.
Data inserted into table 'stock_data_historical' successfully.


In [7]:
# Save to local csv
save_data.save_to_csv(data_list)

Data saved in csv table 'stock_company_info' successfully.
Data saved in csv table 'stock_actions' successfully.
Data saved in csv table 'stock_quarterly_income_stmt' successfully.
Data saved in csv table 'stock_quarterly_balance_sheet' successfully.
Data saved in csv table 'stock_quarterly_cashflow' successfully.
Data saved in csv table 'stock_recommendations_summary' successfully.
Data saved in csv table 'stock_upgrades_downgrades' successfully.
Data saved in csv table 'stock_get_earnings_dates' successfully.
Data saved in csv table 'stock_news' successfully.
Data saved in csv table 'stock_data_historical' successfully.


In [9]:
# Check the instance status
status = control_db.check_status()
print(status)

Current Status: available
available


In [11]:
# Stop the instance
if status != 'stopped':
    # stop the instance
    control_db.stop_instance()
    control_db.check_status()
else:
    print('Instance already started')

Stopping the DB instance...
Instance stopped successfully
Current Status: stopped
