## Initial Import:

In [None]:
import pandas as pd
from datetime import datetime
import os    
from dotenv  import load_dotenv
from pathlib import Path
%matplotlib inline

## Importing Data from Yahoo Finance:

##### **<span style="color:red">Install yfinance:</span>**

In [None]:
# !pip install yfinance
import yfinance as yf

### U.S. Dollar Index (USDX) Futures Contract (DX=F):

In [None]:
# Download daily U.S. Dollar Index futures prices.
# auto_adjust=False is requested explicitly so the separate "Adj Close" column
# that the later return calculation reads is present whatever the installed
# yfinance default is.
yf_dollar_df = yf.download("DX=F", start="2017-01-01", end="2020-12-31", auto_adjust=False)

# Some yfinance releases return (field, ticker) MultiIndex columns even for a
# single ticker; keep only the field level so column names are plain strings.
if isinstance(yf_dollar_df.columns, pd.MultiIndex):
    yf_dollar_df.columns = yf_dollar_df.columns.get_level_values(0)

# Checking Data Quality: dtypes, shape, fully duplicated rows
# (keep=False marks every member of a duplicate group), null counts, last rows.
print(
    yf_dollar_df.dtypes,
    yf_dollar_df.shape,
    yf_dollar_df[yf_dollar_df.duplicated(keep=False)],
    yf_dollar_df.isnull().sum(),
    yf_dollar_df.tail(),
    sep="\n\n\n",
)

### Gold Futures Contract (GC=F):

In [None]:
# Download daily gold futures prices.
# auto_adjust=False is requested explicitly so the separate "Adj Close" column
# that the later return calculation reads is present whatever the installed
# yfinance default is.
yf_gold_df = yf.download("GC=F", start="2017-01-01", end="2020-12-31", auto_adjust=False)

# Some yfinance releases return (field, ticker) MultiIndex columns even for a
# single ticker; keep only the field level so column names are plain strings.
if isinstance(yf_gold_df.columns, pd.MultiIndex):
    yf_gold_df.columns = yf_gold_df.columns.get_level_values(0)

# Checking Data Quality: dtypes, shape, fully duplicated rows
# (keep=False marks every member of a duplicate group), null counts, last rows.
print(
    yf_gold_df.dtypes,
    yf_gold_df.shape,
    yf_gold_df[yf_gold_df.duplicated(keep=False)],
    yf_gold_df.isnull().sum(),
    yf_gold_df.tail(),
    sep="\n\n\n",
)

### Bitcoin USD (BTC-USD):

In [None]:
# Download daily Bitcoin/USD prices.
# auto_adjust=False is requested explicitly so the separate "Adj Close" column
# that the later return calculation reads is present whatever the installed
# yfinance default is.
yf_bitcoin_df = yf.download("BTC-USD", start="2017-01-01", end="2020-12-31", auto_adjust=False)

# Some yfinance releases return (field, ticker) MultiIndex columns even for a
# single ticker; keep only the field level so column names are plain strings.
if isinstance(yf_bitcoin_df.columns, pd.MultiIndex):
    yf_bitcoin_df.columns = yf_bitcoin_df.columns.get_level_values(0)

# Checking Data Quality: dtypes, shape, fully duplicated rows
# (keep=False marks every member of a duplicate group), null counts, last rows.
print(
    yf_bitcoin_df.dtypes,
    yf_bitcoin_df.shape,
    yf_bitcoin_df[yf_bitcoin_df.duplicated(keep=False)],
    yf_bitcoin_df.isnull().sum(),
    yf_bitcoin_df.tail(),
    sep="\n\n\n",
)

### Other Data: S&P 500 Index (^GSPC):

In [None]:
# Download daily S&P 500 index prices.
# auto_adjust=False is requested explicitly so the separate "Adj Close" column
# that the later return calculation reads is present whatever the installed
# yfinance default is.
yf_sp500_df = yf.download("^GSPC", start="2017-01-01", end="2020-12-31", auto_adjust=False)

# Some yfinance releases return (field, ticker) MultiIndex columns even for a
# single ticker; keep only the field level so column names are plain strings
# (the candlestick plot below looks columns up by those plain names).
if isinstance(yf_sp500_df.columns, pd.MultiIndex):
    yf_sp500_df.columns = yf_sp500_df.columns.get_level_values(0)

# Checking Data Quality: dtypes, shape, fully duplicated rows
# (keep=False marks every member of a duplicate group), null counts, last rows.
print(
    yf_sp500_df.dtypes,
    yf_sp500_df.shape,
    yf_sp500_df[yf_sp500_df.duplicated(keep=False)],
    yf_sp500_df.isnull().sum(),
    yf_sp500_df.tail(),
    sep="\n\n\n",
)

## Importing Data from Investing by Reading CSV:

### Other Data: M2 US Money Supply:

In [None]:
# Load the M2 money-supply series, indexed and ordered by its DATE column.
csvpath = Path("Resources/M2.csv")
m2_df = (
    # parse_dates=True parses the index column; the deprecated
    # infer_datetime_format flag is no longer passed.
    pd.read_csv(csvpath, index_col="DATE", parse_dates=True)
    # DATE is the index, so order by the index rather than by a column name.
    .sort_index()
    .rename(columns={"M2": "M2(billions)"})
)

# Checking Data Quality: dtypes, shape, fully duplicated rows
# (keep=False marks every member of a duplicate group), null counts, last rows.
print(
    m2_df.dtypes,
    m2_df.shape,
    m2_df[m2_df.duplicated(keep=False)],
    m2_df.isnull().sum(),
    m2_df.tail(),
    sep="\n\n\n",
)

## Rename Columns & Copy Original DataFrames with Selected Columns:

In [None]:
# Rename Columns:
# Rename Columns: prefix every column with the asset name (add_prefix returns a
# new frame, so the downloaded frame is left untouched).
selected_dollar_df = yf_dollar_df.add_prefix("dollar_")
# Selected Columns: pick adjusted close and volume by name, so a different
# column order in the downloaded frame cannot silently select other fields.
selected_dollar_df = selected_dollar_df[["dollar_Adj Close", "dollar_Volume"]]
selected_dollar_df.tail(3)

In [None]:
# Rename Columns:
# Rename Columns: prefix every column with the asset name (add_prefix returns a
# new frame, so the downloaded frame is left untouched).
selected_gold_df = yf_gold_df.add_prefix("gold_")
# Selected Columns: pick adjusted close and volume by name, so a different
# column order in the downloaded frame cannot silently select other fields.
selected_gold_df = selected_gold_df[["gold_Adj Close", "gold_Volume"]]
selected_gold_df.tail(3)

In [None]:
# Rename Columns:
# Rename Columns: prefix every column with the asset name (add_prefix returns a
# new frame, so the downloaded frame is left untouched).
selected_bitcoin_df = yf_bitcoin_df.add_prefix("bitcoin_")
# Selected Columns: pick adjusted close and volume by name, so a different
# column order in the downloaded frame cannot silently select other fields.
selected_bitcoin_df = selected_bitcoin_df[["bitcoin_Adj Close", "bitcoin_Volume"]]
selected_bitcoin_df.tail(3)

In [None]:
# Rename Columns:
# Rename Columns: prefix every column with the asset name (add_prefix returns a
# new frame, so the downloaded frame is left untouched).
selected_sp500_df = yf_sp500_df.add_prefix("sp500_")
# Selected Columns: pick adjusted close and volume by name, so a different
# column order in the downloaded frame cannot silently select other fields.
selected_sp500_df = selected_sp500_df[["sp500_Adj Close", "sp500_Volume"]]
selected_sp500_df.tail(3)

## Concatenating Selected DataFrames for Further Analysis:

In [None]:
# Align the four per-asset frames side by side, keeping only the dates that
# appear in every one of them (inner join on the index).
selected_frames = [selected_dollar_df, selected_gold_df, selected_bitcoin_df, selected_sp500_df]
joined_df = pd.concat(selected_frames, axis=1, join='inner', sort=True)

# Checking Data Quality: dtypes, shape, fully duplicated rows
# (keep=False marks every member of a duplicate group), null counts, last rows.
print(
    joined_df.dtypes,
    joined_df.shape,
    joined_df[joined_df.duplicated(keep=False)],
    joined_df.isnull().sum(),
    joined_df.tail(),
    sep="\n\n\n",
)

## Calculating Historical Daily Returns:

In [None]:
# Map each adjusted-close column to the name of its daily-return column.
return_columns = {
    'dollar_Adj Close': 'dollar_return',
    'gold_Adj Close': 'gold_return',
    'bitcoin_Adj Close': 'bitcoin_return',
    'sp500_Adj Close': 'sp500_return',
}

# Percentage change from the previous row for each asset; rows containing a
# NaN (such as the first row, which has no predecessor) are dropped.
joined_returns_df = (
    joined_df[list(return_columns)]
    .pct_change()
    .rename(columns=return_columns)
    .dropna()
)
joined_returns_df.head()

## Candlestick with Volume:

In [None]:
from bokeh.models import BooleanFilter, CDSView, Select, Range1d, HoverTool, CrosshairTool
from bokeh.models.formatters import NumeralTickFormatter
from bokeh.layouts import gridplot
from bokeh.palettes import Category20
from bokeh.plotting import figure, output_file, show, ColumnDataSource

In [None]:
# Candlestick with Volume:
def plot_candlesticks(df_input, title="S&P500 Candlestick with Volume"):
    """Build a candlestick chart with a volume bar chart stacked underneath.

    Parameters
    ----------
    df_input : pandas.DataFrame
        Price frame whose ``reset_index()`` result has the columns ``Date``,
        ``Open``, ``High``, ``Low``, ``Close`` and ``Volume``.
    title : str, optional
        Title of the candlestick panel. Defaults to the title this function
        previously hard-coded, so existing calls render the same heading.

    Returns
    -------
    The layout returned by ``gridplot``: the candlestick figure on the first
    row and the volume figure on the second, sharing one x range.
    """
    stock = ColumnDataSource(data=dict(index=[], Date=[], Open=[], Close=[], High=[], Low=[], Volume=[]))
    stock.data = stock.from_df(df_input.reset_index())

    # Settings:
    VBAR_WIDTH = 0.5
    RED = Category20[7][6]
    BLUE = Category20[3][0]
    W_PLOT = 1500
    H_PLOT = 600
    LABEL_ANGLE = 3.1415/4  # tick-label rotation in radians

    # Tools Selections:
    # Switching xwheel_zoom to wheel_zoom also lets the y axis zoom, but the bars
    # become very large.
    # The crosshair is not listed here: the shared CrosshairTool instance below
    # is added to both figures, and listing it as well put two crosshairs on p1.
    TOOLS = "pan,xwheel_zoom,box_zoom,hover,undo,redo,reset,save"
    linked_crosshair = CrosshairTool(dimensions="both")

    # Graph One:
    p1 = figure(plot_width=W_PLOT, plot_height=H_PLOT, tools=TOOLS, active_scroll='xwheel_zoom', active_drag='pan',
               title=title, toolbar_location='above')

    # Rising sessions are drawn in blue; falling AND flat sessions in red.
    # Using >= for the second mask ensures sessions with Open == Close still get
    # their high/low wick instead of matching neither filter.
    inc = stock.data['Close'] > stock.data['Open']
    dec = stock.data['Open'] >= stock.data['Close']
    view_inc = CDSView(source=stock, filters=[BooleanFilter(inc)])
    view_dec = CDSView(source=stock, filters=[BooleanFilter(dec)])

    # Map dataframe indices to date strings and use as label overrides
    # (built once and reused for both figures):
    first_index = int(stock.data['index'][0])
    date_labels = {
        offset + first_index: date.strftime('%b-%d')
        for offset, date in enumerate(pd.to_datetime(stock.data["Date"]))
    }
    p1.xaxis.major_label_overrides = date_labels
    p1.xaxis.bounds = (stock.data['index'][0], stock.data['index'][-1])

    # Rendering the Graph: wicks first, then the open/close bodies.
    p1.segment(x0='index', x1='index', y0='Low', y1='High', color=BLUE, source=stock, view=view_inc)
    p1.segment(x0='index', x1='index', y0='Low', y1='High', color=RED, source=stock, view=view_dec)

    p1.vbar(x='index', width=VBAR_WIDTH, top='Open', bottom='Close', fill_color=BLUE, line_color=BLUE,
           source=stock, view=view_inc, name="price")
    p1.vbar(x='index', width=VBAR_WIDTH, top='Open', bottom='Close', fill_color=RED, line_color=RED,
           source=stock, view=view_dec, name="price")

    # Formatting Graph One:
    p1.xaxis.major_label_orientation = LABEL_ANGLE
    p1.x_range.range_padding = 0.05
    p1.xaxis.ticker.desired_num_ticks = 50
    p1.yaxis.formatter = NumeralTickFormatter(format=' 0,0[.]000')
    p1.add_tools(linked_crosshair)

    # Select specific tool for the plot:
    price_hover = p1.select(dict(type=HoverTool))
    # Choose which glyphs are active by glyph name (only the candle bodies):
    price_hover.names = ["price"]
    # Creating tooltips. The Date entry is what the datetime formatter below
    # applies to; the formatter key must match the field spec, "@" included.
    price_hover.tooltips = [("Date", "@Date{%F}"),
                            ("Open", "@Open{0,0.00}"),
                            ("Close", "@Close{0,0.00}"),
                            ("Volume", "@Volume{(0.00 a)}")]
    price_hover.formatters = {"@Date": 'datetime'}

    # Added-on Graph Two For Volume (shares p1's x range so pan/zoom stay in sync):
    # NOTE(review): x values are integer row positions, not timestamps; the
    # datetime axis type is kept unchanged here — confirm it is intended.
    p2 = figure(x_axis_type="datetime", tools="", toolbar_location=None, plot_width=W_PLOT,
                plot_height=200, x_range=p1.x_range)

    # Same index -> date-string label overrides as the candlestick panel
    # (a separate dict copy so the two axes do not share one object):
    p2.xaxis.major_label_overrides = dict(date_labels)

    # Render the Graph:
    p2.vbar(x=stock.data['index'], width=VBAR_WIDTH, top=stock.data['Volume'])

    # Formatting Graph Two:
    p2.xaxis.major_label_orientation = LABEL_ANGLE
    p2.xaxis.ticker.desired_num_ticks = 50
    p2.yaxis.formatter = NumeralTickFormatter(format='0,0[.]000')
    p2.add_tools(linked_crosshair)

    return gridplot([[p1],[p2]])

In [None]:
# Build the S&P 500 candlestick/volume layout, then hand it to Bokeh for display:
sp500_candlestick_layout = plot_candlesticks(yf_sp500_df)
show(sp500_candlestick_layout)