## Initial Import:

In [2]:
import pandas as pd
from datetime import datetime
import os    
from dotenv  import load_dotenv
from pathlib import Path
%matplotlib inline

## Importing Data from Yahoo Finance:

##### **<span style="color:red">Install yfinance:</span>**

In [3]:
# !pip install yfinance
import yfinance as yf

### U.S. Dollar Index (USDX) Futures Contract (DX=F):

In [4]:
# Download daily U.S. Dollar Index futures (DX=F) prices from Yahoo Finance.
# NOTE(review): the recorded output below ends on 2020-12-30, so the `end`
# date itself does not appear in the result -- confirm that leaving out
# 2020-12-31 is intended.
yf_dollar_df = yf.download(
    "DX=F",
    start="2017-01-01",
    end="2020-12-31",
)

# Data-quality report, each item followed by a blank-line separator:
# column dtypes, (rows, columns), every row whose values are duplicated
# elsewhere in the frame (keep=False flags all copies), and null counts.
for quality_check in (
    yf_dollar_df.dtypes,
    yf_dollar_df.shape,
    yf_dollar_df[yf_dollar_df.duplicated(keep=False)],
    yf_dollar_df.isnull().sum(),
):
    print(quality_check)
    print("\n")

# Finish with the most recent rows as a sanity check on the date range.
print(yf_dollar_df.tail())

[*********************100%***********************]  1 of 1 downloaded
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object


(1000, 6)


Empty DataFrame
Columns: [Open, High, Low, Close, Adj Close, Volume]
Index: []


Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64


              Open   High     Low   Close  Adj Close  Volume
Date                                                        
2020-12-21  90.045  90.95  89.925  89.949     89.949   39253
2020-12-22  90.100  90.62  89.950  90.546     90.546   21871
2020-12-23  90.535  90.58  90.050  90.340     90.340   27553
2020-12-28  90.260  90.32  89.895  90.275     90.275   10826
2020-12-30  89.865  89.90  89.670  89.825     89.825    5764


### Gold Futures Contract (GC=F):

In [5]:
# Download daily gold futures (GC=F) prices from Yahoo Finance.
# NOTE(review): the recorded output below ends on 2020-12-30, so the `end`
# date itself does not appear in the result -- confirm that leaving out
# 2020-12-31 is intended.
yf_gold_df = yf.download(
    "GC=F",
    start="2017-01-01",
    end="2020-12-31",
)

# Data-quality report, each item followed by a blank-line separator:
# column dtypes, (rows, columns), every row whose values are duplicated
# elsewhere in the frame (keep=False flags all copies), and null counts.
for quality_check in (
    yf_gold_df.dtypes,
    yf_gold_df.shape,
    yf_gold_df[yf_gold_df.duplicated(keep=False)],
    yf_gold_df.isnull().sum(),
):
    print(quality_check)
    print("\n")

# Finish with the most recent rows as a sanity check on the date range.
print(yf_gold_df.tail())

[*********************100%***********************]  1 of 1 downloaded
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object


(1079, 6)


Empty DataFrame
Columns: [Open, High, Low, Close, Adj Close, Volume]
Index: []


Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64


              Open    High     Low   Close  Adj Close  Volume
Date                                                         
2020-12-22  1882.2  1882.2  1861.0  1866.6     1866.6     233
2020-12-23  1867.0  1878.8  1864.5  1874.7     1874.7      82
2020-12-28  1895.5  1895.8  1873.7  1877.2     1877.2      75
2020-12-29  1881.3  1881.3  1879.7  1879.7     1879.7      75
2020-12-30  1881.8  1890.1  1880.6  1882.7     1882.7   34899


### Bitcoin USD (BTC-USD):

In [6]:
# Download daily Bitcoin/USD (BTC-USD) prices from Yahoo Finance.
# NOTE(review): the recorded output below ends on 2020-12-30, so the `end`
# date itself does not appear in the result -- confirm that leaving out
# 2020-12-31 is intended.
yf_bitcoin_df = yf.download(
    "BTC-USD",
    start="2017-01-01",
    end="2020-12-31",
)

# Data-quality report, each item followed by a blank-line separator:
# column dtypes, (rows, columns), every row whose values are duplicated
# elsewhere in the frame (keep=False flags all copies), and null counts.
for quality_check in (
    yf_bitcoin_df.dtypes,
    yf_bitcoin_df.shape,
    yf_bitcoin_df[yf_bitcoin_df.duplicated(keep=False)],
    yf_bitcoin_df.isnull().sum(),
):
    print(quality_check)
    print("\n")

# Finish with the most recent rows as a sanity check on the date range.
print(yf_bitcoin_df.tail())

[*********************100%***********************]  1 of 1 downloaded
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object


(1461, 6)


Empty DataFrame
Columns: [Open, High, Low, Close, Adj Close, Volume]
Index: []


Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64


                Open      High       Low     Close  Adj Close       Volume
Date                                                                      
2020-12-26  24677.02  26718.07  24522.69  26437.04   26437.04  48332647295
2020-12-27  26439.37  28288.84  25922.77  26272.29   26272.29  66479895605
2020-12-28  26280.82  27389.11  26207.64  27084.81   27084.81  49056742893
2020-12-29  27081.81  27370.72  25987.30  27362.44   27362.44  45265946774
2020-12-30  27792.21  28533.95  27445.65  27814.65   27814.65  53714993152


### Other Data: S&P 500 Index (^GSPC):

In [7]:
# Download daily S&P 500 index (^GSPC) prices from Yahoo Finance.
# NOTE(review): the recorded output below ends on 2020-12-29, before the
# requested `end` date -- confirm the final days of 2020 are not needed.
yf_sp500_df = yf.download(
    "^GSPC",
    start="2017-01-01",
    end="2020-12-31",
)

# Data-quality report, each item followed by a blank-line separator:
# column dtypes, (rows, columns), every row whose values are duplicated
# elsewhere in the frame (keep=False flags all copies), and null counts.
for quality_check in (
    yf_sp500_df.dtypes,
    yf_sp500_df.shape,
    yf_sp500_df[yf_sp500_df.duplicated(keep=False)],
    yf_sp500_df.isnull().sum(),
):
    print(quality_check)
    print("\n")

# Finish with the most recent rows as a sanity check on the date range.
print(yf_sp500_df.tail())

[*********************100%***********************]  1 of 1 downloaded
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object


(1005, 6)


Empty DataFrame
Columns: [Open, High, Low, Close, Adj Close, Volume]
Index: []


Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64


               Open     High      Low    Close  Adj Close      Volume
Date                                                                 
2020-12-22  3698.08  3698.26  3676.16  3687.26    3687.26  4023940000
2020-12-23  3693.42  3711.24  3689.28  3690.01    3690.01  3772630000
2020-12-24  3694.03  3703.82  3689.32  3703.06    3703.06  1885090000
2020-12-28  3723.03  3740.51  3723.03  3735.36    3735.36  3527460000
2020-12-29  3750.01  3756.12  3723.31  3727.04    3727.04  3387030000


## Importing Data from Investing by Reading CSV:

### Other Data: M2 US Money Supply:

In [8]:
# Load the M2 series from the local CSV: the DATE column becomes a parsed
# index, rows are ordered by DATE, and the value column is relabelled.
# NOTE(review): `infer_datetime_format` is kept as in the original call; newer
# pandas releases may warn about it -- confirm against the installed version.
csvpath = Path("Resources/M2.csv")
m2_df = (
    pd.read_csv(
        csvpath,
        index_col="DATE",
        parse_dates=True,
        infer_datetime_format=True,
    )
    .sort_values("DATE")
    .rename(columns={"M2": "M2(billions)"})
)

# Data-quality report, each item followed by a blank-line separator:
# column dtypes, (rows, columns), every row whose values are duplicated
# elsewhere in the frame (keep=False flags all copies), and null counts.
for quality_check in (
    m2_df.dtypes,
    m2_df.shape,
    m2_df[m2_df.duplicated(keep=False)],
    m2_df.isnull().sum(),
):
    print(quality_check)
    print("\n")

# Finish with the most recent rows as a sanity check on the date range.
print(m2_df.tail())

M2(billions)    float64
dtype: object


(206, 1)


Empty DataFrame
Columns: [M2(billions)]
Index: []


M2(billions)    0
dtype: int64


            M2(billions)
DATE                    
2020-11-09       19067.1
2020-11-16       19108.4
2020-11-23       19120.7
2020-11-30       18998.0
2020-12-07       19226.1


## Rename Columns & Copy Original DataFrames with Selected Columns:

In [9]:
# Keep only the adjusted close and volume of the dollar index and prefix the
# column names with the asset label so they stay unique when the assets are
# later placed side by side.
# The columns are picked by name rather than by position: a positional pick
# of "the last two columns" would silently grab the wrong series if the
# downloaded column layout ever changed, whereas a missing name fails loudly.
selected_dollar_df = yf_dollar_df[["Adj Close", "Volume"]].add_prefix("dollar_")
selected_dollar_df.tail(3)

Unnamed: 0_level_0,dollar_Adj Close,dollar_Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-12-23,90.34,27553
2020-12-28,90.275,10826
2020-12-30,89.825,5764


In [10]:
# Keep only the adjusted close and volume of gold and prefix the column names
# with the asset label so they stay unique when the assets are later placed
# side by side.
# The columns are picked by name rather than by position: a positional pick
# of "the last two columns" would silently grab the wrong series if the
# downloaded column layout ever changed, whereas a missing name fails loudly.
selected_gold_df = yf_gold_df[["Adj Close", "Volume"]].add_prefix("gold_")
selected_gold_df.tail(3)

Unnamed: 0_level_0,gold_Adj Close,gold_Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-12-28,1877.2,75
2020-12-29,1879.7,75
2020-12-30,1882.7,34899


In [11]:
# Keep only the adjusted close and volume of bitcoin and prefix the column
# names with the asset label so they stay unique when the assets are later
# placed side by side.
# The columns are picked by name rather than by position: a positional pick
# of "the last two columns" would silently grab the wrong series if the
# downloaded column layout ever changed, whereas a missing name fails loudly.
selected_bitcoin_df = yf_bitcoin_df[["Adj Close", "Volume"]].add_prefix("bitcoin_")
selected_bitcoin_df.tail(3)

Unnamed: 0_level_0,bitcoin_Adj Close,bitcoin_Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-12-28,27084.81,49056742893
2020-12-29,27362.44,45265946774
2020-12-30,27814.65,53714993152


In [12]:
# Keep only the adjusted close and volume of the S&P 500 and prefix the column
# names with the asset label so they stay unique when the assets are later
# placed side by side.
# The columns are picked by name rather than by position: a positional pick
# of "the last two columns" would silently grab the wrong series if the
# downloaded column layout ever changed, whereas a missing name fails loudly.
selected_sp500_df = yf_sp500_df[["Adj Close", "Volume"]].add_prefix("sp500_")
selected_sp500_df.tail(3)

Unnamed: 0_level_0,sp500_Adj Close,sp500_Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-12-24,3703.06,1885090000
2020-12-28,3735.36,3527460000
2020-12-29,3727.04,3387030000


## Concatenating Selected DataFrames for Further Analysis:

In [13]:
# Place the four per-asset frames side by side. The inner join keeps only the
# dates present in every frame, so a date missing from any one asset is dropped.
joined_df = pd.concat(
    [selected_dollar_df, selected_gold_df, selected_bitcoin_df, selected_sp500_df],
    axis='columns',
    join='inner',
    sort=True,
)

# Data-quality report, each item followed by a blank-line separator:
# column dtypes, (rows, columns), every row whose values are duplicated
# elsewhere in the frame (keep=False flags all copies), and null counts.
for quality_check in (
    joined_df.dtypes,
    joined_df.shape,
    joined_df[joined_df.duplicated(keep=False)],
    joined_df.isnull().sum(),
):
    print(quality_check)
    print("\n")

# Finish with the most recent rows as a sanity check on the date range.
print(joined_df.tail())

dollar_Adj Close     float64
dollar_Volume          int64
gold_Adj Close       float64
gold_Volume            int64
bitcoin_Adj Close    float64
bitcoin_Volume         int64
sp500_Adj Close      float64
sp500_Volume           int64
dtype: object


(993, 8)


Empty DataFrame
Columns: [dollar_Adj Close, dollar_Volume, gold_Adj Close, gold_Volume, bitcoin_Adj Close, bitcoin_Volume, sp500_Adj Close, sp500_Volume]
Index: []


dollar_Adj Close     0
dollar_Volume        0
gold_Adj Close       0
gold_Volume          0
bitcoin_Adj Close    0
bitcoin_Volume       0
sp500_Adj Close      0
sp500_Volume         0
dtype: int64


            dollar_Adj Close  dollar_Volume  gold_Adj Close  gold_Volume  \
Date                                                                       
2020-12-18            89.953          17930          1885.7          276   
2020-12-21            89.949          39253          1879.2          136   
2020-12-22            90.546          21871          1866.6          233

## Calculating Historical Daily Returns:

In [14]:
# Turn each asset's adjusted close into a row-over-row percentage change,
# rename the columns to "<asset>_return", and drop rows containing NaN
# (the first row has no previous value to compare against).
joined_returns_df = (
    joined_df[['dollar_Adj Close', 'gold_Adj Close', 'bitcoin_Adj Close', 'sp500_Adj Close']]
    .pct_change()
    .rename(
        columns={
            'dollar_Adj Close': 'dollar_return',
            'gold_Adj Close': 'gold_return',
            'bitcoin_Adj Close': 'bitcoin_return',
            'sp500_Adj Close': 'sp500_return',
        }
    )
    .dropna()
)
joined_returns_df.head()

Unnamed: 0_level_0,dollar_return,gold_return,bitcoin_return,sp500_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01-04,-0.004767,0.00293,0.106233,0.005722
2017-01-05,-0.011499,0.013662,-0.12241,-0.000771
2017-01-06,0.006708,-0.006612,-0.109712,0.003517
2017-01-09,-0.002935,0.009898,0.000698,-0.003549
2017-01-10,0.000854,0.000591,0.005372,0.0


## Candlestick with Volume:

In [15]:
from bokeh.models import BooleanFilter, CDSView, Select, Range1d, HoverTool, CrosshairTool
from bokeh.models.formatters import NumeralTickFormatter
from bokeh.layouts import gridplot
from bokeh.palettes import Category20
from bokeh.plotting import figure, output_file, show, ColumnDataSource

In [16]:
# Candlestick with Volume:
def plot_candlesticks(df_input, title="S&P500 Candlestick with Volume"):
    """Build a candlestick price chart with a linked volume chart underneath.

    Args:
        df_input: DataFrame providing ``Open``, ``High``, ``Low``, ``Close``
            and ``Volume`` columns. Its index is moved into a column with
            ``reset_index()`` and read back as ``Date``, so the index is
            expected to be named ``Date`` and hold date-like values.
        title: Heading shown above the price chart. Defaults to the
            previously hard-coded S&P 500 title, so existing calls are
            unaffected.

    Returns:
        The ``gridplot`` layout holding the price chart stacked on top of the
        volume chart; both charts share one x-range.
    """
    import math  # local import: only needed for the tick-label rotation

    stock = ColumnDataSource(data=dict(index=[], Date=[], Open=[], Close=[], High=[], Low=[], Volume=[]))
    stock.data = stock.from_df(df_input.reset_index())

    # Settings:
    VBAR_WIDTH = 0.5
    RED = Category20[7][6]
    BLUE = Category20[3][0]
    W_PLOT = 1500
    H_PLOT = 600
    LABEL_ANGLE = math.pi / 4  # 45 degrees, in radians

    # Tools Selections:
    # Swapping in wheel_zoom makes the y-axis adjustable as well, but the bars
    # then become very large.
    TOOLS = "pan,xwheel_zoom,box_zoom,hover,crosshair,undo,redo,reset,save"
    linked_crosshair = CrosshairTool(dimensions="both")

    # Graph One:
    p1 = figure(plot_width=W_PLOT, plot_height=H_PLOT, tools=TOOLS, active_scroll='xwheel_zoom', active_drag='pan',
               title=title, toolbar_location='above')

    # Split the rows into rising and falling sessions. Flat sessions
    # (Close == Open) are grouped with the rising ones; with a strict
    # comparison on both sides they would belong to neither view and would
    # not be drawn at all.
    opens = stock.data['Open']
    closes = stock.data['Close']
    inc = closes >= opens
    dec = opens > closes
    view_inc = CDSView(source=stock, filters=[BooleanFilter(inc)])
    view_dec = CDSView(source=stock, filters=[BooleanFilter(dec)])

    # Bars are positioned by integer row index; map each index to a
    # "Mon-DD" string so the axes show dates instead of row numbers.
    first_index = int(stock.data['index'][0])
    date_labels = {
        first_index + offset: date.strftime('%b-%d')
        for offset, date in enumerate(pd.to_datetime(stock.data["Date"]))
    }
    p1.xaxis.major_label_overrides = date_labels
    p1.xaxis.bounds = (stock.data['index'][0], stock.data['index'][-1])

    # Rendering the Graph: wicks (Low-High segments) first, then bodies.
    p1.segment(x0='index', x1='index', y0='Low', y1='High', color=BLUE, source=stock, view=view_inc)
    p1.segment(x0='index', x1='index', y0='Low', y1='High', color=RED, source=stock, view=view_dec)

    p1.vbar(x='index', width=VBAR_WIDTH, top='Open', bottom='Close', fill_color=BLUE, line_color=BLUE,
           source=stock, view=view_inc, name="price")
    p1.vbar(x='index', width=VBAR_WIDTH, top='Open', bottom='Close', fill_color=RED, line_color=RED,
           source=stock, view=view_dec, name="price")

    # Formatting Graph One:
    p1.xaxis.major_label_orientation = LABEL_ANGLE
    p1.x_range.range_padding = 0.05
    p1.xaxis.ticker.desired_num_ticks = 50
    p1.yaxis.formatter = NumeralTickFormatter(format=' 0,0[.]000')
    p1.add_tools(linked_crosshair)

    # Select the hover tool created through the TOOLS string:
    price_hover = p1.select(dict(type=HoverTool))
    # Only the candle bodies (glyphs named "price") trigger the tooltip.
    price_hover.names = ["price"]
    # Creating tooltips
    price_hover.tooltips = [("Open", "@Open{0,0.00}"),
                            ("Close", "@Close{0,0.00}"),
                            ("Volume", "@Volume{(0.00 a)}")]
    # NOTE(review): no tooltip above references Date, so this formatter has
    # nothing to format -- TODO confirm whether a Date tooltip was intended.
    price_hover.formatters = {"Date": 'datetime'}

    # Added-on Graph Two For Volume:
    # NOTE(review): the x values are integer row positions, not timestamps, so
    # the datetime axis type looks unintended -- confirm before changing it.
    p2 = figure(x_axis_type="datetime", tools="", toolbar_location=None, plot_width=W_PLOT,
                plot_height=200, x_range=p1.x_range)

    # Same index-to-date labels as the price chart (separate dict per axis).
    p2.xaxis.major_label_overrides = dict(date_labels)

    # Render the Graph:
    p2.vbar(stock.data['index'], VBAR_WIDTH, stock.data['Volume'])

    # Formatting Graph Two:
    p2.xaxis.major_label_orientation = LABEL_ANGLE
    p2.xaxis.ticker.desired_num_ticks = 50
    p2.yaxis.formatter = NumeralTickFormatter(format='0,0[.]000')
    p2.add_tools(linked_crosshair)

    return gridplot([[p1], [p2]])

In [17]:
# Build the candlestick/volume layout for the S&P 500 frame, then pass it to
# Bokeh's show() to display it.
sp500_candlestick_layout = plot_candlesticks(yf_sp500_df)
show(sp500_candlestick_layout)