# 0.Preparation

## Load Packages

In [None]:
# for numerical analysis
import numpy as np

# to store and process data in dataframe
import pandas as pd

# to interface with operating system
import os

# for offline plotting
import matplotlib.pyplot as plt

# interactive visualization
import plotly.express as px
import seaborn as sns; sns.set()

from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=True)

import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

# for trendlines
import statsmodels

## Define Color Scheme

In [None]:
# Colour palette
# Hexadecimal colour codes in RRGGBB form (true black #000000, true white #ffffff)
cnf = '#393e46'
dth = '#ff2e63'
rec = '#21bf73'
act = '#fe9801'

## Import data

In [None]:
# list files
# IPython shell escape: show the contents of the Covid-19 input dataset directory.
!ls ../input/corona-virus-report

In [None]:
# Latest per-country snapshot
country_wise_path = '../input/corona-virus-report/country_wise_latest.csv'
country_wise = pd.read_csv(country_wise_path)

# Turn empty strings into NaN, then replace every missing value with 0
country_wise = country_wise.replace('', np.nan)
country_wise = country_wise.fillna(0)

# Column summary, followed by a preview of the first 10 rows
country_wise.info()
country_wise.head(10)

In [None]:
# Daily figures grouped by day and country
full_grouped = pd.read_csv('../input/corona-virus-report/full_grouped.csv')

# Column summary before the conversion (Date is still an object/string column)
full_grouped.info()

# Convert Date from dtype "object" (string) to dtype "datetime"
full_grouped['Date'] = pd.to_datetime(full_grouped['Date'])

# Column summary after the conversion
full_grouped.info()

# Preview goes last: a notebook cell only renders its final expression,
# so a bare .head() in the middle of the cell would display nothing.
full_grouped.head(10)

# 1.How is the Covid19 situation in South Korea?

In [None]:
# Boolean mask: True for rows whose Country/Region contains 'South Korea'
selected = full_grouped['Country/Region'].str.contains('South Korea')

# Apply the mask and take an explicit copy, so that adding a column below
# writes to an independent frame instead of a slice of full_grouped
# (avoids pandas' SettingWithCopyWarning).
korea = full_grouped[selected].copy()

# Row-over-row change in the number of active cases
korea["New active"] = korea["Active"].diff()

korea.info()
korea.tail(10)

In [None]:
# Reshape wide -> long: one row per (Date, Case) with its Count, so that
# plotly can colour the areas by case type.
temp = korea.melt(id_vars="Date", value_vars=['New cases', 'New deaths'],
                  var_name='Case', value_name='Count')

# Stacked area chart of new cases and new deaths over time, with a range slider.
# (The former mid-cell `temp.head()` was removed: its result was never displayed
# because only a cell's last expression is rendered.)
fig = px.area(temp, x="Date", y="Count", color='Case', height=600, width=1200,
              title='Cases over time - S Korea', color_discrete_sequence=[rec, dth, act])
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()

## COVID-19 situation in South Korea:
1. Cases first appeared in late January and early February.
2. There was a large peak from late February to early March.
3. The number of new confirmed cases decreased rapidly from March to May.
4. From June, the situation worsened, with more new confirmed cases.
5. There was another peak at the end of July.

## Unusual patterns in the data?

<a id="subsection-two-two"></a>
## 2.2 Import Data

In [None]:
# Walk the Kaggle input folder, build the full path of every file found,
# and keep the paths in sorted order
files = sorted(
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('../input/korea-econfin-data')
    for filename in filenames
)

# Output the list of sorted file names
files

In [None]:
# Names for the loaded series; they become the dictionary keys.
# They are paired with the sorted file list by position.
series_name = ['btc','cpi','gold','korea','high_yield_bond','inv_grade_bond','moderna','employment','tesla_robinhood','trea_20y_bond','trea_10y_yield','tesla','korea_m1','wti']

# Read every CSV file into a DataFrame; a lone '.' in a cell is treated as a missing value
series = []
for csv_path in files:
    series.append(pd.read_csv(csv_path, na_values=['.']))

# Pair each name with its DataFrame: series name = key, DataFrame = value
series_dict = {name: frame for name, frame in zip(series_name, series)}

Details on list comprehension [HERE](https://towardsdatascience.com/comprehending-the-concept-of-comprehensions-in-python-c9dafce5111).   
Details on dictionary data structure [HERE](https://realpython.com/python-dicts/).

<a id="subsection-two-three"></a>
## 2.3 Wrangle Data

In [None]:
# 1. korea
korea = series_dict['korea']
korea['Date'] = pd.to_datetime(korea['Date'])
korea.rename(columns={'Adj Close':'korea'}, inplace=True)
korea['korea_return'] = korea['korea'].pct_change()
# Rolling standard deviation of the returns, scaled by sqrt(window length).
# Assuming one row per trading day: 20 rows ~ one month (monthly volatility),
# 252 rows ~ one year (annualised volatility).
korea['korea_volatility_1m'] = korea['korea_return'].rolling(20).std() * (20)**(1/2)
korea['korea_volatility_1y'] = korea['korea_return'].rolling(252).std() * (252)**(1/2)
# .copy() makes the column subset an independent frame, so the assignment below
# does not raise pandas' SettingWithCopyWarning.
korea = korea[['Date','korea','korea_return','korea_volatility_1m','korea_volatility_1y']].copy()
# One-month forward cumulative return: reverse the series, take a 20-row rolling sum,
# reverse back -> sum of the returns over the 20 rows starting at the current row.
# min_periods=1 lets the final rows use shorter windows.
# NOTE(review): the window includes the current row's own return - confirm this is intended.
korea['one_month_forward_korea_return'] = korea['korea_return'][::-1].rolling(window=20, min_periods=1).sum()[::-1]

In [None]:
# 2. Bitcoin
btc = series_dict['btc']
btc['Date'] = pd.to_datetime(btc['Date'])
btc.rename(columns={'Adj Close':'btc'}, inplace=True)
btc['btc_return'] = btc['btc'].pct_change()
# Rolling standard deviation of the returns over 20 / 252 rows, scaled by sqrt(window length)
btc['btc_volatility_1m'] = btc['btc_return'].rolling(20).std() * (20)**(1/2)
btc['btc_volatility_1y'] = btc['btc_return'].rolling(252).std() * (252)**(1/2)
# .copy() makes the column subset an independent frame, so the assignment below
# does not raise pandas' SettingWithCopyWarning.
btc = btc[['Date','btc','btc_return','btc_volatility_1m','btc_volatility_1y']].copy()
# Sum of the returns over the 20 rows starting at the current row (reverse, rolling sum, reverse back)
btc['one_month_forward_btc_return'] = btc['btc_return'][::-1].rolling(window=20, min_periods=1).sum()[::-1]

In [None]:
# 3. Gold
gold = series_dict['gold']
gold['Date'] = pd.to_datetime(gold['DATE'])
gold.rename(columns={'GOLDPMGBD228NLBM':'gold'}, inplace=True)
# Fill gaps of up to two consecutive missing prices with the last observed price.
# This is what the earlier lag-1 / lag-2 helper columns achieved; longer gaps stay missing.
gold['gold'] = gold['gold'].ffill(limit=2).astype('float64')
gold['gold_return'] = gold['gold'].pct_change()
# Rolling standard deviation of the returns over 20 / 252 rows, scaled by sqrt(window length)
gold['gold_volatility_1m'] = gold['gold_return'].rolling(20).std() * (20)**(1/2)
gold['gold_volatility_1y'] = gold['gold_return'].rolling(252).std() * (252)**(1/2)
# .copy() makes the column subset an independent frame, so the assignment below
# does not raise pandas' SettingWithCopyWarning.
gold = gold[['Date','gold','gold_return','gold_volatility_1m','gold_volatility_1y']].copy()
# Sum of the returns over the 20 rows starting at the current row (reverse, rolling sum, reverse back)
gold['one_month_forward_gold_return'] = gold['gold_return'][::-1].rolling(window=20, min_periods=1).sum()[::-1]

In [None]:
# 4. High Yield Bond
high_yield_bond = series_dict['high_yield_bond']
high_yield_bond['Date'] = pd.to_datetime(high_yield_bond['Date'])
high_yield_bond.rename(columns={'Adj Close':'high_yield_bond'}, inplace=True)
high_yield_bond['high_yield_bond_return'] = high_yield_bond['high_yield_bond'].pct_change()
# Rolling standard deviation of the returns over 20 / 252 rows, scaled by sqrt(window length)
high_yield_bond['high_yield_bond_volatility_1m'] = high_yield_bond['high_yield_bond_return'].rolling(20).std() * (20)**(1/2)
high_yield_bond['high_yield_bond_volatility_1y'] = high_yield_bond['high_yield_bond_return'].rolling(252).std() * (252)**(1/2)
# .copy() makes the column subset an independent frame, so the assignment below
# does not raise pandas' SettingWithCopyWarning.
high_yield_bond = high_yield_bond[['Date','high_yield_bond','high_yield_bond_return','high_yield_bond_volatility_1m',
                                   'high_yield_bond_volatility_1y']].copy()
# Sum of the returns over the 20 rows starting at the current row (reverse, rolling sum, reverse back)
high_yield_bond['one_month_forward_high_yield_bond_return'] = high_yield_bond['high_yield_bond_return'][::-1].rolling(window=20, min_periods=1).sum()[::-1]

In [None]:
# 5. Investment Grade Bond
inv_grade_bond = series_dict['inv_grade_bond']
inv_grade_bond['Date'] = pd.to_datetime(inv_grade_bond['Date'])
inv_grade_bond.rename(columns={'Adj Close':'inv_grade_bond'}, inplace=True)
inv_grade_bond['inv_grade_bond_return'] = inv_grade_bond['inv_grade_bond'].pct_change()
# Rolling standard deviation of the returns over 20 / 252 rows, scaled by sqrt(window length)
inv_grade_bond['inv_grade_bond_volatility_1m'] = inv_grade_bond['inv_grade_bond_return'].rolling(20).std() * (20)**(1/2)
inv_grade_bond['inv_grade_bond_volatility_1y'] = inv_grade_bond['inv_grade_bond_return'].rolling(252).std() * (252)**(1/2)
# .copy() makes the column subset an independent frame, so the assignment below
# does not raise pandas' SettingWithCopyWarning.
inv_grade_bond = inv_grade_bond[['Date','inv_grade_bond','inv_grade_bond_return','inv_grade_bond_volatility_1m',
                                 'inv_grade_bond_volatility_1y']].copy()
# Sum of the returns over the 20 rows starting at the current row (reverse, rolling sum, reverse back)
inv_grade_bond['one_month_forward_inv_grade_bond_return'] = inv_grade_bond['inv_grade_bond_return'][::-1].rolling(window=20, min_periods=1).sum()[::-1]

In [None]:
# 6. Crude Oil WTI
wti = series_dict['wti']
wti['Date'] = pd.to_datetime(wti['DATE'])
wti.rename(columns={'WTISPLC':'wti'}, inplace=True)
wti['wti_return'] = wti['wti'].pct_change()
# Rolling standard deviation of the returns over 20 / 252 rows, scaled by sqrt(window length).
# NOTE(review): 'WTISPLC' looks like a monthly series - if so, these windows span
# 20 / 252 months rather than trading days; confirm the data frequency.
wti['wti_volatility_1m'] = wti['wti_return'].rolling(20).std() * (20)**(1/2)
wti['wti_volatility_1y'] = wti['wti_return'].rolling(252).std() * (252)**(1/2)
# .copy() makes the column subset an independent frame, so the assignment below
# does not raise pandas' SettingWithCopyWarning.
wti = wti[['Date','wti','wti_return','wti_volatility_1m','wti_volatility_1y']].copy()
# Sum of the returns over the 20 rows starting at the current row (reverse, rolling sum, reverse back)
wti['one_month_forward_wti_return'] = wti['wti_return'][::-1].rolling(window=20, min_periods=1).sum()[::-1]

In [None]:
# 7. Inflation
# Parse the dates and give the value column a readable name on the loaded frame,
# then keep only the two columns needed for merging.
cpi_source = series_dict['cpi']
cpi_source['Date'] = pd.to_datetime(cpi_source['DATE'])
cpi_source.rename(columns={'CUUR0000SEHE': 'cpi'}, inplace=True)
cpi = cpi_source.loc[:, ['Date', 'cpi']]

In [None]:
# 8. Employment
# Parse the dates and give the value column a readable name on the loaded frame,
# then keep only the two columns needed for merging.
employment_source = series_dict['employment']
employment_source['Date'] = pd.to_datetime(employment_source['DATE'])
employment_source.rename(columns={'PAYEMS_CHG': 'employment'}, inplace=True)
employment = employment_source.loc[:, ['Date', 'employment']]

In [None]:
# 9. Korean M1
# Parse the dates and give the value column a readable name on the loaded frame,
# then keep only the two columns needed for merging.
# NOTE(review): the source column is called 'WALCL', which does not obviously
# denote a Korean M1 series - confirm the file content matches this label.
korea_m1_source = series_dict['korea_m1']
korea_m1_source['Date'] = pd.to_datetime(korea_m1_source['DATE'])
korea_m1_source.rename(columns={'WALCL': 'korea_m1'}, inplace=True)
korea_m1 = korea_m1_source.loc[:, ['Date', 'korea_m1']]

In [None]:
# Preview the last 10 rows of the `korea` frame
korea.tail(10)

In [None]:
# Load the NBER recession indicator files (monthly and daily) with pandas read_csv
nber_dir = '../input/nber-based-recession-indicators-united-states'
nber_recession_indicator_month = pd.read_csv(nber_dir + '/USRECM.csv')
nber_recession_indicator_day = pd.read_csv(nber_dir + '/USRECD.csv')

# Daily indicator: parse "date" into a datetime "Date" column, cast "value" to a
# boolean "recession" flag, and keep just those two columns
nber_recession_indicator_day = nber_recession_indicator_day.assign(
    Date=lambda frame: pd.to_datetime(frame["date"]),
    recession=lambda frame: frame["value"].astype('bool'),
)[["Date", "recession"]]

In [None]:
# Frames to attach to the Korea frame, each joined on Date
asset_classes = [btc, cpi, gold, high_yield_bond, inv_grade_bond, employment, korea_m1, wti]

# Start from the Korea frame and left-join the daily recession flag,
# so every Korea row is kept
baseline = korea.merge(nber_recession_indicator_day, how='left', on='Date')

# Left-join each remaining frame in turn
for asset_frame in asset_classes:
    baseline = baseline.merge(asset_frame, how='left', on='Date')

# Flag every row dated 2020-03-01 or later as recession, then treat any
# still-missing flag as "no recession" and store the column as bool
from_march_2020 = baseline['Date'] >= '2020-03-01'
baseline.loc[from_march_2020, "recession"] = 1
baseline["recession"] = baseline["recession"].fillna(0).astype(bool)

baseline.info()

Details on merge, join, and concat [HERE](https://realpython.com/pandas-merge-join-and-concat/).

<a id="section-three"></a>
# 3. WHAT DO DECADES OF ASSET RETURNS TELL US ABOUT INVESTING?

<a id="subsection-three-one"></a>
## 3.1 Question 1: What is the risk/return profile of different asset classes?

In [None]:
# Preview the last rows of the merged frame
baseline.tail()

In [None]:
# Use Date as the index (needed for the time-based resampling that follows)
baseline = baseline.set_index('Date')
baseline.tail()

In [None]:
# Yearly return per asset: keep only rows where every return is present,
# bucket them by calendar year and add up the returns
return_columns = ["korea_return", "btc_return", "gold_return", "high_yield_bond_return",
                  "inv_grade_bond_return", "wti_return"]
baseline_yearly_return = (
    baseline[return_columns]
    .dropna()
    .resample('Y')
    .sum()
    .reset_index()
)

# Earliest year-end in the result (the original author noted 2010-12-31)
print(baseline_yearly_return['Date'].min())
baseline_yearly_return.head()

Details on method resample [HERE](https://www.geeksforgeeks.org/python-pandas-dataframe-resample/).

In [None]:
# Yearly mean of the 1-year volatility per asset: keep only rows where every
# volatility is present, bucket them by calendar year and average
volatility_columns = ["korea_volatility_1y", "btc_volatility_1y", "gold_volatility_1y",
                      "high_yield_bond_volatility_1y", "inv_grade_bond_volatility_1y",
                      "wti_volatility_1y"]
baseline_yearly_volatility_1y = (
    baseline[volatility_columns]
    .dropna()
    .resample('Y')
    .mean()
    .reset_index()
)

# Combine yearly returns and yearly volatilities on their shared Date column
baseline_yearly = pd.merge(baseline_yearly_return, baseline_yearly_volatility_1y, on='Date')

baseline_yearly.head()

In [None]:
# Wide -> tall: one row per (Date, original column name) with its value
baseline_yearly_reshaped = pd.melt(baseline_yearly, id_vars='Date',
                                   var_name='key', value_name='value')
baseline_yearly_reshaped.head()

For more details on method melt [HERE](https://www.geeksforgeeks.org/python-pandas-melt/)

In [None]:
# Split each melted column name (e.g. 'high_yield_bond_return') into the metric
# it measures and the asset class it belongs to.
is_return = baseline_yearly_reshaped['key'].str.contains(pat='return')
baseline_yearly_reshaped['metric'] = np.where(is_return, 'return', 'volatility')

# Asset class = the key with its trailing metric suffix removed. This gives full
# names ('korea', 'gold', 'high_yield_bond', ...) instead of cutting every key
# down to its first three characters.
baseline_yearly_reshaped['asset_class'] = baseline_yearly_reshaped['key'].str.replace(
    r'_(?:return|volatility_1y)$', '', regex=True)

baseline_yearly_reshaped = baseline_yearly_reshaped[['Date','metric','asset_class','value']]
baseline_yearly_reshaped.head()

In [None]:
# Display the mean yearly return and the mean yearly volatility for each asset class.
# Only the numeric 'value' column is averaged: taking the mean of the whole frame
# would also hit the string 'metric' column, which raises a TypeError on pandas >= 2.0.
is_return_row = baseline_yearly_reshaped['metric'] == 'return'
is_volatility_row = baseline_yearly_reshaped['metric'] == 'volatility'
print(baseline_yearly_reshaped[is_return_row].groupby('asset_class')['value'].mean())
print(baseline_yearly_reshaped[is_volatility_row].groupby('asset_class')['value'].mean())

In [None]:
# Preview the last rows of `baseline`
baseline.tail()

In [None]:
# Move the index back into a regular column and restore the default integer index
baseline = baseline.reset_index()
baseline.tail()

In [None]:
# Summary statistics of the return and 1-year volatility of every asset class
summary_assets = ["korea", "btc", "gold", "high_yield_bond", "inv_grade_bond", "wti"]
summary_columns = [asset + suffix
                   for asset in summary_assets
                   for suffix in ("_return", "_volatility_1y")]
baseline[summary_columns].describe()

<a id="subsection-three-two"></a>
## Which asset class is a good hedge against recession?

In [None]:
# Joint distribution of gold returns (x) and Korean returns (y) with a fitted regression line
sns.jointplot(data=baseline, x='gold_return', y='korea_return', kind='reg')

Korean stock index and gold returns seem uncorrelated.

In [None]:
def plot_chart(series):
    """Show a scatter plot of one `baseline` column over time, coloured by recession flag.

    Parameters
    ----------
    series : str
        Name of the `baseline` column plotted on the y-axis. Rows where this
        column is null are left out. The name is also used as the figure title.
    """
    observed = baseline.dropna(subset=[series])
    recession_palette = ['#636EFA', '#FFA15A']
    fig = px.scatter(observed, x="Date", y=series, color="recession",
                     color_discrete_sequence=recession_palette, width=1200)
    # Small markers only, and no axis titles (the figure title names the series)
    fig.update_traces(mode='markers', marker_size=4)
    fig.update_layout(title=series, xaxis_title="", yaxis_title="")
    fig.show()

In [None]:
# Plot the `korea` column over time, coloured by the recession flag
plot_chart("korea")

In [None]:
# Plot the `gold` column over time, coloured by the recession flag
plot_chart("gold")

Gold is not a good hedge against the stock market: its returns show little connection with stock returns. It may, however, serve as a hedge against economic recession.

In [None]:
# Plot the `btc` column over time, coloured by the recession flag
plot_chart('btc')

Bitcoin and the Korean stock index show similar trends in the face of economic crisis.

In [None]:
# Pairwise scatter plots of all asset returns, coloured by the recession flag
pairplot_columns = ["korea_return", "btc_return", "gold_return", "high_yield_bond_return",
                    "inv_grade_bond_return", "wti_return", "recession"]
baseline_returns = baseline[pairplot_columns]

sns.pairplot(data=baseline_returns, hue="recession")

<a id="subsection-three-three"></a>
## Does volatility foretell future return?    

In [None]:
def _plot_vol_vs_forward_return(series, **scatter_options):
    """Shared scatter of 1-month volatility vs one-month forward return with an OLS trendline.

    `series` is the asset prefix used in the `baseline` column names; rows where
    `<series>_return` is null are left out. Extra keyword arguments are passed
    straight to `px.scatter`.
    """
    rows_with_return = baseline.dropna(subset=[series + '_return'])
    fig = px.scatter(rows_with_return,
                     x=series + '_volatility_1m',
                     y='one_month_forward_' + series + '_return',
                     width=800,
                     trendline='ols',
                     **scatter_options)
    fig.update_layout(title=str(series) + ' volatility vs one-month forward return',
                      xaxis_title='', yaxis_title='')
    fig.show()


def plot_chart_vol_ret(series):
    """Show volatility vs one-month forward return for `series`, with one overall trendline."""
    _plot_vol_vs_forward_return(series)


def plot_chart_vol_ret_by_recession(series):
    """Show volatility vs one-month forward return for `series`, coloured by the recession flag."""
    _plot_vol_vs_forward_return(series, color='recession',
                                color_discrete_sequence=['#636EFA', '#FFA15A'])

In [None]:
# Scatter 1-month volatility against one-month forward return for 'korea', with an OLS trendline
plot_chart_vol_ret('korea')

In [None]:
# Same scatter for 'korea', with points and trendlines coloured by the recession flag
plot_chart_vol_ret_by_recession('korea')

In [None]:
# Correlation matrix of every asset's return and 1-year volatility plus the
# recession flag, computed on rows where all of these columns are present
corr_assets = ['korea', 'btc', 'gold', 'high_yield_bond', 'inv_grade_bond', 'wti']
corr_columns = [asset + suffix
                for asset in corr_assets
                for suffix in ('_return', '_volatility_1y')]
corr_columns.append('recession')
baseline_corr = baseline[corr_columns].dropna().corr()

# Annotated heatmap of the correlation matrix
fig, ax = plt.subplots(figsize=(20,10))
sns.heatmap(baseline_corr, annot=True, ax=ax)