In [2]:
from datetime import datetime
import pandas as pd
import numpy as np
import yaml
import time
import pandas_datareader.data as web

In [19]:
with open(f"../retrieve_target.yaml", 'rb') as file:
    retrieves = yaml.safe_load(file)

In [6]:
start = datetime(2017, 1, 1)
end = datetime.today()
print(f"retrieve from {start.strftime('%Y%m%d')} to {end.strftime('%Y%m%d')}")
for k, v in retrieves["ticker"].items():
    # read ticker and source
    ticker = k
    source = v["source"]

    # retrive data
    if source == "stooq":
        stock = web.StooqDailyReader(ticker, start=start, end=end).read()
    elif source == "yahoo":
        stock = web.DataReader(ticker, "yahoo", start=start, end=end)

    # data null check
    if len(stock) == 0:
        raise ValueError("retrieved data is null")

    # save data
    stock = stock.reset_index(drop=False)
    # stock.to_csv(f"../data/{ticker}.csv", index=None)
    with open(f"../data/{ticker}.csv", "wb") as f:
        stock.to_csv(f, index=False)
    print(f"successfuly retrived: {ticker}")

    # gentle to data source
    time.sleep(1)


retrieve from 20170101 to 20220625
successfuly retrived: VOO
successfuly retrived: VWO
successfuly retrived: VEA
successfuly retrived: VTI
successfuly retrived: BND
successfuly retrived: BTC-USD
successfuly retrived: ETH-USD
successfuly retrived: DBA
successfuly retrived: USO
successfuly retrived: QQQ
successfuly retrived: VGT
successfuly retrived: VHT
successfuly retrived: VCR
successfuly retrived: XLC
successfuly retrived: VFH
successfuly retrived: XLI
successfuly retrived: VDC
successfuly retrived: VPU
successfuly retrived: XLB
successfuly retrived: XLRE
successfuly retrived: XLE


In [8]:
# create date master
dt_master = pd.DataFrame(columns=['Date'])
for t in retrieves["ticker"].keys():
    with open(f"../data/{ticker}.csv", "rb") as f:
        df = pd.read_csv(f)
    dt_master = pd.concat([dt_master, df.loc[:, ['Date']]], axis=0)
dt_master = dt_master.drop_duplicates(subset="Date", keep="first")
dt_master = dt_master.sort_values('Date')

In [9]:
len(dt_master)

1378

In [20]:
with open(f"../portfolio.yaml", 'rb') as file:
    portfolio = yaml.safe_load(file)

In [23]:
num_holds = {k: 0 for k in retrieves['ticker'].keys()}
for k in portfolio['ticker'].keys():
    num_holds[k] = 1

In [30]:
stocks = dt_master
for k in num_holds.keys():
    with open(f"../data/{k}.csv", "rb") as f:
        df = pd.read_csv(f)
    df = df.add_suffix(f'_{k}')
    df_unq = f'Date_{k}'
    stocks = pd.merge(stocks, df, how='left', left_on='Date', right_on=df_unq)

In [32]:
# select use column
cols = [f'Close_{k}' for k in num_holds.keys()]
cols.insert(0, 'Date')
stocks = stocks.loc[:, cols]

In [33]:
# calculate portfolio value
def calc_portfolio(x):
    pf = 0
    for k, v in num_holds.items():
        pf += x[f'Close_{k}'] * v
    return pf
stocks['Close_Portfolio'] = stocks.apply(lambda x: calc_portfolio(x), axis=1)

In [27]:
# convert to int
for k in num_holds.keys():
    ticker = k
    stocks[f"Close_{k}"] = np.floor(
        pd.to_numeric(stocks[f"Close_{k}"], errors="coerce")
    ).astype("Int64")
stocks["Close_Portfolio"] = np.floor(
        pd.to_numeric(stocks["Close_Portfolio"], errors="coerce")
    ).astype("Int64")

In [28]:
stocks.to_pickle('../data/stocks.pkl')

In [50]:
# recent value ratio
recent_valid_index = stocks.dropna(subset=['Close_Portfolio']).tail(1).index.values[0]
# recent value percent
recent_values = []
for k in portfolio['ticker'].keys():
    recent = stocks.loc[recent_valid_index, f'Close_{k}'] * num_holds[k] / stocks.loc[recent_valid_index, 'Close_Portfolio'] * 100
    recent = round(recent, 2)
    recent_values.append(recent)
ratio = pd.DataFrame(data={'ticker': portfolio['ticker'].keys(), 'ratio_percent': recent_values})

In [59]:
ratio['type'] = ratio.ticker.apply(lambda x: portfolio['ticker'][x]['type'])
ratio['sector'] = ratio.ticker.apply(lambda x: portfolio['ticker'][x]['sector'])
ratio['detail'] = ratio.ticker.apply(lambda x: portfolio['ticker'][x]['detail'])
ratio['num_holds'] = ratio.ticker.apply(lambda x: num_holds[x])

In [60]:
ratio.to_pickle('../data/ratio.pkl')

In [61]:
ratio

Unnamed: 0,ticker,ratio_percent,type,detail,num_holds,sector
0,BND,0.32,bond,VANGUARD TOTAL BOND MARKET ETF,1,none
1,BTC-USD,90.85,cripto,Bitcoin / USD,1,none
2,ETH-USD,4.93,cripto,Etherium / USD,1,none
3,VEA,0.17,stock,VANGUARD FTSE DEVELOPED MARKETS ETF,1,none
4,VOO,1.5,stock,VANGUARD S&P 500 ETF,1,none
5,VTI,0.81,stock,VANGUARD TOTAL STOCK MARKET ETF,1,none
6,VWO,0.18,stock,VANGUARD FTSE EMERGING MARKETS ETF,1,none
7,QQQ,1.23,stock,Invesco QQQ ETF,1,IT


In [62]:
# calc sharpe ratio

In [135]:
sharpe = stocks.loc[:, ['Date', 'Close_Portfolio']]

In [136]:
sharpe = sharpe.dropna(subset=['Close_Portfolio'])

In [137]:
sharpe['lag_1d'] = sharpe['Close_Portfolio'].shift(1)

In [138]:
import math

In [139]:
sharpe['rate_change'] = np.log(sharpe.Close_Portfolio / sharpe.lag_1d)
# sharpe['rate_change'] = sharpe['Close_Portfolio'].pct_change()


invalid value encountered in log



In [140]:
sharpe['one_year_mean'] = sharpe['rate_change'].rolling(252).mean()

In [141]:
sharpe['one_year_std'] = sharpe['rate_change'].rolling(252).std()

In [146]:
sharpe['sharpe_ratio'] = sharpe.one_year_mean / sharpe.one_year_std

In [147]:
sharpe['sharpe_ratio_annual'] = sharpe['sharpe_ratio'] * 252 ** 0.5

In [148]:
sharpe

Unnamed: 0,Date,Close_Portfolio,lag_1d,rate_change,one_year_mean,one_year_std,sharpe_ratio,sharpe_ratio_annual
369,2018-06-21,7947.873239,,,,,,
370,2018-06-22,7241.293927,7947.873239,-0.093104,,,,
371,2018-06-25,7391.225173,7241.293927,0.020494,,,,
372,2018-06-26,7209.676925,7391.225173,-0.024869,,,,
373,2018-06-27,7276.272873,7209.676925,0.009195,,,,
...,...,...,...,...,...,...,...,...
1373,2022-06-16,22397.391104,24785.716265,-0.101323,-0.002453,0.043808,-0.055990,-0.888812
1374,2022-06-17,22510.961709,22397.391104,0.005058,-0.002195,0.043659,-0.050272,-0.798043
1375,2022-06-21,22807.602241,22510.961709,0.013092,-0.001665,0.043024,-0.038694,-0.614245
1376,2022-06-22,22009.001172,22807.602241,-0.035642,-0.001899,0.043047,-0.044116,-0.700326


In [149]:
import plotly.express as px
fig = px.line(sharpe, x="Date", y="sharpe_ratio_annual")
fig.show()

In [151]:
sharpe = sharpe.dropna(subset=["sharpe_ratio_annual"])
sharpe = sharpe.loc[:, ["Date", "sharpe_ratio_annual"]]

In [152]:
sharpe.to_pickle('../data/sharpe.pkl')