In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

# Matplotlib text defaults: Arial as the sans-serif face, 11 pt font size
plt.rcParams.update({'font.sans-serif': ['Arial'], 'font.size': 11})

# Base layout dict passed to every plotly figure; cells below overwrite
# 'title' (and in one case 'width') before building a figure from it.
mlayout = dict(
    title='TITLE',
    width=800,
    height=400,
    margin={'l': 10, 'r': 10, 't': 50, 'b': 10},
)

# Read trade history file

Read the trade-history CSV file downloaded from Rakuten Securities. The code expects the file to follow that export format exactly (Japanese column headers, Shift JIS encoding).

Modifications may be needed for history files from other sources.

In [2]:
fname = 'sample tradehistory (INVST).csv'

# columns of the export that the analysis never uses
unused_columns = ['受渡日','分配金','口座','取引','買付方法','経費','為替レート','受付金額[現地通貨]']

# English replacements for the Japanese headers that are kept
english_headers = {
    '約定日': 'date',
    'ファンド名': 'name',
    '数量［口］': 'quantity',
    '単価': 'buy price',
    '受渡金額/(ポイント利用)[円]': 'cost/(point)',
    '決済通貨': 'currency',
}

# shorter handles for the fund names
replace_titles = {
    'eMAXIS Slim 国内株式(TOPIX)': 'TOPIX',
    'eMAXIS Slim 国内リートインデックス': 'Domestic Reit',
    'eMAXIS Slim 先進国債券インデックス': 'Advanced government bond',
    'eMAXIS Slim 全世界株式(除く日本)': 'Global equities (excluding Japan)',
    'eMAXIS Slim 米国株式(S&P500)': 'S&P500',
    'eMAXIS NASDAQ100インデックス': 'NASDAQ100',
}

# The file is Shift JIS encoded and writes numbers with comma thousands separators.
# The trade date is kept both as the index (for joining with price data later)
# and as a regular column.
df_trades = (
    pd.read_csv(fname, parse_dates=True, thousands=',', encoding='Shift JIS')
    .drop(columns=unused_columns)
    .rename(columns=english_headers)
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date', drop=False)
    .replace(replace_titles)
)
df_trades.head()

Unnamed: 0_level_0,date,name,quantity,buy price,cost/(point),currency
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-08-02,2021-08-02,TOPIX,7259,13777,10000,円
2021-08-02,2021-08-02,Domestic Reit,9799,10205,10000,円
2021-08-03,2021-08-03,Advanced government bond,8459,11822,10000,円
2021-08-03,2021-08-03,Global equities (excluding Japan),6454,15495,10000,円
2021-08-03,2021-08-03,S&P500,6013,16632,10000,円


Build the list of asset names; later cells use these names as keys.

In [3]:
# distinct asset handles found in the trade history
assets = pd.unique(df_trades['name'])
assets

array(['TOPIX', 'Domestic Reit', 'Advanced government bond',
       'Global equities (excluding Japan)', 'S&P500', 'NASDAQ100'],
      dtype=object)

# Download price histories

Download historical price data from an online source. Here, I used emaxis.jp. Modifications may be necessary for other sources.

You will need to curate your own URL list covering every instrument in your trade history.

In [4]:
#! Warning! Running this cell will overwrite existing historical files.

import os
import urllib.request

# Download price histories and save them as csv files under historical_data/.
# This takes a few seconds depending on the number of items in url_list.

# keys must match the asset names in 'assets'
url_list = {'TOPIX':
            'https://emaxis.jp/content/csv/fundCsv.php?fund_cd=252634',

            'Domestic Reit':
            'https://emaxis.jp/content/csv/fundCsv.php?fund_cd=253669',

            'Advanced government bond':
            'https://emaxis.jp/content/csv/fundCsv.php?fund_cd=252667',

            'Global equities (excluding Japan)':
            'https://emaxis.jp/content/csv/fundCsv.php?fund_cd=253209',

            'S&P500':
            'https://emaxis.jp/content/csv/fundCsv.php?fund_cd=253266',

            'NASDAQ100':
            'https://emaxis.jp/content/csv/fundCsv.php?fund_cd=254062'}

# Check consistency of url_list and assets (all assets must be defined in url_list).
# This runs BEFORE any download so that a misconfigured url_list fails fast
# instead of after existing files have already been overwritten.
if (not set(assets).issubset(set(url_list.keys()))):
    raise Exception('Asset(s) missing in the url_list. Check the asset names and url_list.')

# urlretrieve does not create missing directories, so make sure the target exists
os.makedirs('historical_data', exist_ok=True)

for key, url in url_list.items():
    title = "historical_data/{0}.csv".format(key)
    urllib.request.urlretrieve(url, title)

In [5]:
# Individual price history files are combined into one dataframe with one
# NAV column per asset, indexed by date.
nav_columns = []

for asset in assets:
    asset_file = "historical_data/{0}.csv".format(asset)
    # the first two rows of each file are skipped and the columns are named explicitly
    df_asset = pd.read_csv(asset_file, skiprows=[0, 1], names=['date', 'nav_void', 'nav', 'div', 'aum'],
                           parse_dates=True, index_col=0, encoding='Shift JIS')

    nav_columns.append(df_asset['nav'].rename(asset))

# Outer-join on date. Assigning columns one at a time into an empty DataFrame
# would align every asset to the FIRST asset's dates and silently drop any
# date that the first asset does not have.
df_hist_all = pd.concat(nav_columns, axis=1).sort_index() if nav_columns else pd.DataFrame()

# the index label becomes the 'date' column header of the csv read back later
df_hist_all.index.name = 'date'

df_hist_all.to_csv('df_hist_all.csv')

# Build unified data structure

Combine the historical price data with the trading data. The result is a dictionary of DataFrames keyed by asset name.

In [9]:
# Reload the combined price table from disk and index it by date,
# keeping 'date' available as a regular column as well.
df_hist_all = pd.read_csv('df_hist_all.csv', parse_dates=True)
df_hist_all['date'] = pd.to_datetime(df_hist_all['date'])
df_hist_all = df_hist_all.set_index('date', drop=False)

# keep only the rows from the start date onwards
df_hist_all = df_hist_all.loc['2021-6-1':]

# One dataframe per asset: its market price joined with that asset's trades
dict_assets = {}

for asset in assets:
    prices = df_hist_all[[asset]].rename(columns={asset: 'market price'})
    asset_trades = df_trades[df_trades['name'] == asset]

    # left join on the date index: every price date is kept, trade columns are
    # filled only on dates where a trade exists
    joined = prices.merge(asset_trades, how='left', left_index=True, right_index=True)
    dict_assets[asset] = joined.drop(columns=['name'])


mlayout.update(title='Relative price increase from start date', width=1200)
fig = go.Figure(layout=mlayout)

for name, df_asset in dict_assets.items():
    market = df_asset['market price']
    # normalise by the first available price so that all assets start at 1
    starting_price = market.loc[market.first_valid_index()]
    fig.add_trace(go.Scatter(x=df_asset.index, y=market / starting_price, name=name))

# the figure is saved as html; a png export is left disabled:
# fig.write_image("relative_price_increase.png")
fig.write_html("relative_price_increase.html")
fig.show()

In [10]:
# Peek at the merged frame for one asset; dates without a trade show NaT/NaN in the trade columns
dict_assets['TOPIX'].head()

Unnamed: 0_level_0,market price,date,quantity,buy price,cost/(point),currency
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-06-01,13665,NaT,,,,
2021-06-02,13781,NaT,,,,
2021-06-03,13896,NaT,,,,
2021-06-04,13899,NaT,,,,
2021-06-07,13912,NaT,,,,


# Average acquisition price

In [11]:
for name, df_asset in dict_assets.items():

    # A row is a trade when the left merge filled in a quantity for that date.
    is_trade = df_asset['quantity'].notna()

    # Spending per trade; the division by 10000 follows from the buy price
    # being handled in units of 10,000 shares.
    trade_spending = (df_asset['buy price'] * df_asset['quantity'] / 10000).where(is_trade)
    trade_quantity = df_asset['quantity'].where(is_trade)

    # cumsum skips NaN rows while leaving them NaN in the result, so the running
    # totals appear only on trade rows. Working on whole columns also avoids
    # writing rows back by index label, which overwrote sibling rows whenever
    # two trades shared the same date.
    accum_spending = trade_spending.cumsum()
    accum_quantity = trade_quantity.cumsum()

    df_asset['avg acquisition'] = accum_spending / accum_quantity * 10000
    df_asset['accum spending']  = accum_spending
    df_asset['accum quantity']  = accum_quantity

    # plot chart for current asset
    mlayout['title'] = name
    fig = go.Figure(layout=mlayout)

    fig.add_scatter(x=df_hist_all['date'], y=df_hist_all[name], opacity=0.7, name='Market price')

    # trade rows only carry values; connectgaps joins them across the non-trade dates
    fig.add_scatter(x=df_asset['date'], y=df_asset['buy price'], mode='lines+markers', connectgaps=True, name='Buy price')
    fig.add_scatter(x=df_asset['date'], y=df_asset['avg acquisition'], mode='markers+lines', connectgaps=True, name='Avg. acqusition')

    fig.show()

# Profit & loss

Calculate the profit history by subtracting the average acquisition price from the asset price on each date.

In [12]:
for name, df_asset in dict_assets.items():

    # Carry the latest average acquisition price and holding quantity forward
    # to the dates between trades. Series.ffill() is used because
    # interpolate('ffill') is deprecated in recent pandas versions.
    df_asset['interp avg acq'] = df_asset['avg acquisition'].ffill()
    df_asset['accum quantity'] = df_asset['accum quantity'].ffill()

    # calculate profit; dates before the first trade stay NaN
    df_asset['profit per share'] = df_asset['market price'] - df_asset['interp avg acq']
    df_asset['profit'] = df_asset['profit per share'] * df_asset['accum quantity']/10000  # in ten-thousandth of shares

    # chart to verify the forward-filled average acquisition price
    mlayout['title'] = name
    fig = go.Figure(layout=mlayout)

    fig.add_scatter(x=df_asset.index, y=df_asset['market price'], opacity=0.7, name='Market price')
    fig.add_scatter(x=df_asset.index, y=df_asset['avg acquisition'], mode='markers+lines', connectgaps=True, name='Avg. acqusition')
    fig.add_scatter(x=df_asset.index, y=df_asset['interp avg acq'], mode='lines', connectgaps=False, name='Intperpolated avg. acq')

    # display is disabled by default; uncomment to inspect the verification chart
    # fig.show()

In [13]:
# One profit chart per asset, with the mean profit printed alongside it
for name, df_asset in dict_assets.items():
    profit = df_asset['profit']

    mlayout['title'] = name
    fig = go.Figure(layout=mlayout)
    fig.add_scatter(x=df_asset.index, y=profit, connectgaps=True, name='Profit')

    mean_profit = np.mean(profit)
    print(name, f'average {mean_profit:5.2f}')

    fig.show()

TOPIX average 3254.88


Domestic Reit average -1935.79


Advanced government bond average 156.31


Global equities (excluding Japan) average 4437.80


S&P500 average 4655.87


NASDAQ100 average -158.20


## Profit statistics

In [14]:
# Box plot of per-share profit: dividing out the holding size makes the
# assets comparable regardless of how many shares are held.
mlayout.update(title='Statistics of the profit history (per share)')
fig = go.Figure(layout=mlayout)

for name, df_asset in dict_assets.items():
    fig.add_box(y=df_asset['profit per share'], name=name)

fig.show()

In [15]:
# Box plot of the actual profit history, one box per asset
mlayout.update(title='Statistics of the profit history (actual)')
fig = go.Figure(layout=mlayout)

for name, df_asset in dict_assets.items():
    fig.add_box(y=df_asset['profit'], name=name)

fig.show()

In [16]:
# Running total of spending per asset; values exist only on trade dates,
# so connectgaps joins the markers across the dates in between.
mlayout.update(title='Accumulated spending')
fig = go.Figure(layout=mlayout)

for name, df_asset in dict_assets.items():
    fig.add_scatter(
        x=df_asset.index,
        y=df_asset['accum spending'],
        mode='markers+lines',
        connectgaps=True,
        name=name,
    )

fig.show()