Return Calculation Methods
https://dspyt.com/simple-returns-log-return-and-volatility-simple-introduction


In [2]:
import os
import sys
import pandas as pd
import numpy as np
import time
from dataclasses import dataclass

In [3]:
HOME_DIR = os.path.expanduser('~/')
sys.path.append(f"{HOME_DIR}/Documents/Algo/Stock Price DB/")
from StockPriceData import process_ticker
script_path = HOME_DIR+'/Documents/Algo/Strategies/Using Pandas/Momentum/Multiple Timeframes'
cached_data = script_path+'/'+'cached_data'
stats_data = script_path+'/'+'stats'
data_folder = f'{HOME_DIR}/Documents/Algo/Data'
ticker_folder = f'{data_folder}/Tickers/Nasdaq-100'
current_date = time.strftime("%Y.%m.%d.%H%M%S")
daily_price_csv = f'{cached_data}/nasdaq100_daily_prices.csv'
benchmark_ticker: str = '^IXIC' # (NASDAQ) #'SPY' # (S&P500)

In [4]:
@dataclass
class DataClassParameters:
    benchmark_start_date: str = '2010-01-01'
    start_date: str = '2010-01-01'#'2010-01-01'
    end_date: str = '2022-12-23'
    benchmark_ticker: str = '^IXIC' # (NASDAQ) #'SPY' # (S&P500)
    synthetic_benchmark: bool = False#True
    index_yearly_members = None # DF containing the index members for each year
    ticker_csv: str = None
    daily_price_csv: str = None
    stats_csv: str = None
    tickers: list = None
    current_tickers = None
    refresh_tickers: bool = False
    refresh_data: bool = False #True
    performance_measure: str = 'perf_score' #'tot_ret' # What we'll use to sort our return data
    resample_period: str = 'M' #'M' #'W'
    stop_type: str =  'indiv' #'avg'

parameter_data = DataClassParameters()#qty_long_period=30,qty_med_period=20,qty_short_period=2,SMA_S=(10),SMA_L=(146), rolling_l=(11), rolling_m=(4),rolling_s=(9),stop=41)
           

In [5]:

all_daily_prices_df = pd.read_csv(daily_price_csv, index_col=['Date'], parse_dates=True)
print(f'Loaded prices for {all_daily_prices_df.shape[1]} tickers.')
daily_prices_df = all_daily_prices_df.copy()
daily_prices_df['CASH'] = 0.0 # We set to 0 to simulate 0% return as alternative to other investments when they're negative


Loaded prices for 100 tickers.


In [6]:
#Benchmark return
benchmark_price_df = pd.DataFrame(process_ticker(parameter_data.benchmark_ticker,parameter_data.benchmark_start_date,parameter_data.end_date)['Adj_Close'])#, name = benchmark) # Grab more data than we need since we'll be losing some to indicator warmup
tot_retun = (benchmark_price_df.iloc[-1]['Adj_Close']/benchmark_price_df.iloc[0]['Adj_Close']) - 1
benchmark_price_df['daily_pct_returns'] = benchmark_price_df.pct_change().fillna(0)
benchmark_price_df['daily_log_returns'] = (np.log(benchmark_price_df['Adj_Close']).diff()).fillna(0)
benchmark_price_df['cum_daily_pct_returns'] = (1 + benchmark_price_df['daily_pct_returns']).cumprod() - 1
benchmark_price_df['cum_daily_log_returns'] = (benchmark_price_df['daily_log_returns']).cumsum()
benchmark_price_df['cum_daily_unlog_returns'] = np.exp(benchmark_price_df['cum_daily_log_returns'])-1
benchmark_price_df.tail(10)

Retrieving price data for ^IXIC from 2010-01-01 - 2022-12-23
Connecting to DB on rpi4.
Latest price data for ^IXIC (2023-04-14) >= required end date (2022-12-23).
(3268, 8)


Unnamed: 0_level_0,Adj_Close,daily_pct_returns,daily_log_returns,cum_daily_pct_returns,cum_daily_log_returns,cum_daily_unlog_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-12-12,11143.74,0.012642,0.012563,3.827432,1.574315,3.827432
2022-12-13,11256.81,0.010147,0.010095,3.876413,1.58441,3.876413
2022-12-14,11170.89,-0.007633,-0.007662,3.839193,1.576748,3.839193
2022-12-15,10810.53,-0.032259,-0.032791,3.683086,1.543957,3.683086
2022-12-16,10705.41,-0.009724,-0.009771,3.637549,1.534186,3.637549
2022-12-19,10546.03,-0.014888,-0.015,3.568506,1.519186,3.568506
2022-12-20,10547.11,0.000102,0.000102,3.568974,1.519289,3.568974
2022-12-21,10709.37,0.015384,0.015267,3.639264,1.534556,3.639264
2022-12-22,10476.12,-0.02178,-0.022021,3.538221,1.512535,3.538221
2022-12-23,10497.86,0.002075,0.002073,3.547639,1.514608,3.547639


In [7]:
# Total Return Calculation Methods (they should all return the same value):
benchmark_return_data = {}
benchmark_return_data['total_simple_return'] = (1 + benchmark_price_df['daily_pct_returns']).prod() - 1
benchmark_return_data['total_log_return'] = np.exp((benchmark_price_df['daily_log_returns']).sum()) - 1
benchmark_return_data['total_simple_cum_return'] = (benchmark_price_df['cum_daily_pct_returns'][-1])
benchmark_return_data['total_log_cum_return'] = np.exp(benchmark_price_df['cum_daily_log_returns'][-1]) -1
benchmark_return_data

{'total_simple_return': 3.5476386446140884,
 'total_log_return': 3.547638644614066,
 'total_simple_cum_return': 3.5476386446140884,
 'total_log_cum_return': 3.547638644614066}

In [8]:
# OLD RETURN CALC METHODS
daily_returns_old = (daily_prices_df.pct_change()+1)[1:] # Adding 1 allows us to take the product of returns. Can't use cumsum though. This fucks things up for stocks that go to zero!
weekly_returns_old = daily_returns_old.resample('W').prod() # Need to subtract 1 to get actual % returns.
monthly_returns_old = daily_returns_old.resample('M').prod() # Need to subtract 1 to get actual % returns.



In [11]:
# NEW METHODS - VERIFIED
return_data = {}
# Calculate daily returns
daily_prices_df = daily_prices_df.loc[:,['ATVI','ADBE','ADP','ABNB','CASH']].copy()
daily_pct_returns = (daily_prices_df.pct_change().fillna(0))#[1:]
daily_log_returns = (np.log(daily_prices_df).diff()).fillna(0)
cum_daily_pct_returns = (1 + daily_pct_returns).cumprod() - 1
cum_daily_log_returns = (daily_log_returns).cumsum()

# Monthly Returns 
monthly_prices = daily_prices_df.resample('M').agg(lambda x: x[-1]) # Resample to longer time frame, taking last value from each period using aggregation function
monthly_manual_returns = ((monthly_prices / monthly_prices.shift(1)) - 1).fillna(0) # Manual percentage change calculation
monthly_pct_returns = (monthly_prices.pct_change()).fillna(0) # Using pandas method
monthly_pct_returns2 = ((daily_pct_returns+1).resample('M').prod()-1).fillna(0)
monthly_log_returns = (daily_log_returns.resample('M').sum()).fillna(0)
monthly_unlog_returns = (np.exp(daily_log_returns.resample('M').sum())-1).fillna(0)

# Average returns across portfolio holdings
avg_daily_pct_returns = daily_pct_returns.mean(axis=1)#.values[0] # Use this if you are using actual percentage returns
avg_daily_log_returns = daily_log_returns.mean(axis=1)#.values[0] # Use this if you are using actual percentage returns
avg_monthly_pct_returns = monthly_pct_returns.mean(axis=1)#.values[0] # Use this if you are using actual percentage returns
avg_monthly_log_returns = monthly_log_returns.mean(axis=1)#.values[0] # Use this if you are using actual percentage returns
avg_daily_gross_returns = 1 + monthly_log_returns.mean(axis=1) # Need to subtract 1 to get pct return

# Total Returns:
return_data['total_simple_return'] = (1 + daily_pct_returns).prod() - 1
return_data['total_log_return'] = np.exp((daily_log_returns).sum()) - 1
return_data['total_simple_cum_return'] = (cum_daily_pct_returns.iloc[-1])
return_data['total_log_cum_return'] = np.exp(cum_daily_log_returns.iloc[-1]) -1

# Print Results
#print(f'{daily_pct_returns.head(5)}\n{daily_log_returns.head(5)}')
#print(f'{cum_daily_pct_returns.head(5)}\n{cum_daily_log_returns.head(5)}')
#print(f'{monthly_manual_returns.head(5)}\n{monthly_pct_returns.head(5)}\n{monthly_log_returns.head(5)}\n{monthly_unlog_returns.head(5)}')
#print(f'{monthly_pct_returns.head(5)}\n{monthly_pct_returns2.head(5)}\n{monthly_log_returns.head(5)}')
#print(f'{daily_pct_returns.head()}\n{avg_daily_pct_returns.head()}')
#print(f'{avg_daily_pct_returns.head()}\n{avg_daily_log_returns.head()}')
print(f'{avg_monthly_pct_returns.head()}\n{avg_monthly_log_returns.head()}\n{avg_daily_gross_returns.head()}')



Date
2010-01-31    0.000000
2010-02-28    0.030587
2010-03-31    0.046387
2010-04-30   -0.031011
2010-05-31   -0.026547
Freq: M, dtype: float64
Date
2010-01-31   -0.058671
2010-02-28    0.029696
2010-03-31    0.044128
2010-04-30   -0.032014
2010-05-31   -0.027193
Freq: M, dtype: float64
Date
2010-01-31    0.941329
2010-02-28    1.029696
2010-03-31    1.044128
2010-04-30    0.967986
2010-05-31    0.972807
Freq: M, dtype: float64


  result = func(self.values, **kwargs)


In [10]:
#all_daily_prices_df.head()
abnb_prices = daily_prices_df['ABNB']
abnb_returns = monthly_return_pct_chg['ABNB']

for date in abnb_returns.index:
    #print(f'{date}:  {abnb_returns[date]}')
    if abnb_returns[date] > 0 and abnb_returns[date-1*date.freq] == 0:
        print(f'Turned positive on {date}')

NameError: name 'monthly_return_pct_chg' is not defined

In [None]:
abnb_prices['2021-01-25':'2021-02-01']

In [None]:
abnb_returns['2020-11-30':'2021-03-31']

In [None]:
abnb_log_returns = monthly_log_returns['ABNB']
abnb_log_returns['2020-11-30':'2021-03-31']