# Background



In this exercise, we implement the concept of structural stochastic volatility which derives from different noise levels in the demand of fundamentalist and chartists and time varying market share of the two groups. 

We consider the || name here || approach of endogenous switching between the trading strategies and estimate the model by method of simulated moments. where choice of moments reflects the basic stylized facts of daily returns of a stock market index. 

In [1]:
%load_ext Cython

In [11]:
# %%cython

# import numpy as np
# import cython

In [8]:
import numpy as np
import time
import argparse
import pandas as pd
import seaborn as sns
from statsmodels.tsa.stattools import acf
from yahoofinancials import YahooFinancials
import scipy.optimize as opt
from arch import arch_model
from scipy.stats import kurtosis, skew
import matplotlib as mpl
import statsmodels.api as sm
import matplotlib.pyplot as plt

sns.set()
import os, pickle as pkl
plt.rcParams['figure.figsize'] = (15, 6)
from itertools import product

LAG_1_AUTOCORRELATION_RAW = 'Lag-1 Autocorrelation (raw)'
MEAN_ABS = 'Mean (absolute)' 
HILL_ESTIMATE = 'Hill estimator (absolute)' 
LAG_1_AUTOCORRELATION_ABS = 'Lag-1 Autocorrelation (absolute)'
LAG_5_AUTOCORRELATION_ABS = 'Lag-5 Autocorrelation (absolute)'
LAG_10_AUTOCORRELATION_ABS = 'Lag-10 Autocorrelation (absolute)'
LAG_25_AUTOCORRELATION_ABS = 'Lag-25 Autocorrelation (absolute)'
LAG_50_AUTOCORRELATION_ABS = 'Lag-50 Autocorrelation (absolute)'
LAG_100_AUTOCORRELATION_ABS = 'Lag-100 Autocorrelation (absolute)'



In [4]:
%matplotlib inline



## Fetch the data 

Data extraction successful....
Summary: 

               ^GSPC
count  10232.000000
mean    1027.420476
std      777.388608
min       98.220001
25%      333.962494
50%     1006.740021
75%     1390.694946
max     3386.149902


In [41]:
%%cython --annotate

import numpy as np
import time
import argparse
import pandas as pd
import seaborn as sns
from statsmodels.tsa.stattools import acf
from yahoofinancials import YahooFinancials
import scipy.optimize as opt
from arch import arch_model
from scipy.stats import kurtosis, skew
import matplotlib as mpl
import statsmodels.api as sm
import matplotlib.pyplot as plt
import sys

sns.set()
import os, pickle as pkl
plt.rcParams['figure.figsize'] = (15, 6)
from itertools import product

LAG_1_AUTOCORRELATION_RAW = 'Lag-1 Autocorrelation (raw)'
MEAN_ABS = 'Mean (absolute)' 
HILL_ESTIMATE = 'Hill estimator (absolute)' 
LAG_1_AUTOCORRELATION_ABS = 'Lag-1 Autocorrelation (absolute)'
LAG_5_AUTOCORRELATION_ABS = 'Lag-5 Autocorrelation (absolute)'
LAG_10_AUTOCORRELATION_ABS = 'Lag-10 Autocorrelation (absolute)'
LAG_25_AUTOCORRELATION_ABS = 'Lag-25 Autocorrelation (absolute)'
LAG_50_AUTOCORRELATION_ABS = 'Lag-50 Autocorrelation (absolute)'
LAG_100_AUTOCORRELATION_ABS = 'Lag-100 Autocorrelation (absolute)'

class DataLoader:
    def __init__(self, assets, start_date, end_date, frequency='daily'):
        self.assets = assets
        self.start_date = start_date
        self.end_date = end_date
        self.frequency = frequency
        
    
    def get_empirical_data(self, log_returns = True, commentary=True):
        yahoo_financial = YahooFinancials(self.assets)
        try:
            data = yahoo_financial.get_historical_price_data(start_date=self.start_date,
                                                             end_date=self.end_date,
                                                             time_interval=self.frequency)
            df = pd.DataFrame({
            a: {x['formatted_date']: x['adjclose'] for x in data[a]['prices']} for a in assets
        })
        except:
            print("Not able to download data using yahoo financials. System will exit here.")
            sys.exit(0)
            
        df = df.set_index(pd.DatetimeIndex(df.index))
        
        if log_returns:
            df = np.log(df / df.shift(1)).dropna()
            
        if commentary:
            print("Data extraction successful....\nSummary: \n\n", df.describe())
            
        return df
    

        
def calculate_series_return(series, log=False):
    if log:
        return (np.log(series) - np.log(series.shift(1))).dropna()
    else:
        return (series - series.shift(1)).dropna()

class Moments:
    @staticmethod
    def get_numerical_moments(float[:] series, int[:] moment_list, averaging_offset=None):
        print("AAYA yahan")
        cdef int moment_max
        if averaging_offset is None:
            moment_max = np.max(moment_list)
        else:
            moment_max = np.max(moment_list) + averaging_offset
        
        autocorr = acf(series, unbiased=True, nlags=moment_max, fft=False)
        
        results = []
        
        if averaging_offset is None:
            results = [autocorr[moment] for moment in moment_list]
        else:
            results = [np.mean(autocorr[1:3]) if moment==1 \
                       else np.mean(autocorr[moment-averaging_offset:moment + averaging_offset+1])\
                      for moment in moment_list]
    
        return results
    @staticmethod
    def get_hill_estimator(series, k=0.05):
        abs_series = np.sort(np.abs(series))
        num_terms = int(k*len(abs_series))
        gamma = np.mean(np.log(abs_series[-num_terms:])) - abs_series[-(num_terms+1)]
        return 1.0/gamma
    
    
class HPM:
    
    HPM_INIT = {
                    "mu": 0.01, 
                    "phi": 0.12,
                    "chi": 1.5,
                    "pstar": 100, 
                    "sigma_f": 0.758,
                    "sigma_c": 2.087 
                }
    
    
    def optimise_criterion(self, params_dict_values, actual_ts_moments, weighing_matrix, params_dict_keys, price0, ts_length, buffer_multiplier=5):
        
        # reconstruct params dict for simulation
        params_dict = {params_dict_keys[i]:params_dict_values[i] for i in range(len(params_dict_keys))}

        
        # Calculating simulated price
        simulated_price = self.simulate(param_dict = params_dict, 
                                        num_paths = 1, 
                                        length=buffer_multiplier*ts_length, 
                                        price0=price0)
        # Ignore first few points
        simulated_price = simulated_price[ts_length:, :]
        
        # Simulated price returns
        simulated_price_returns = np.log(simulated_price[1:, :]) - np.log(simulated_price[:-1, :])
        
        # Simulated time series moments 
        simulated_ts_moments, _ = self.get_moments(simulated_price_returns[:, 0])
        
        # Error between simulated and actual moments 
        error = np.array(actual_ts_moments).reshape(1, -1) - np.array(simulated_ts_moments).reshape(1, -1)
        
        return (error @ weighing_matrix @ error.T).flatten()[0]
    
    def calibrate(self, initial_params_dict, double[:] ts, weighing_matrix, price0, n_max_iter=1000, grid_init_search=True):
        # Separate keys and values 
        keys, values = list(initial_params_dict.keys()), tuple(initial_params_dict.values())

        if grid_init_search:
            minima = np.inf
            minima_dict = None
            for dictionary in self.get_shocked_dictionaries(initial_params_dict):
                error_value = self.optimise_criterion(dictionary, ts, weighing_matrix, keys, price0)
                if error_value <= minima:
                    minima_dict = dictionary.copy()
            
            init_params_dict = minima_dict.copy()

        # Minimize the weighted error
        empirical_moments, _ = self.get_moments(ts)
        calibrated_result = opt.minimize(self.optimise_criterion, values, args=(empirical_moments, weighing_matrix, keys, price0, len(ts)),\
                                        method='Nelder-Mead', options={'maxiter': n_max_iter, 'disp': True})
        
        # Calibrated parameters
        calibrated_params = {keys[i]:calibrated_result.x[i] for i in range(len(keys))}
        
        return calibrated_params
    
    
    def get_shocked_dictionaries(self, init_params_dict, shock=0.2, num_points=5):
        keys, values = list(init_params_dict.keys()), list(init_params_dict.values())
        values_modified = list(product(*[list(np.linspace((1-shock)*val, (1+shock)*val, num_points)) for val in values]))
        for val in values_modified:
            yield {keys[i]: val[i] for i in range(len(keys))}
            
    
    
    def get_moments(self, ts):
        '''
        Returns the moments in the following order:
        ['Lag 1 autocorrelation from raw returns', 'Mean absolute level', 'Lag 1 autocorrelation from abs returns',
        'Lag 5 autocorrelation from abs returns', 'Lag 10 autocorrelation from abs returns', 
        'Lag 25 autocorrelation from abs returns', 'Lag 50 autocorrelation from abs returns',
        'Lag 100 autocorrelation from abs returns', 'Hill estimator']
        '''
        
        
        m = []
        
        abs_ts = np.abs(ts)
        
        
        # First order autocorrelation coefficient from raw returns
        m = m + Moments.get_numerical_moments(ts, [1])
        
        # Mean of absolute returns 
        m = m + [np.mean(abs_ts)]
        
        # Hill estimator 
        m = m + [Moments.get_hill_estimator(abs_ts)]
        
        # lag [1,5] from absolute returns 
        m = m + Moments.get_numerical_moments(abs_ts, [1, 5, 10, 25, 50, 100], averaging_offset=1)
        
        return m, [LAG_1_AUTOCORRELATION_RAW, MEAN_ABS, HILL_ESTIMATE, 
                   LAG_1_AUTOCORRELATION_ABS, LAG_5_AUTOCORRELATION_ABS, 
                   LAG_10_AUTOCORRELATION_ABS, LAG_25_AUTOCORRELATION_ABS, 
                  LAG_50_AUTOCORRELATION_ABS, LAG_100_AUTOCORRELATION_ABS]
    
        
    def get_weighing_matrix(self, actual_ts, num_bootstraps=5000,  method = 'block-bootstrap'):
        
        if method.lower() =='block-bootstrap':
            
            moment_samples = np.zeros((9, num_bootstraps))
            abs_ts = np.abs(actual_ts)
            
            num_blocks_250 = len(actual_ts) // 250
            num_blocks_750 = len(actual_ts) // 750
            
            for i in range(num_bootstraps):
                
                m = []
                
                # numeric_moments_absolute lags = [1, 5, 10, 25, 100]
                modified_ts = np.array([])
                
                for j in range(num_blocks_250):
                    slice_start = np.random.randint(num_blocks_250)*250
                    modified_ts = np.append(modified_ts, actual_ts[slice_start:slice_start+250])
                
                m = self.get_moments(modified_ts)[0][:-4]

                modified_abs_ts = []
                
                for j in range(num_blocks_750):
                    slice_start = np.random.randint(num_blocks_750)*750
                    modified_abs_ts = np.append(modified_abs_ts, abs_ts[slice_start:slice_start+750])
            
                m = m + self.get_moments(modified_abs_ts)[0][-4:]
                moment_samples[:, i] = m
                
            weighing_matrix = np.linalg.inv(np.cov(moment_samples))
            
        elif method.lower()=='equal':
            weighing_matrix = np.eye(9)
            
        return weighing_matrix
    
        
    
    def simulate(self, param_dict, num_paths, price0, length,  seed=1001):
        
        np.random.seed(seed)
        
        price = np.zeros((length+1, num_paths))
        demand_fundamentalist = np.zeros((length+1, num_paths))
        demand_chartist = np.zeros((length+1, num_paths))
        num_chartist = np.ones((length+1, num_paths))*0.5
        num_fundamentalist = np.ones((length+1, num_paths))*0.5
        
        price[0, :] = np.array([price0]*num_paths)

        for t in range(1, length+1):
            price[t, :] = price[t-1, :] + param_dict['mu']*(num_fundamentalist[t-1, :]*demand_fundamentalist[t-1, :] \
                                                     + num_chartist[t-1, :]*demand_chartist[t-1, :])
            
            demand_fundamentalist[t, :] = param_dict['phi']*(param_dict['pstar'] - price[t,:]) \
                                        + np.random.normal(scale= param_dict['sigma_f'], size=num_paths)
            demand_chartist[t, :] = param_dict['chi']*(price[t]-price[t-1]) \
                                        + np.random.normal(scale= param_dict['sigma_c'], size=num_paths)
             
        return price
    
    
    
def yield_slices(series, slice_length, min_size=200):
    slice_index = 0
    while True:
        print("ys")
        sliced_series = series[slice_length*slice_index:slice_length*(slice_index+1)]
        if len(sliced_series) >= min_size: 
            yield sliced_series
            
        slice_index +=1
        
        
def write_timer():
    elapsed_time = None
    
    while True:
        if elapsed_time is None:
            elapsed_time = 0
            last_time = time.time()
            yield ""
        else:
            elapsed_time = time.time() - last_time
            last_time = time.time()
            yield elapsed_time
            

            
            
            
assets = ['^GSPC']
assets_dict = {'^GSPC':'SPX'}
start_date = '1979-12-31'
end_date = '2020-07-29'
frequency = 'daily'

dl = DataLoader(assets, start_date, end_date, frequency=frequency)
df = dl.get_empirical_data(log_returns=False)
            
num_paths = 1000
slice_length = 500
n_optimizer_iterations = 1000

cdef double [:] asset_log_price_ts_view

for asset in assets:
    asset_log_price_ts = np.log(df[asset])
    asset_log_price_ts_view = asset_log_price_ts
    
    hpm_obj = HPM()
    
    for price_ts in yield_slices(asset_log_price_ts_view, slice_length, min_size=200):
        return_ts = calculate_series_return(price_ts)
        
        weighing_matrix = hpm_obj.get_weighing_matrix(return_ts, method='equal')
        
        init_params_dict = HPM.HPM_INIT
        
        init_params_dict['pstar'] = np.mean(return_ts)
        
        timer = write_timer(); next(timer)
        
        
        calibrated_params_dict = hpm_obj.calibrate(initial_params_dict = init_params_dict, 
                                              ts = return_ts.values, 
                                              weighing_matrix = weighing_matrix,
                                              price0= price_ts[0],
                                              n_max_iter = n_optimizer_iterations, 
                                              grid_init_search=False)
        
        print("Time taken for calibration :", next(timer))
        price_simulated = hpm_obj.simulate(calibrated_params_dict,
                                           num_paths=num_paths,
                                           price0= price_ts[0],
                                           length=5*len(asset_log_price_ts))
        
        print("Time taken for simulation :", next(timer))
        price_simulated = price_simulated[len(price_ts):, :]
        
        returns_simulated = price_simulated[1:, :] - price_simulated[:-1, :]
        write_timer()
        moments_simulated = [hpm_obj.get_moments(returns_simulated[:, i])[0] for i in range(num_paths)]
        
        moments_actual, headers = hpm_obj.get_moments(return_ts)
        
        moments_simulated_df = pd.DataFrame(np.array(moments_simulated), columns=headers)
        
        print("Time taken to get simulated moments :", next(timer))
        for header in headers:
            fig, ax = plt.subplots()
            ax.hist(moments_simulated_df[header], density=True, bins=50)
            ax.axvline(moments_actual[headers.index(header)], color='red')
            plt.title(header + " (Datapoints: " + len(price_ts) + ")")
            if not os.path.isdir(header):
                os.mkdir(header)
            plt.show()
            plt.savefig(os.path.join(header, price_ts.index[0].strftime("%Y-%m-%d") + "-" + price_ts.index[-1].strftime("%Y-%m-%d") + '.png'), format='png')
        
        
        
        

Data extraction successful....
Summary: 

               ^GSPC
count  10232.000000
mean    1027.420476
std      777.388608
min       98.220001
25%      333.962494
50%     1006.740021
75%     1390.694946
max     3386.149902


TypeError: a bytes-like object is required, not 'Series'

### Approach 1: 

In this approach, we assume rollinig slices of 2 year time points and weighing matrix is assumed to be identity matrix. 

In [None]:
gen = write_timer()

In [None]:
next(write_timer("haan"))

In [None]:

    
gen = write_timer()

In [None]:
next(gen)

In [None]:
if abraka is not None:
    print("Ok")

In [None]:
max(np.array([1,2,3]))

In [34]:
df[asset].values

array([ 107.94000244,  105.76000214,  105.22000122, ..., 3215.62988281,
       3239.40991211, 3218.43994141])