In [15]:
import pickle
import pandas as pd
# open_data = pd.read_csv('open_data.csv', index_col=0, parse_dates=True)
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE: unpickling can execute arbitrary code; only load files produced by this project.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Inputs of the backtest, each keyed by 'Semester_<n>' in the cells below.
optimal_pairs_by_semester = _load_pickle('optimal_pairs_by_semester.pkl')
open_bysemester = _load_pickle('open_bysemester.pkl')
new_result_dict = _load_pickle('new_result_dict.pickle')
riskfree_bysemester = _load_pickle('riskfree_bysemester.pkl')

In [14]:
# For every semester, build one two-column open-price frame per selected pair.
# Layout: pair_results['Semester_<n>'][<pair position>] -> DataFrame with 'open0' / 'open1'.
pair_results = {}
for semester_no in range(1, len(optimal_pairs_by_semester) + 1):
    semester_key = 'Semester_' + str(semester_no)
    pairs_of_semester = optimal_pairs_by_semester[semester_key]
    # Prices are taken from the same semester as the pairs (in-sample variant: no +1 shift).
    opens_of_semester = open_bysemester[semester_key]

    pair_results[semester_key] = {
        position: pd.DataFrame({
            'open0': opens_of_semester[members[0]],
            'open1': opens_of_semester[members[1]],
        })
        for position, members in enumerate(pairs_of_semester)
    }

In [57]:
import statsmodels.api as sm
def calc_zscore(spread):
    """Standardize ``spread`` by subtracting its mean and dividing by its standard deviation."""
    centre = np.mean(spread)
    scale = np.std(spread)
    return (spread - centre) / scale
def calc_norm_spread(pair, lookback_dict, i, window_length=None):
    """Fit an OLS hedge regression on a rolling window and return the z-scored residuals.

    The window covers rows ``i`` to ``i + window_length - 1`` of ``lookback_dict``.
    Inside it, ``pair[1]`` is regressed on ``pair[0]`` (with intercept) and the
    regression residuals are standardized with ``calc_zscore``.

    Parameters
    ----------
    pair : sequence
        ``pair[0]`` and ``pair[1]`` are the column labels of the two securities.
    lookback_dict : sliceable table
        Object supporting ``[start:stop]`` row slicing followed by column lookup
        (presumably a pandas DataFrame of prices -- TODO confirm against the pickle).
    i : int
        Offset of the first row of the window.
    window_length : int, optional
        Number of rows in the window. When omitted, the previous behaviour is kept:
        the length of ``open_bysemester['Semester_' + str(e)]`` is used, which reads
        the notebook-level globals ``open_bysemester`` and ``e`` (the loop variable
        of the backtest cells). Pass it explicitly to avoid that hidden dependency.

    Returns
    -------
    (pandas.Series, float)
        The z-scored residual spread over the window and the OLS slope (beta).
    """
    security_0, security_1 = pair[0], pair[1]

    if window_length is None:
        # Backward-compatible default that depends on notebook globals.
        window_length = len(open_bysemester['Semester_' + str(e)])

    # Slice the rows once and pick both columns from the same window.
    window = lookback_dict[i:i + window_length]
    x = sm.add_constant(np.asarray(window[security_0]))
    y = np.asarray(window[security_1])

    result = sm.OLS(y, x).fit()
    alpha, beta = result.params[0], result.params[1]

    # Residual of the fitted line: y - (alpha + beta * x).
    spread_res = y - (alpha + beta * x.T[1])

    return pd.Series(calc_zscore(spread_res)), beta

def pnl(risk_free, prices_dict, optimal_pairs, lookback_dict, closelong, closeshort, sellth, buyth, fee):
    """Backtest the z-score pairs rule and return the mean pair return in percent.

    ``optimal_pairs`` and ``prices_dict.values()`` are zipped positionally. For every
    bar ``i`` of a pair's frame, the score is the last value of the series returned by
    ``calc_norm_spread(pair, lookback_dict, i)`` and the signal evolves as follows:

    * flat and score >= ``sellth``          -> signal -1 (leg 0 long, leg 1 short)
    * flat and score <= ``buyth``           -> signal +1 (leg 0 short, leg 1 long)
    * signal +1 and score > ``closelong``   -> flat
    * signal -1 and score < ``closeshort``  -> flat
    * anything else                         -> previous signal is kept

    Both legs are traded in unit size; the hedge ratio returned by
    ``calc_norm_spread`` is not applied to the positions.

    Parameters
    ----------
    risk_free
        Not used by this implementation; kept so existing call sites keep working.
    prices_dict : dict
        Its values are DataFrames with ``'open0'`` and ``'open1'`` columns. The frames
        are modified in place: position, log-return and cumulative-return columns
        are added to each of them.
    optimal_pairs : iterable
        Pairs, in the same order as ``prices_dict.values()``.
    lookback_dict
        Passed through to ``calc_norm_spread``.
    closelong, closeshort, sellth, buyth : float
        Thresholds of the rule described above.
    fee : float
        Cost per unit of absolute position change, subtracted from the leg's log
        return on the bar where the change happens.

    Returns
    -------
    float
        Mean over all pairs of ``((cum0 - 1) + (cum1 - 1)) * 100``, where ``cum0`` and
        ``cum1`` are the final cumulative gross returns of the two legs.
    """
    pair_returns = []

    for pair, df in zip(optimal_pairs, prices_dict.values()):
        n_bars = len(df)
        signals0 = np.zeros(n_bars)

        for i in range(n_bars):
            prev_signal = signals0[i - 1] if i > 0 else 0

            norm_spread, _ = calc_norm_spread(pair, lookback_dict, i)
            curr_spread = norm_spread.iloc[-1]  # only the most recent z-score drives the rule

            if prev_signal == 0 and (curr_spread >= sellth or curr_spread <= buyth):
                signals0[i] = -1 if curr_spread >= sellth else 1
            elif prev_signal == 1 and curr_spread > closelong:
                signals0[i] = 0
            elif prev_signal == -1 and curr_spread < closeshort:
                signals0[i] = 0
            else:
                signals0[i] = prev_signal

        pos0 = -signals0
        pos1 = signals0.copy()

        # Flatten both legs on the last bar BEFORE storing the arrays in the frame, so the
        # forced exit appears in pos*_diff and is charged the fee. Zeroing the arrays after
        # the column assignment does not reach the columns pandas has already stored.
        pos0[-1] = 0
        pos1[-1] = 0
        df['pos0'] = pos0
        df['pos1'] = pos1

        # Absolute number of units bought or sold on each bar (basis of the transaction cost).
        df['pos0_diff'] = df['pos0'].diff().abs()
        df['pos1_diff'] = df['pos1'].diff().abs()

        df['logret_stock0'] = np.log(df['open0'] / df['open0'].shift(1))
        df['logret_stock1'] = np.log(df['open1'] / df['open1'].shift(1))

        # The position held on the previous bar earns the current bar's log return.
        df['logret_leg0'] = df['logret_stock0'] * df['pos0'].shift(1) - (df['pos0_diff'] * fee)
        df['logret_leg1'] = df['logret_stock1'] * df['pos1'].shift(1) - (df['pos1_diff'] * fee)

        df['cumulative_return0'] = df['logret_leg0'].dropna().cumsum().apply(np.exp)
        df['cumulative_return1'] = df['logret_leg1'].dropna().cumsum().apply(np.exp)

        last_cumulative_return0 = df['cumulative_return0'].iloc[-1] - 1
        last_cumulative_return1 = df['cumulative_return1'].iloc[-1] - 1

        total_return = (last_cumulative_return0 + last_cumulative_return1) * 100
        pair_returns.append((pair, total_return))

    final_returns = pd.DataFrame(pair_returns, columns=['Pair', 'Last_Cumpair_Return'])
    freturn = final_returns['Last_Cumpair_Return'].mean()

    return freturn

In [44]:
import pandas as pd
import numpy as np
# Backtest with a 1% fee and entry thresholds of +/-2:
# one single-row summary DataFrame per traded semester.
portfolio_dfs100bp = []

# The loop variable must stay named `e`: calc_norm_spread / calc_ou_par read it as a global.
for e in range(1, len(optimal_pairs_by_semester) - 1):
    # NOTE(review): pair_results['Semester_k'] is built from the pairs of Semester_k, while
    # optimal_pairs below are those of Semester_e; pnl zips the two by position -- confirm
    # that the pairs actually line up.
    prices_dict = pair_results['Semester_' + str(e + 1)]
    lookback_dict = new_result_dict['Semester_' + str(e)]
    optimal_pairs = optimal_pairs_by_semester['Semester_' + str(e)]
    risk_free = riskfree_bysemester['Semester_' + str(e + 1)]

    # closelong was left empty in the original call (a syntax error); 0 matches the other cells.
    outcome = pnl(
        risk_free, prices_dict, optimal_pairs, lookback_dict,
        closelong=0, closeshort=0, sellth=2, buyth=-2, fee=0.01)

    # pnl currently returns only the mean return; accept the extended
    # (return, volatility, sharpe) tuple as well so the cell works with either form.
    if isinstance(outcome, tuple):
        freturn, dvol, sharpe_ratio = outcome
    else:
        freturn, dvol, sharpe_ratio = outcome, float('nan'), float('nan')

    semester_df = pd.DataFrame({'Semester': ['Semester_' + str(e + 1)],
                                'Portfolio_Return': [freturn],
                                'dvol': [dvol],
                                'sharpe': [sharpe_ratio]})

    portfolio_dfs100bp.append(semester_df)

KeyboardInterrupt: 

In [18]:
import pandas as pd
import numpy as np
from itertools import product

# Candidate values for each threshold of the trading rule.
closelong_options = [0, -0.1]
closeshort_options = [0, -0.1]
sellth_options = [1.8, 2, 2.2, 2.4, 2.6]
# Long-entry thresholds mirror sellth_options; the original list contained positive
# 2 and 2.6, which would open a long on almost every bar.
buyth_options = [-1.8, -2, -2.2, -2.4, -2.6]

# One summary record per parameter combination.
results = []

for closelong, closeshort, sellth, buyth in product(closelong_options, closeshort_options, sellth_options, buyth_options):
    # Per-semester portfolio returns for this parameter combination.
    portfolio_dfs100bp = []
    # The loop variable must stay named `e`: calc_norm_spread / calc_ou_par read it as a global.
    for e in range(1, len(optimal_pairs_by_semester) - 1):
        prices_dict = pair_results['Semester_' + str(e + 1)]
        lookback_dict = new_result_dict['Semester_' + str(e)]
        optimal_pairs = optimal_pairs_by_semester['Semester_' + str(e)]
        risk_free = riskfree_bysemester['Semester_' + str(e + 1)]

        freturn = pnl(
            risk_free, prices_dict, optimal_pairs, lookback_dict, closelong, closeshort, sellth, buyth,
            fee=0.01)
        if isinstance(freturn, tuple):
            # Extended pnl variant returning (return, volatility, sharpe): keep the return only.
            freturn = freturn[0]

        # Label with the traded semester (e + 1), as the other backtest cells do.
        semester_df = pd.DataFrame({'Semester': ['Semester_' + str(e + 1)],
                                    'Portfolio_Return': [freturn]})
        portfolio_dfs100bp.append(semester_df)

    combined_portfolio100bp = pd.concat(portfolio_dfs100bp, ignore_index=True)
    mean_portfolio_return = combined_portfolio100bp['Portfolio_Return'].mean()

    results.append({
        'closelong': closelong,
        'closeshort': closeshort,
        'sellth': sellth,
        'buyth': buyth,
        'mean_portfolio_return': mean_portfolio_return
    })
    print(f"Setup: closelong={closelong}, closeshort={closeshort}, sellth={sellth}, buyth={buyth}")
    print(f"Mean Portfolio Return: {mean_portfolio_return}")

# Combination with the highest mean portfolio return across semesters.
best_parameters = max(results, key=lambda x: x['mean_portfolio_return'])

print("Best Parameters:")
print("closelong:", best_parameters['closelong'])
print("closeshort:", best_parameters['closeshort'])
print("sellth:", best_parameters['sellth'])
print("buyth:", best_parameters['buyth'])
print("Mean Portfolio Return:", best_parameters['mean_portfolio_return'])

KeyboardInterrupt: 

In [17]:
import statsmodels.api as sm
import statsmodels.tsa.api as smt
import math
def calc_ou_par(pair, result_dict, i, window_length=None):
    """Estimate a modified s-score for ``pair`` from an AR(1) fit on cumulative residuals.

    All steps use rows ``i`` to ``i + window_length - 1`` of ``result_dict``:

    1. Regress the percentage changes of ``pair[1]`` on those of ``pair[0]``
       (OLS with intercept); ``beta`` is the slope.
    2. Cumulate the regression residuals and fit ``X[t] = a + b * X[t-1] + noise``.
    3. With ``kappa = -log(b) * window_length``, ``m = a / (1 - b)`` and
       ``sigma_eq = sqrt(var(noise) / (1 - b**2))``, return
       ``-m / sigma_eq - a / (kappa * sigma_eq)``.

    Parameters
    ----------
    pair : sequence
        ``pair[0]`` and ``pair[1]`` are the column labels of the two securities.
    result_dict : sliceable table
        Object supporting ``[start:stop]`` row slicing followed by column lookup
        whose columns offer ``pct_change()`` (presumably a pandas DataFrame of
        prices -- TODO confirm against the pickle).
    i : int
        Offset of the first row of the window.
    window_length : int, optional
        Number of rows in the window, also used to scale ``kappa``. When omitted,
        the previous behaviour is kept: the length of
        ``open_bysemester['Semester_' + str(e)]`` is used, which reads the
        notebook-level globals ``open_bysemester`` and ``e``.

    Returns
    -------
    (float, float)
        The modified s-score and the OLS slope ``beta``. The score is NaN when the
        AR(1) slope ``b`` is not strictly between 0 and 1: the formulas above are
        undefined there and previously raised a math domain error. A NaN score makes
        every threshold comparison evaluate to False.
    """
    security_0, security_1 = pair[0], pair[1]

    if window_length is None:
        # Backward-compatible default that depends on notebook globals.
        window_length = len(open_bysemester['Semester_' + str(e)])

    window = result_dict[i:i + window_length]
    window_data0 = window[security_0].pct_change().dropna()
    window_data1 = window[security_1].pct_change().dropna()

    x = sm.add_constant(np.asarray(window_data0))
    y = np.asarray(window_data1)

    # Return regression: residuals are the idiosyncratic part of leg 1's returns.
    result = sm.OLS(y, x).fit()
    alpha, beta = result.params[0], result.params[1]

    res = y - (alpha + beta * x.T[1])
    cumres_series = pd.Series(res.cumsum())

    # AR(1) regression of the cumulative residual on its own lag.
    x_cumres = sm.add_constant(np.asarray(cumres_series.shift(1).dropna()))
    y_cumres = cumres_series[1:].reset_index(drop=True)
    result_cumres = sm.OLS(y_cumres, x_cumres).fit()

    # np.asarray makes positional access work whether the parameters come back as a
    # plain array or as a labelled Series.
    ar_params = np.asarray(result_cumres.params)
    a, b = ar_params[0], ar_params[1]

    if not 0 < b < 1:
        # log(b) and sqrt(1 - b**2) are undefined here; report "no score" instead of raising.
        return float('nan'), beta

    var_auxres = np.var(result_cumres.resid)

    kappa = -math.log(b) * window_length
    m = a / (1 - b)
    sigma_eq = math.sqrt(var_auxres / (1 - b**2))

    s = -m / sigma_eq
    # NOTE(review): the drift correction uses the AR(1) intercept `a`; confirm this is
    # intended rather than the intercept `alpha` of the return regression.
    s_mod = s - a / (kappa * sigma_eq)

    return s_mod, beta


def pnl(risk_free, prices_dict, optimal_pairs, lookback_dict, closelong, closeshort, sellth, buyth, fee):
    """Backtest the s-score pairs rule and return the mean pair return in percent.

    NOTE: this notebook defines ``pnl`` in two cells; callers get whichever
    definition was executed last. This one scores each bar with ``calc_ou_par``.

    ``optimal_pairs`` and ``prices_dict.values()`` are zipped positionally. For every
    bar ``i`` of a pair's frame, the score is the first value returned by
    ``calc_ou_par(pair, lookback_dict, i)`` and the signal evolves as follows:

    * flat and score >= ``sellth``          -> signal -1 (leg 0 long, leg 1 short)
    * flat and score <= ``buyth``           -> signal +1 (leg 0 short, leg 1 long)
    * signal +1 and score > ``closelong``   -> flat
    * signal -1 and score < ``closeshort``  -> flat
    * anything else                         -> previous signal is kept

    Both legs are traded in unit size; the slope returned by ``calc_ou_par`` is not
    applied to the positions.

    Parameters
    ----------
    risk_free
        Not used by this implementation; kept so existing call sites keep working.
    prices_dict : dict
        Its values are DataFrames with ``'open0'`` and ``'open1'`` columns. The frames
        are modified in place: position, log-return and cumulative-return columns
        are added to each of them.
    optimal_pairs : iterable
        Pairs, in the same order as ``prices_dict.values()``.
    lookback_dict
        Passed through to ``calc_ou_par``.
    closelong, closeshort, sellth, buyth : float
        Thresholds of the rule described above.
    fee : float
        Cost per unit of absolute position change, subtracted from the leg's log
        return on the bar where the change happens.

    Returns
    -------
    float
        Mean over all pairs of ``((cum0 - 1) + (cum1 - 1)) * 100``, where ``cum0`` and
        ``cum1`` are the final cumulative gross returns of the two legs.
    """
    pair_returns = []

    for pair, df in zip(optimal_pairs, prices_dict.values()):
        n_bars = len(df)
        signals0 = np.zeros(n_bars)

        for i in range(n_bars):
            prev_signal = signals0[i - 1] if i > 0 else 0

            s_score, _ = calc_ou_par(pair, lookback_dict, i)

            if prev_signal == 0 and (s_score >= sellth or s_score <= buyth):
                signals0[i] = -1 if s_score >= sellth else 1
            elif prev_signal == 1 and s_score > closelong:
                signals0[i] = 0
            elif prev_signal == -1 and s_score < closeshort:
                signals0[i] = 0
            else:
                signals0[i] = prev_signal

        pos0 = -signals0
        pos1 = signals0.copy()

        # Flatten both legs on the last bar BEFORE storing the arrays in the frame, so the
        # forced exit appears in pos*_diff and is charged the fee. Zeroing the arrays after
        # the column assignment does not reach the columns pandas has already stored.
        pos0[-1] = 0
        pos1[-1] = 0
        df['pos0'] = pos0
        df['pos1'] = pos1

        # Absolute number of units bought or sold on each bar (basis of the transaction cost).
        df['pos0_diff'] = df['pos0'].diff().abs()
        df['pos1_diff'] = df['pos1'].diff().abs()

        df['logret_stock0'] = np.log(df['open0'] / df['open0'].shift(1))
        df['logret_stock1'] = np.log(df['open1'] / df['open1'].shift(1))

        # The position held on the previous bar earns the current bar's log return.
        df['logret_leg0'] = df['logret_stock0'] * df['pos0'].shift(1) - (df['pos0_diff'] * fee)
        df['logret_leg1'] = df['logret_stock1'] * df['pos1'].shift(1) - (df['pos1_diff'] * fee)

        df['cumulative_return0'] = df['logret_leg0'].dropna().cumsum().apply(np.exp)
        df['cumulative_return1'] = df['logret_leg1'].dropna().cumsum().apply(np.exp)

        last_cumulative_return0 = df['cumulative_return0'].iloc[-1] - 1
        last_cumulative_return1 = df['cumulative_return1'].iloc[-1] - 1

        total_return = (last_cumulative_return0 + last_cumulative_return1) * 100
        pair_returns.append((pair, total_return))

    final_returns = pd.DataFrame(pair_returns, columns=['Pair', 'Last_Cumpair_Return'])
    freturn = final_returns['Last_Cumpair_Return'].mean()

    return freturn

In [16]:
import pandas as pd
import numpy as np
# IN-SAMPLE FIX: drop the ('ASR', 'CIVI') pair from semester 34 before rebuilding the frames.
# (The two bare `np.array(...)` / `type(...)` expressions that followed were inspection
# leftovers with no effect and have been removed.)
optimal_pairs_by_semester['Semester_34'] = [
    pair for pair in optimal_pairs_by_semester['Semester_34'] if pair != ('ASR', 'CIVI')
]

# For every semester, build one two-column open-price frame per selected pair.
# Layout: pair_results['Semester_<n>'][<pair position>] -> DataFrame with 'open0' / 'open1'.
pair_results = {}
for i in range(1, len(optimal_pairs_by_semester) + 1):
    semester_key = 'Semester_' + str(i)
    semester_pairs = optimal_pairs_by_semester[semester_key]
    # Prices are taken from the same semester as the pairs (in-sample variant: no +1 shift).
    semester_opens = open_bysemester[semester_key]

    pair_results[semester_key] = {
        idx: pd.DataFrame({
            'open0': semester_opens[pair[0]],
            'open1': semester_opens[pair[1]],
        })
        for idx, pair in enumerate(semester_pairs)
    }

In [5]:
import pandas as pd
import numpy as np
# Backtest with a 1% fee and entry thresholds of +/-1:
# one single-row summary DataFrame per traded semester.
portfolio_dfs100bp = []

# The loop variable must stay named `e`: calc_norm_spread / calc_ou_par read it as a global.
for e in range(1, len(optimal_pairs_by_semester) - 1):
    # NOTE(review): pair_results['Semester_k'] is built from the pairs of Semester_k, while
    # optimal_pairs below are those of Semester_e; pnl zips the two by position -- confirm
    # that the pairs actually line up.
    prices_dict = pair_results['Semester_' + str(e + 1)]
    lookback_dict = new_result_dict['Semester_' + str(e)]
    optimal_pairs = optimal_pairs_by_semester['Semester_' + str(e)]
    risk_free = riskfree_bysemester['Semester_' + str(e + 1)]

    outcome = pnl(
        risk_free, prices_dict, optimal_pairs, lookback_dict,
        closelong=0, closeshort=0, sellth=1, buyth=-1, fee=0.01)

    # pnl currently returns only the mean return; accept the extended
    # (return, volatility, sharpe) tuple as well so the cell works with either form.
    if isinstance(outcome, tuple):
        freturn, dvol, sharpe_ratio = outcome
    else:
        freturn, dvol, sharpe_ratio = outcome, float('nan'), float('nan')

    semester_df = pd.DataFrame({'Semester': ['Semester_' + str(e + 1)],
                                'Portfolio_Return': [freturn],
                                'dvol': [dvol],
                                'sharpe': [sharpe_ratio]})

    portfolio_dfs100bp.append(semester_df)

In [6]:
import pandas as pd
import numpy as np
# Backtest with a 0.5% fee and entry thresholds of +/-1:
# one single-row summary DataFrame per traded semester.
portfolio_dfs50bp = []

# The loop variable must stay named `e`: calc_norm_spread / calc_ou_par read it as a global.
for e in range(1, len(optimal_pairs_by_semester) - 1):
    # NOTE(review): pair_results['Semester_k'] is built from the pairs of Semester_k, while
    # optimal_pairs below are those of Semester_e; pnl zips the two by position -- confirm
    # that the pairs actually line up.
    prices_dict = pair_results['Semester_' + str(e + 1)]
    lookback_dict = new_result_dict['Semester_' + str(e)]
    optimal_pairs = optimal_pairs_by_semester['Semester_' + str(e)]
    risk_free = riskfree_bysemester['Semester_' + str(e + 1)]

    outcome = pnl(
        risk_free, prices_dict, optimal_pairs, lookback_dict,
        closelong=0, closeshort=0, sellth=1, buyth=-1, fee=0.005)

    # pnl currently returns only the mean return; accept the extended
    # (return, volatility, sharpe) tuple as well so the cell works with either form.
    if isinstance(outcome, tuple):
        freturn, dvol, sharpe_ratio = outcome
    else:
        freturn, dvol, sharpe_ratio = outcome, float('nan'), float('nan')

    semester_df = pd.DataFrame({'Semester': ['Semester_' + str(e + 1)],
                                'Portfolio_Return': [freturn],
                                'dvol': [dvol],
                                'sharpe': [sharpe_ratio]})

    portfolio_dfs50bp.append(semester_df)

In [18]:
# Re-run of the 1% fee backtest with entry thresholds of +/-1:
# one single-row summary DataFrame per traded semester.
portfolio_dfs100bp = []

# The loop variable must stay named `e`: calc_norm_spread / calc_ou_par read it as a global.
for e in range(1, len(optimal_pairs_by_semester) - 1):
    # NOTE(review): pair_results['Semester_k'] is built from the pairs of Semester_k, while
    # optimal_pairs below are those of Semester_e; pnl zips the two by position -- confirm
    # that the pairs actually line up.
    prices_dict = pair_results['Semester_' + str(e + 1)]
    lookback_dict = new_result_dict['Semester_' + str(e)]
    optimal_pairs = optimal_pairs_by_semester['Semester_' + str(e)]
    risk_free = riskfree_bysemester['Semester_' + str(e + 1)]

    outcome = pnl(
        risk_free, prices_dict, optimal_pairs, lookback_dict,
        closelong=0, closeshort=0, sellth=1, buyth=-1, fee=0.01)

    # pnl currently returns only the mean return; accept the extended
    # (return, volatility, sharpe) tuple as well so the cell works with either form.
    if isinstance(outcome, tuple):
        freturn, dvol, sharpe_ratio = outcome
    else:
        freturn, dvol, sharpe_ratio = outcome, float('nan'), float('nan')

    semester_df = pd.DataFrame({'Semester': ['Semester_' + str(e + 1)],
                                'Portfolio_Return': [freturn],
                                'dvol': [dvol],
                                'sharpe': [sharpe_ratio]})

    portfolio_dfs100bp.append(semester_df)



KeyboardInterrupt: 