In [1]:
# Necessary imports
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import quantstats as qs
import statistics as st
from datetime import datetime, timedelta
from matplotlib.colors import DivergingNorm
from scipy.signal import convolve2d
from IPython.core.display import HTML
# Inject a CSS rule that horizontally centers PNG outputs (img elements inside .output_png).
# The HTML object is only rendered because it is the last expression of the cell.
HTML("""<style>.output_png img {display: block;margin-left: auto;margin-right: auto;} </style>""")

In [2]:
# Notebook-wide display settings
qs.extend_pandas()  # attach the quantstats helpers to pandas objects

# Font sizes shared by the rcParams below
SMALL_SIZE = 13
MEDIUM_SIZE = 15
BIG_SIZE = 17

# Light theme: black text/ticks/labels on a white legend background.
# (A dark variant uses 'w' for the text colors and '#2f3540' for the legend face.)
color = 'k'

plt.rcParams.update({
    'figure.figsize': (20, 6),  # alternative used previously: (26, 12)
    # colors
    'text.color': color,
    'legend.facecolor': 'w',
    'xtick.color': color,
    'ytick.color': color,
    'axes.labelcolor': color,
    # font sizes
    'font.size': SMALL_SIZE,
    'axes.titlesize': BIG_SIZE,
    'axes.labelsize': MEDIUM_SIZE,
    'xtick.labelsize': MEDIUM_SIZE,
    'ytick.labelsize': MEDIUM_SIZE,
    'legend.fontsize': SMALL_SIZE,
    'figure.titlesize': BIG_SIZE,
})

# Print wide matrices on a single line, showing up to 10 items at each edge
np.set_printoptions(edgeitems=10, linewidth=1000)

In [3]:
# Load DF with SP500 data
def get_sp500_data(start_date="1970-01-02", from_local_file=True, save_to_file=False):
    """Return the S&P 500 ("^GSPC") price history.

    Args:
        start_date: first date requested from yfinance. Ignored when
            ``from_local_file`` is set, because the pickled file is returned as-is.
        from_local_file (bool): read 'data/SP500_hist_data.pkl' instead of downloading.
        save_to_file (bool): after a download, pickle the result to
            'data/SP500_hist_data.pkl'. Has no effect when reading the local file.

    Returns:
        The object read from the pickle, or the frame returned by ``yf.download``.
    """
    if from_local_file:
        return pd.read_pickle('data/SP500_hist_data.pkl')

    # Download data from yfinance
    downloaded = yf.download("^GSPC", auto_adjust=True, start=start_date)
    if save_to_file:
        downloaded.to_pickle("data/SP500_hist_data.pkl")
    return downloaded

In [22]:
# Price history shared by the whole notebook (several functions read this global directly).
# NOTE: get_sp500_data defaults to from_local_file=True, in which case start_date is
# ignored and the pickled file is returned unchanged.
full_df = get_sp500_data(start_date='1995')
# Initial capital that the cumulative-return columns are scaled to
ini_equity = 100
# Transaction cost subtracted from the strategy's daily return on days the position changes
commision = 0.000111538462 # 2/130000 + 12.5/130000
# Expressed as a fraction: a value of 0.01 would subtract 1% from that day's return

In [5]:
# Strategy position from the MA crossover: 1 = long, -1 = short
def get_strategy_position(df, fast_ma=1, slow_ma=1):
    """Return an array of positions (1 or -1) for a fast/slow moving-average crossover.

    The moving averages are computed on the *global* ``full_df['Close']`` (not on
    ``df['Close']``) and then stored in ``df``, so pandas aligns them on ``df``'s index.
    NOTE(review): presumably this is done so that windows starting mid-history
    still have warmed-up averages -- confirm before replacing with ``df``.

    Side effects: adds the columns 'fast_ma', 'slow_ma' and 'fast-slow' to ``df``.

    Args:
        df (DataFrame): frame whose index selects the days to evaluate.
        fast_ma (int): rolling window of the fast average.
        slow_ma (int): rolling window of the slow average.

    Returns:
        np.ndarray: -1 where fast_ma - slow_ma is negative, 1 everywhere else
        (including days where the difference is NaN).
    """
    close = full_df['Close']
    df['fast_ma'] = close.rolling(window=fast_ma).mean()
    df['slow_ma'] = close.rolling(window=slow_ma).mean()
    df['fast-slow'] = df['fast_ma'] - df['slow_ma']

    is_short = df['fast-slow'].lt(0)
    return np.where(is_short, -1, 1)

In [6]:
def print_periods(IS_start_years, IS_end_years, OOS_start_years, OOS_end_years):
    """Print one line per walk-forward window: in-sample start/end, then out-of-sample start/end.

    The four sequences are iterated in parallel (zip), so output stops at the
    shortest one. Each element must support the ``%Y-%m-%d`` format spec.
    """
    row_template = "{:%Y-%m-%d} {:%Y-%m-%d} \t {:%Y-%m-%d} {:%Y-%m-%d}"

    print("\tIn SAMPLE\t\tOOS")
    for period in zip(IS_start_years, IS_end_years, OOS_start_years, OOS_end_years):
        print(row_template.format(*period))

In [7]:
def print_backtest_stats(df, fast_ma, slow_ma, ret_strat=np.nan, sr_strat=np.nan):
    """Print the period, the benchmark result and the strategy result of a backtest.

    Side effects: if ``df`` has no 'Market_cum_ret' column, one is added, scaled
    to the global ``ini_equity``.

    Args:
        df (DataFrame): backtest frame with a 'Market_daily_ret' column and an
            index whose entries support the ``%Y-%m-%d`` format spec.
        fast_ma (int): fast moving-average window used by the strategy.
        slow_ma (int): slow moving-average window used by the strategy.
        ret_strat (float): strategy figure to print (only used when fast_ma < slow_ma).
        sr_strat (float): strategy Sharpe ratio to print (only used when fast_ma < slow_ma).

    Note:
        The "Overall return" printed for the benchmark is the final value of
        'Market_cum_ret' as a percentage of the starting money, not a net return.
    """
    first_row = df.index.values[0]
    last_row = df.index.values[-1]

    if 'Market_cum_ret' not in df.columns:
        df['Market_cum_ret'] = df['Market_daily_ret'].add(1).cumprod().mul(ini_equity)
        # Scale the first day by ini_equity (was a hard-coded 100, which only
        # matched the line above while ini_equity happened to be 100).
        df.loc[first_row, 'Market_cum_ret'] = ini_equity + df.loc[first_row, 'Market_daily_ret'] * ini_equity

    # Back out the money held before the first day's return was applied
    ini_money = df.loc[first_row, 'Market_cum_ret'] / (1 + df.loc[first_row, 'Market_daily_ret'])
    if np.isnan(ini_money):
        # First-day return is missing: fall back to the configured initial equity
        ini_money = ini_equity

    ret_market = (df.loc[last_row, 'Market_cum_ret'] / ini_money) * 100
    sr_market = df['Market_daily_ret'].sharpe()

    print("\tPeriod: {:%Y-%m-%d} to {:%Y-%m-%d}".format(df.index[0], df.index[-1]))
    print("\tOverall return of SP500: {:.2f} %. SR of SP500: {:.2f}".format(ret_market, sr_market))
    if fast_ma >= slow_ma:
        # fast >= slow means the backtest ran long-only, i.e. it equals the benchmark
        print("\tOverall return of long only: {:.2f} %. Sharpe ratio strategy: {:.2f}".format(ret_market, sr_market))
    else:
        print("\tOverall return of {}-{} MA crossover: {:.2f} %. Sharpe ratio strategy: {:.2f}".format(fast_ma, slow_ma, ret_strat, sr_strat))

In [8]:
def backtest_ma_strat(df, fast_ma=1, slow_ma=1, last_position=0, ini_equity=ini_equity, commision=commision):
    """
    backtest_ma_strat does the backtest of an MA crossover strategy. It adds the following columns to the received dataframe
    (the frame is modified in place):
        - Strat_position: Position of the strategy on the index. 1:long, -1:short
        - Long_only: 1 when the run is long-only (fast_ma >= slow_ma), 0 otherwise
        - Costs: transaction cost charged that day (commision on days the position differs from the previous one)
        - Strat_daily_ret: daily returns of the strategy (market return times the previous day's position, minus Costs)
        - Market_cum_ret: daily cumulative returns of the benchmark
        - Strat_cum_ret: daily cumulative returns of the strategy

    Args:
        df (DataFrame): df with the 'Close' prices and 'Market_daily_ret' returns of the benchmark
        fast_ma (int): fast moving average
        slow_ma (int): slow moving average. If fast_ma >= slow_ma the strategy is simply long-only
        last_position (int): position held before the first row; used where there is no previous row
        ini_equity (int): initial equity to be invested in the benchmark and strategy. Defaults to the
            notebook-level ini_equity captured when this function is defined
        commision (float): cost subtracted from the daily return when the position changes.
            Values <= 0 disable costs

    Returns
        df['Strat_daily_ret'] (pd.Series): Daily returns of the strategy
        ret_strat (float): final value of the investment on the strategy (last value of Strat_cum_ret)
        sr_strat (float): Sharpe Ratio of the strategy
    """

    if fast_ma >= slow_ma:  # Long only
        df['Strat_position'] = 1
        df['Long_only'] = 1
    else:
        # Work on a copy so the helper's extra MA columns do not leak into df
        df['Strat_position'] = get_strategy_position(df.copy(), fast_ma, slow_ma)
        df['Long_only'] = 0

    # Position held coming into each day; the first row falls back to last_position
    prev_position = df['Strat_position'].shift(1).fillna(last_position)

    # Default daily transaction cost set to 0. If the position has changed from the previous day, we add a commision
    df['Costs'] = 0
    if commision > 0:
        df['Costs'] = df['Costs'].where(df['Strat_position'] == prev_position, other=commision)

    # Strat_daily_ret = Market_daily_ret * shift(Strat_position) - Costs
    df['Strat_daily_ret'] = df['Market_daily_ret'].mul(prev_position).sub(df['Costs'])

    first_day = df.index.values[0]
    df['Market_cum_ret'] = df['Market_daily_ret'].add(1).cumprod().mul(ini_equity)
    df['Strat_cum_ret'] = df['Strat_daily_ret'].add(1).cumprod().mul(ini_equity)
    df.loc[first_day, 'Market_cum_ret'] = ini_equity + df.loc[first_day, 'Market_daily_ret'] * ini_equity
    df.loc[first_day, 'Strat_cum_ret'] = ini_equity + df.loc[first_day, 'Strat_daily_ret'] * ini_equity

    # .iloc[-1] instead of [-1]: positional access through [] on a label-indexed
    # Series is deprecated in recent pandas versions.
    ret_strat = df['Strat_cum_ret'].iloc[-1]
    sr_strat = df['Strat_daily_ret'].sharpe()

    cols = ['Close', 'Market_daily_ret', 'Strat_daily_ret', 'Costs', 'Strat_position', 'Long_only', 'Market_cum_ret', 'Strat_cum_ret']
    if set(df.columns.values) != set(cols):
        # Diagnostic only: the frame carries unexpected (or is missing expected) columns
        print("CHECK end of backtest_ma_strat")

    # Local re-binding only (the caller's frame keeps all its columns); raises KeyError if a column is missing
    df = df[cols]

    return df['Strat_daily_ret'], ret_strat, sr_strat

In [9]:
# REVISE (marker left by the original author: this helper is pending review)
def ma_backtest_print_plot(df, last_position=0, fast_ma=1, slow_ma=1, ini_equity=ini_equity, commision=commision):
    """Run one MA-crossover backtest on ``df``, print its stats and plot the result.

    ``df`` is modified in place by ``backtest_ma_strat`` (result columns are added).

    Args:
        df (DataFrame): frame accepted by ``backtest_ma_strat``.
        last_position (int): position held before the first row.
        fast_ma (int): fast moving-average window.
        slow_ma (int): slow moving-average window.
        ini_equity (int): initial equity. It is now forwarded to the backtest;
            previously the argument was accepted but silently ignored.
        commision (float): transaction cost per position change.
    """
    _, ret_strat, sr_strat = backtest_ma_strat(
        df,
        last_position=last_position,
        fast_ma=fast_ma,
        slow_ma=slow_ma,
        ini_equity=ini_equity,
        commision=commision,
    )
    print_backtest_stats(df, fast_ma=fast_ma, slow_ma=slow_ma, ret_strat=ret_strat, sr_strat=sr_strat)
    show_plot(df, start=df.index.min(), end=df.index.max())

In [20]:
def run_all_combinations(df, fast_ma, slow_ma):
    """
    Backtest every (fast, slow) pair with fast < slow and collect the results in two matrices.

    Rows follow ``fast_ma`` and columns follow ``slow_ma``. Cells whose pair does
    not satisfy fast < slow are not backtested and stay at 0. ``df`` is modified
    in place by every ``backtest_ma_strat`` call.

    Returns:
        results_pnl (np.ndarray): second value returned by backtest_ma_strat per pair
        results_sharpe (np.ndarray): Sharpe ratio per pair
        sharpe_SP (float): Sharpe ratio of the long-only run (fast_ma = slow_ma = 0)
    """
    grid_shape = (len(fast_ma), len(slow_ma))
    results_pnl = np.zeros(grid_shape)
    results_sharpe = np.zeros(grid_shape)

    # Long-only reference run; only its Sharpe ratio is reported back
    _, _, sharpe_SP = backtest_ma_strat(df, fast_ma=0, slow_ma=0)

    for row, fast in enumerate(fast_ma):
        for col, slow in enumerate(slow_ma):
            if not (fast < slow):
                continue
            _, pnl, sharpe = backtest_ma_strat(df, fast, slow, last_position=0)
            results_pnl[row, col] = pnl
            results_sharpe[row, col] = sharpe

    return results_pnl, results_sharpe, sharpe_SP

In [11]:
def get_best_combination(market_sharpe, results_sharpe, num_neighbors_matrix, allow_long_only=True):
    """Pick the grid cell with the best neighbor-smoothed Sharpe ratio.

    Each cell's score is the mean of its own Sharpe ratio and the average Sharpe
    ratio of its neighbors in the 3x3 window around it. Cells whose score is NaN
    (e.g. where ``num_neighbors_matrix`` is NaN) are scored as ``market_sharpe``.

    Args:
        market_sharpe (float): replacement score for NaN cells.
        results_sharpe (np.ndarray): 2-D matrix of Sharpe ratios.
        num_neighbors_matrix (np.ndarray): per-cell divisor for the neighbor sum.
        allow_long_only (bool): accepted but not used by this function.

    Returns:
        fast_index, slow_index: row and column of the highest score.
        results_sharpe_combined (np.ndarray): the score matrix.
    """
    window = np.ones((3, 3))

    # Sum over the 3x3 window, minus the cell itself = sum over its neighbors
    neighbor_sum = convolve2d(results_sharpe, window, 'same') - results_sharpe
    neighbor_mean = neighbor_sum / num_neighbors_matrix

    # (Individual SR + neighbors SR) / 2
    results_sharpe_combined = (results_sharpe + neighbor_mean) / 2
    results_sharpe_combined = np.nan_to_num(results_sharpe_combined, nan=market_sharpe)

    # Locate the best score
    flat_best = np.argmax(results_sharpe_combined, axis=None)
    fast_index, slow_index = np.unravel_index(flat_best, results_sharpe_combined.shape)

    return fast_index, slow_index, results_sharpe_combined

In [12]:
def check_nan_around(matrix):
    """Count the NaN entries in the 3x3 window centered on every cell.

    The window is clipped at the matrix borders and includes the cell itself,
    so a NaN cell counts towards its own total.

    Args:
        matrix (np.ndarray): 2-D numeric array.

    Returns:
        np.ndarray: integer array of the same shape holding the counts.
    """
    n_rows, n_cols = matrix.shape[0], matrix.shape[1]
    nan_mask = np.isnan(matrix)
    num_nan_around = np.zeros((n_rows, n_cols), dtype=int)

    for i in range(n_rows):
        row_window = slice(max(i - 1, 0), i + 2)  # the upper bound is clipped by slicing
        for j in range(n_cols):
            col_window = slice(max(j - 1, 0), j + 2)
            num_nan_around[i, j] = nan_mask[row_window, col_window].sum()

    return num_nan_around

In [13]:
def get_num_neighbors(fast_ma_list, slow_ma_list):
    """Count, for every (fast, slow) grid cell, its valid neighbors in the 3x3 window.

    A cell is invalid when fast >= slow; invalid cells are NaN in the result and
    are not counted as neighbors of the surrounding cells.

    Rows follow ``fast_ma_list`` and columns follow ``slow_ma_list``. The grid may
    be rectangular: the previous version sized the columns from ``fast_ma_list``
    and indexed the last row with the column count, so it was only correct for
    square grids.

    Args:
        fast_ma_list (sequence): fast moving-average windows (one per row).
        slow_ma_list (sequence): slow moving-average windows (one per column).

    Returns:
        np.ndarray: float matrix of shape (len(fast_ma_list), len(slow_ma_list)).
    """
    n_rows = len(fast_ma_list)
    n_cols = len(slow_ma_list)

    def _window_extent(size):
        # How many grid lines a 3-wide window centered on each position covers:
        # 3 in the interior, one less at each border the position touches.
        extent = np.full(size, 3.0)
        if size:
            extent[0] -= 1
            extent[-1] -= 1
        return extent

    # Cells in the clipped 3x3 window, minus the cell itself:
    # 8 in the interior, 5 on the edges, 3 in the corners.
    n_neighbors = np.outer(_window_extent(n_rows), _window_extent(n_cols)) - 1

    # Mark the combinations that are not a valid crossover (fast >= slow)
    fast = np.asarray(fast_ma_list).reshape(-1, 1)
    slow = np.asarray(slow_ma_list).reshape(1, -1)
    n_neighbors[fast >= slow] = np.nan

    # Remove the invalid cells from every neighbor count (NaN cells stay NaN)
    num_notnan_neighbors = n_neighbors - check_nan_around(n_neighbors)

    return num_notnan_neighbors

In [25]:
def prepare_oos_df(df, start_date=0, end_date=0):
    """Build the out-of-sample results frame for ``start_date``..``end_date``.

    Side effects: ``df`` itself is modified -- a 'Strat_daily_ret' column is
    created if missing, and its empty entries are filled.

    Args:
        df (DataFrame): frame with the columns 'Close', 'Market_daily_ret',
            'Strat_position', 'Long_only' and 'Costs' ('Strat_daily_ret' optional).
        start_date: first label of the slice. If either date is 0 the whole
            frame is used.
        end_date: last label of the slice (inclusive, label-based slicing).

    Returns:
        DataFrame: copy of the selected rows plus 'Market_cum_ret' and
        'Strat_cum_ret', both scaled to the global ``ini_equity``.
    """
    if start_date == 0 or end_date == 0:
        start_date = df.index[0]
        end_date = df.index[-1]

    # Previously this branch assigned the missing column to itself, which always
    # raised KeyError. Start from an all-NaN column so the fill below populates it.
    if 'Strat_daily_ret' not in df.columns:
        df['Strat_daily_ret'] = np.nan

    # Fill empty spots of Strat_daily_ret with the return of that day. These spots are the first day of each OOS window.
    # Assign the result back instead of calling fillna(inplace=True) on df.loc[...]:
    # the in-place call acts on an intermediate object and is not guaranteed to reach df.
    position_return = df['Market_daily_ret'].mul(df['Strat_position'].shift(1, fill_value=1))
    df['Strat_daily_ret'] = df['Strat_daily_ret'].fillna(position_return)

    cols = ['Close', 'Market_daily_ret', 'Strat_daily_ret', 'Strat_position', 'Long_only', 'Costs']

    results_df = df.loc[start_date:end_date, cols].copy()

    results_df['Market_cum_ret'] = (results_df['Market_daily_ret'] + 1).cumprod().fillna(1) * ini_equity
    results_df['Strat_cum_ret'] = (results_df['Strat_daily_ret'] + 1).cumprod() * ini_equity

    return results_df

In [15]:
def show_oos_plot(results_df):
    """Plot benchmark vs strategy cumulative returns, with the position flags on a secondary axis.

    The title shows the first and last index entries, truncated to days when the
    data lies within a single calendar year and to years otherwise.

    Args:
        results_df (DataFrame): frame with the columns 'Market_cum_ret',
            'Strat_cum_ret', 'Long_only' and 'Strat_position'.
    """
    single_year = results_df.index.min().year == results_df.index.max().year
    fmt = 'D' if single_year else 'Y'  # numpy datetime64 unit used for the title dates

    period_start = np.datetime64(results_df.index[0], fmt)
    period_end = np.datetime64(results_df.index[-1], fmt)
    title = "SP500 vs Optimized MA crossover strategy ({} - {})".format(period_start, period_end)

    cols = ['Market_cum_ret', 'Strat_cum_ret', 'Long_only', 'Strat_position']
    colors = ('b', 'tab:orange', 'tab:brown', 'g')

    results_df[cols].plot(
        title=title,
        grid=True,
        color=colors,
        secondary_y=['Long_only', 'Strat_position'],
    )

In [16]:
def show_plot(df, start='1973-01-01', end='2020-12-31', norm=True, benchmark=True, position=True, fast_ma=1, slow_ma=1):
    """Plot the strategy's cumulative return, optionally with benchmark, moving averages and positions.

    Works on a copy of ``df[start:end]``; ``df`` itself is not modified.

    Args:
        df (DataFrame): backtest frame ('Strat_cum_ret', 'Strat_daily_ret',
            'Market_cum_ret', 'Close', 'Long_only', 'Strat_position' as needed).
        start, end: bounds of the label-based slice to plot.
        norm (bool): when False, rescale the strategy curve so it can be drawn
            against the raw 'Close' price instead of 'Market_cum_ret'.
        benchmark (bool): also draw the benchmark series.
        position (bool): also draw 'Long_only' and 'Strat_position' on a secondary axis.
        fast_ma, slow_ma (int): when slow_ma > 1 and fast_ma >= 1, draw both moving
            averages, computed on the global ``full_df['Close']``.
    """
    df_plot = df[start:end].copy()
    first_day, last_day = df_plot.index[0], df_plot.index[-1]

    # Title dates: day resolution inside a single year, year resolution otherwise
    fmt = 'D' if first_day.year == last_day.year else 'Y'
    title = 'MA crossover strategy ({} - {})'.format(np.datetime64(first_day, fmt), np.datetime64(last_day, fmt))

    # cols and colors are built in lockstep: the i-th color belongs to the i-th column
    cols = ['Strat_cum_ret']
    colors = ['tab:orange']

    if benchmark:
        title = 'SP500 vs ' + title
        colors.append('b')
        cols.append('Market_cum_ret' if norm else 'Close')

    if not norm:
        # Start the strategy curve from the price level preceding the first day
        ini_money = df_plot.loc[first_day, 'Close'] / (1 + df_plot.loc[first_day, 'Strat_daily_ret'])
        df_plot['Strat_cum_ret'] = df_plot['Strat_daily_ret'].add(1).cumprod().mul(ini_money)
        df_plot.loc[first_day, 'Strat_cum_ret'] = ini_money + df_plot.loc[first_day, 'Strat_daily_ret'] * ini_money

    if slow_ma > 1 and fast_ma >= 1:
        cols += ['fast_ma', 'slow_ma']
        colors += ['r', 'y']
        close = full_df['Close']
        df_plot['fast_ma'] = close.rolling(window=fast_ma).mean()
        df_plot['slow_ma'] = close.rolling(window=slow_ma).mean()

    # PLOT
    if position:
        cols += ['Long_only', 'Strat_position']
        colors += ['tab:brown', 'g']
        df_plot.loc[start:end, cols].plot(title=title, grid=True, secondary_y=['Strat_position', 'Long_only'], color=colors)
    else:
        df_plot.loc[start:end, cols].plot(title=title, grid=True, color=tuple(colors))


In [21]:
# Plots a heatmap with data from a matrix.
def show_heatmap(data, market_sharpe, plot_title, x_title, x_values, y_title, y_values):
    """Draw an annotated red/green heatmap of ``data`` on a new 10x8 figure.

    The matrix (and the y tick labels) are flipped vertically so the first row
    is drawn at the bottom. Colors are centered on 0 and clipped to [-1, 1].

    Args:
        data (np.ndarray): 2-D matrix of values to display.
        market_sharpe: accepted but not used by this function.
        plot_title (str): figure title.
        x_title (str): x axis label.
        x_values (sequence): x tick labels (one per column).
        y_title (str): y axis label.
        y_values (sequence): y tick labels (one per row, in the original row order).
    """
    # Flip matrix vertically for better visualization
    flipped = np.flip(data, axis=0)
    _, ax = plt.subplots(figsize=(10, 8))

    red_green = sns.diverging_palette(h_neg=10, h_pos=130, as_cmap=True, s=80, l=50)
    sns.heatmap(
        flipped,
        cmap=red_green,
        annot=True,
        fmt=".2f",
        vmin=-1.0,
        center=0,
        vmax=1.0,
        linecolor='black',
        cbar=True,
        ax=ax,
        xticklabels=x_values,
        yticklabels=np.flip(y_values),
    )

    ax.set(title=plot_title, xlabel=x_title, ylabel=y_title)

    plt.yticks(rotation=0)

In [18]:
def add_data(data, new_data, name):
    """Append ``new_data`` to ``data`` as a new group of columns labelled ``name``.

    NOTE(review): relies on two globals that are not defined in this part of the
    notebook -- ``index`` (row labels) and ``metrics`` (second column level).
    They must exist before this function is called.

    Args:
        data (DataFrame): frame to extend.
        new_data: values for the new block, passed to ``pd.DataFrame``.
        name: top-level column label for the new block.

    Returns:
        DataFrame: ``data`` and the new block concatenated column-wise.
    """
    block_columns = pd.MultiIndex.from_product([[name], metrics])
    block = pd.DataFrame(new_data, index=index, columns=block_columns)

    return pd.concat([data, block], axis=1)

In [19]:
def save_report(returns, benchmark="SPY"):
    """Generate the quantstats HTML report using a light (black-on-white) plot theme.

    The theme is applied only while the report is generated. The caller's
    matplotlib settings are restored afterwards, also when the report raises.
    The previous version overwrote them with a hard-coded dark theme after the
    call, and left the light theme in place on error.

    Args:
        returns: strategy returns passed to ``qs.reports.html``.
        benchmark: benchmark passed to ``qs.reports.html`` ("SPY" by default).
    """
    color = 'k'
    report_theme = {
        'text.color': color,
        'legend.facecolor': 'w',
        'xtick.color': color,
        'ytick.color': color,
        'axes.labelcolor': color,
    }

    # rc_context restores the previous rcParams when the block exits
    with plt.rc_context(report_theme):
        qs.reports.html(returns, benchmark)