# Back Testing

- Import Statements

In [55]:
import numpy as np
from pandas import Timestamp
import datetime as dt
import pandas as pd
import yfinance as yf
import math
from mplfinance.original_flavor import candlestick_ohlc
import matplotlib.dates as mpl_dates
import matplotlib.pyplot as plt

from operator import itemgetter


In [56]:

def sanitize(df):
    """Normalize an OHLC price frame for the level-detection helpers.

    Column names from the known vendor spellings are mapped to lowercase
    canonical names, the price columns that are present are cast to float,
    and the index is given a lowercase name (or, when the index is unnamed,
    replaced by a ``DatetimeIndex`` built from the ``date`` column).

    Args:
        df: Price frame. Its columns are renamed in place, so the caller's
            object sees the new column names as well.

    Returns:
        The sanitized frame, or ``None`` when ``df`` is empty.

    Raises:
        AttributeError: If a non-empty frame has no columns.
        KeyError: If the index is unnamed and there is no ``date`` column.
    """
    if df.empty:
        return None
    if len(df.columns) == 0:
        # NOTE(review): a frame without columns already reports `empty`, so
        # this guard looks unreachable; kept for parity with the old contract.
        raise AttributeError("[X] No columns!")

    common_names = {
        "Date": "date",
        "Time": "time",
        "Timestamp": "timestamp",
        "Datetime": "datetime",
        "Open": "open",
        "High": "high",
        "Low": "low",
        "Close": "close",
        "Adj Close": "adj_close",
        "Volume": "volume",
        "Dividends": "dividends",
        "Stock Splits": "split",
        "open_price": "open",
        "high_price": "high",
        "low_price": "low",
        "close_price": "close",
        "traded_quantity": "volume",
    }
    # Rows that are entirely NaN are deliberately NOT dropped here.
    df.rename(columns=common_names, errors="ignore", inplace=True)

    # Only cast the price columns that exist; astype() raises on a missing key.
    price_columns = ("open", "high", "low", "close")
    col_types = {name: float for name in price_columns if name in df.columns}
    df = df.astype(col_types)

    index_name = df.index.name
    if index_name is not None:
        df.index = df.index.rename(index_name.lower())
    else:
        # set_index() returns a new frame; the result must be kept.
        df = df.set_index(pd.DatetimeIndex(df["date"]))

    return df

In [57]:
import datetime as dt
from operator import itemgetter
import sys

import numpy as np
from pandas import Timestamp


def _create_level_object(row, type):
    """Build the four open/high/low/close level dicts for one candle.

    Args:
        row: Mapping-like candle with "open", "high", "low" and "close".
        type: Prefix for the level type; "_O", "_H", "_L" or "_C" is appended.

    Returns:
        A list of four dicts (open, high, low, close order), each with the
        keys "type", "level" and "is_support". Open and low are flagged as
        support, high and close as resistance.
    """
    # (type suffix, source column, is_support)
    specs = (
        ("O", "open", True),
        ("H", "high", False),
        ("L", "low", True),
        ("C", "close", False),
    )
    # Every level is rounded to 2 decimals, like the other level builders in
    # this file; previously only the open kept its decimals.
    return [
        {
            "type": f"{type}_{suffix}",
            "level": np.round(row[column], 2),
            "is_support": is_support,
        }
        for suffix, column, is_support in specs
    ]


def _current_previous_levels(df, type):
    """Return the OHLC levels of the last two rows of ``df``.

    The last row is tagged with the prefix ``C_<type>`` and the row before it
    with ``P_<type>``; the last row's levels come first in the result.
    """
    latest = _create_level_object(df.iloc[-1], f"C_{type}")
    prior = _create_level_object(df.iloc[-2], f"P_{type}")
    return [*latest, *prior]


def monthly_levels(df):
    """Resample ``df`` with the "M" rule and return the levels of its last two bars."""
    ohlc_rules = {"open": "first", "high": "max", "low": "min", "close": "last"}
    monthly = df.resample("M").agg(ohlc_rules)
    return _current_previous_levels(monthly, "M")


def weekly_levels(df):
    """Resample ``df`` with the "W" rule and return the levels of its last two bars."""
    ohlc_rules = {"open": "first", "high": "max", "low": "min", "close": "last"}
    weekly = df.resample("W").agg(ohlc_rules)
    return _current_previous_levels(weekly, "W")


def daily_levels(df):
    """Return the levels of the last two rows of ``df``, tagged with "D"."""
    tag = "D"
    return _current_previous_levels(df, tag)


def firty_two_week_levels(df, window=252):
    """Return the rolling high/low levels over the last ``window`` rows.

    Args:
        df: Frame with "high" and "low" columns. It is not modified.
        window: Number of trailing rows to look at (default 252).

    Returns:
        Two level dicts: "52W_H" (resistance) and "52W_L" (support), each
        rounded to 2 decimals.
    """
    # Work on local series instead of writing helper columns into the
    # caller's frame. min_periods=1 keeps the result defined when fewer than
    # `window` rows are available.
    rolling_high = df["high"].rolling(window=window, min_periods=1).max()
    rolling_low = df["low"].rolling(window=window, min_periods=1).min()

    return [
        {
            "type": "52W_H",
            "level": np.round(rolling_high.iloc[-1], 2),
            "is_support": False,
        },
        {
            "type": "52W_L",
            "level": np.round(rolling_low.iloc[-1], 2),
            "is_support": True,
        },
    ]


def all_time_levels(df):
    """Return the highest high ("ATH") and highest close ("ATC") of ``df``.

    Args:
        df: Frame with "high" and "close" columns. It is not modified.

    Returns:
        Two level dicts, both flagged as resistance, rounded to 2 decimals.
    """
    # Compute the maxima directly rather than broadcasting them into helper
    # columns on the caller's frame.
    return [
        {
            "type": "ATH",
            "level": np.round(df["high"].max(), 2),
            "is_support": False,
        },
        {
            "type": "ATC",
            "level": np.round(df["close"].max(), 2),
            "is_support": False,
        },
    ]


def _support(df, index, n1, n2):
    """Tell whether the candle at position ``index`` is a local-low pivot.

    The lows must not rise over the ``n1`` steps ending at ``index`` and must
    not fall over the ``n2`` steps after it.

    Args:
        df: Frame with a "low" column.
        index: Zero-based row position of the candidate candle.
        n1: Number of candles to inspect before ``index``.
        n2: Number of candles to inspect after ``index``.

    Returns:
        True for a pivot, False otherwise - including when the inspection
        window does not fit inside the frame.
    """
    # Positional access through a NumPy array: integer keys on a Series with
    # a non-integer index are label lookups in newer pandas.
    lows = df["low"].to_numpy()
    if index - n1 < 0 or index + n2 >= len(lows):
        return False

    if np.any(np.diff(lows[index - n1 : index + 1]) > 0):
        return False
    if np.any(np.diff(lows[index : index + n2 + 1]) < 0):
        return False
    return True


def _resistance(df, index, n1, n2):
    """Tell whether the candle at position ``index`` is a local-high pivot.

    The highs must not fall over the ``n1`` steps ending at ``index`` and
    must not rise over the ``n2`` steps after it.

    Args:
        df: Frame with a "high" column.
        index: Zero-based row position of the candidate candle.
        n1: Number of candles to inspect before ``index``.
        n2: Number of candles to inspect after ``index``.

    Returns:
        True for a pivot, False otherwise - including when the inspection
        window does not fit inside the frame.
    """
    # Positional access through a NumPy array: integer keys on a Series with
    # a non-integer index are label lookups in newer pandas.
    highs = df["high"].to_numpy()
    if index - n1 < 0 or index + n2 >= len(highs):
        return False

    if np.any(np.diff(highs[index - n1 : index + 1]) < 0):
        return False
    if np.any(np.diff(highs[index : index + n2 + 1]) > 0):
        return False
    return True


# method 1: fractal candlestick pattern
# determine bullish fractal
def _is_support(df, i):
    """Tell whether position ``i`` is the centre of a five-candle low fractal.

    The low at ``i`` must be below both neighbours, and the lows must keep
    rising one more step outwards on each side.

    Returns:
        True for a fractal; False otherwise, including when ``i`` is closer
        than two rows to either end of the frame.
    """
    # Positional NumPy access; integer keys on a Series are not reliably
    # positional across pandas versions.
    lows = df["low"].to_numpy()
    if i < 2 or i + 2 >= len(lows):
        return False
    return bool(
        lows[i] < lows[i - 1]
        and lows[i] < lows[i + 1]
        and lows[i + 1] < lows[i + 2]
        and lows[i - 1] < lows[i - 2]
    )


# determine bearish fractal
def _is_resistance(df, i):
    """Tell whether position ``i`` is the centre of a five-candle high fractal.

    The high at ``i`` must be above both neighbours, and the highs must keep
    falling one more step outwards on each side.

    Returns:
        True for a fractal; False otherwise, including when ``i`` is closer
        than two rows to either end of the frame.
    """
    # Positional NumPy access; integer keys on a Series are not reliably
    # positional across pandas versions.
    highs = df["high"].to_numpy()
    if i < 2 or i + 2 >= len(highs):
        return False
    return bool(
        highs[i] > highs[i - 1]
        and highs[i] > highs[i + 1]
        and highs[i + 1] > highs[i + 2]
        and highs[i - 1] > highs[i - 2]
    )


# to make sure the new level area does not exist already
def _is_far_from_level(value, levels, df):
    """Tell whether ``value`` is clear of every level found so far.

    Noise filter: a candidate is discarded when its distance to an existing
    level is smaller than the average candle size (mean of high - low), which
    serves as a rough volatility estimate.

    Args:
        value: Candidate price level.
        levels: Sequence of level tuples whose second item is the price,
            e.g. ``(timestamp, price, kind)``.
        df: Frame with "high" and "low" columns.

    Returns:
        True when no existing level lies closer than the average candle size.
    """
    ave = np.mean(df["high"] - df["low"])
    # Index the price instead of unpacking: the detectors store 3-tuples, so
    # two-name unpacking raised ValueError.
    return not any(abs(value - level[1]) < ave for level in levels)


# Given a price level, return True when it is at least `mean` away from every previously discovered key level, and False when it is too near to one of them.
def _distance_from_mean(mean, level, unique_levels):
    """Return True when no entry of ``unique_levels`` is closer than ``mean`` to ``level``."""
    too_close = [abs(level - known) < mean for known in unique_levels]
    return np.sum(too_close) == 0


def _group_noise(levels, price, mean):
    """Merge neighbouring levels into groups.

    Levels are consumed in the given order (the caller in this file sorts
    them by price first). A level joins the current group while it lies
    closer than ``mean`` to the first level of that group; otherwise the
    group is closed and a new one is started.

    Args:
        levels: Dicts with "level", "type", "is_support" and optionally "date".
        price: Reference price. When a level below it joins a group, the
            group's "point" becomes the group maximum, otherwise the minimum.
        mean: Maximum distance from the group's first level.

    Returns:
        A list of group dicts with the keys "point", "min_point",
        "max_point", "levels", "types", "dates" and "is_support". For empty
        input a single placeholder group is returned.
    """
    unique_levels = []
    # First level of the group being built; None until a level has been seen.
    previous_number = None

    unique_level = {
        "point": 0,
        "min_point": sys.maxsize,
        "max_point": 0,
        "levels": [],
        "types": [],
        "dates": [],
        "is_support": [],
    }
    for entry in levels:
        level = entry["level"]
        type_ = entry["type"]
        date_ = entry.get("date")
        is_support = entry["is_support"]

        # Compare against None explicitly: a truthiness test treats an anchor
        # of 0 as "no anchor" and merges every following level into its group.
        if previous_number is None or abs(level - previous_number) < mean:
            if previous_number is None:
                previous_number = level

            min_ = min(level, unique_level["min_point"])
            max_ = max(level, unique_level["max_point"])
            unique_level["point"] = max_ if level < price else min_
            unique_level["min_point"] = min_
            unique_level["max_point"] = max_
            unique_level["levels"].append(level)
            unique_level["types"].append(type_)
            unique_level["dates"].append(date_)
            unique_level["is_support"].append(is_support)
            continue

        # Too far from the current group: close it and start a new one.
        unique_levels.append(unique_level)
        previous_number = level
        unique_level = {
            "point": level,
            "min_point": level,
            "max_point": level,
            "levels": [level],
            "types": [type_],
            "dates": [date_],
            "is_support": [is_support],
        }

    unique_levels.append(unique_level)
    return unique_levels


def _fractal_candlestick_pattern_sr_2(df, n1=2, n2=2, remove_noise=False):
    """Scan ``df`` for pivot lows and highs.

    Args:
        df: Frame with "high" and "low" columns.
        n1: Candles inspected before each candidate.
        n2: Candles inspected after each candidate.
        remove_noise: When True, skip a pivot that lies too close to one
            already found (see ``_is_far_from_level``).

    Returns:
        A list of ``(timestamp, price, "support" | "resistance")`` tuples in
        row order. A candle is tested as support first and as resistance
        only if it is not a support.
    """
    levels = []
    indexes = list(df.index.values)
    lows = df["low"]
    highs = df["high"]

    # Keep the historical two-candle margin, but widen it when n1/n2 ask for
    # more context so the pivot tests never read outside the frame.
    first = max(2, n1)
    stop = df.shape[0] - max(2, n2)
    for i in range(first, stop):
        index = Timestamp(indexes[i])
        if _support(df, i, n1, n2):
            # .iloc: `i` is a row position, not an index label.
            price = lows.iloc[i]
            if not remove_noise or _is_far_from_level(price, levels, df):
                levels.append((index, price, "support"))
        elif _resistance(df, i, n1, n2):
            price = highs.iloc[i]
            if not remove_noise or _is_far_from_level(price, levels, df):
                levels.append((index, price, "resistance"))
    return levels


# method 2: window shifting method
def _window_shifting_method_sr(df, window=5, remove_noise=False):
    """Detect levels whose extreme stays the same while a window slides.

    For every centre position the maximum high and the minimum low of the
    surrounding slice are tracked. Once the same extreme has been seen
    ``window`` times in a row it is recorded as a level; with
    ``remove_noise`` it must also pass ``_is_far_from_level``.

    Returns:
        A list of ``(index label, price, "resistance" | "support")`` tuples.
    """
    found = []
    high_streak = []
    low_streak = []
    highs = df["high"]
    lows = df["low"]

    for centre in range(window, len(df) - window):
        start = centre - window

        # NOTE(review): the high slice ends one row earlier than the low
        # slice below (window - 1 vs window) - confirm this is intended.
        high_slice = highs[start : centre + window - 1]
        peak = high_slice.max()
        if peak not in high_streak:
            high_streak = []
        high_streak.append(peak)
        if len(high_streak) == window:
            if not remove_noise or _is_far_from_level(peak, found, df):
                found.append((high_slice.idxmax(), peak, "resistance"))

        low_slice = lows[start : centre + window]
        trough = low_slice.min()
        if trough not in low_streak:
            low_streak = []
        low_streak.append(trough)
        if len(low_streak) == window:
            if not remove_noise or _is_far_from_level(trough, found, df):
                found.append((low_slice.idxmin(), trough, "support"))

    return found


def get_support_resistance(df, n1=2, n2=2, window=5):
    """Run both detectors on ``df`` and return their pivots as dicts.

    ``n1``/``n2`` are the candles inspected before/after a candidate by the
    fractal detector; ``window`` is passed to the window-shifting detector.

    Returns:
        A list of dicts with "type" ("SR_FCP" for the fractal detector,
        "SR_WSM" for the window-shifting one), "date", "level" (rounded to
        2 decimals) and "is_support". Fractal pivots come first.
    """
    detectors = (
        ("SR_FCP", lambda: _fractal_candlestick_pattern_sr_2(df, n1, n2)),
        ("SR_WSM", lambda: _window_shifting_method_sr(df, window)),
    )

    pivots = []
    for tag, detect in detectors:
        for found in detect():
            pivots.append(
                {
                    "type": tag,
                    "date": found[0].to_pydatetime(),
                    "level": np.round(found[1], 2),
                    "is_support": found[2] == "support",
                }
            )
    return pivots


def _find_nearest_index(levels, value):
    """Return the position of the entry of ``levels`` closest to ``value``.

    Args:
        levels: Non-empty sequence of numbers.
        value: Number to compare against.

    Returns:
        The index of the first entry with the smallest absolute difference.
    """
    # Subtract from the converted array: a plain list does not support
    # `list - number`, so using the raw argument fails for ordinary floats.
    array = np.asarray(levels)
    return np.abs(array - value).argmin()


def _shrink_list_index(levels, ltp, items_count=10):
    """Return slice bounds covering ``items_count`` entries around ``ltp``.

    The bounds are centred on the entry of ``levels`` nearest to ``ltp`` and
    clamped to ``[0, len(levels)]``.
    """
    nearest = _find_nearest_index(levels, ltp)
    lower = max(nearest - items_count, 0)
    upper = min(nearest + items_count, len(levels))
    return lower, upper


def get_eod_sr_levels(df_yearly, dfs):
    """Collect every end-of-day level.

    ``df_yearly`` feeds the all-time, 52-week, monthly, weekly, daily and
    pivot extractors, in that order; afterwards the pivots of each frame in
    ``dfs`` are appended.

    Returns:
        One flat list of level dicts.
    """
    yearly_extractors = (
        all_time_levels,
        firty_two_week_levels,
        monthly_levels,
        weekly_levels,
        daily_levels,
        get_support_resistance,
    )

    levels = []
    for extract in yearly_extractors:
        levels.extend(extract(df_yearly))
    for frame in dfs:
        levels.extend(get_support_resistance(frame))
    return levels


def get_intraday_sr_levels(eod_levels, dfs, ltp, offset_mean=None):
    """Combine end-of-day and intraday levels and keep those around ``ltp``.

    Args:
        eod_levels: Level dicts computed from end-of-day data. The list is
            copied, so repeated calls do not accumulate intraday pivots in it.
        dfs: Intraday frames; the pivots of each one are added. The first
            frame also supplies the default ``offset_mean``.
        ltp: Last traded price used as the reference point.
        offset_mean: Grouping distance. When falsy, the mean candle size
            (high - low) of ``dfs[0]`` is used as a rough volatility estimate.

    Returns:
        The grouped levels nearest to ``ltp``, ordered by price.
    """
    # Copy first: extending the caller's list in place would make every
    # later call see the intraday pivots of the previous ones.
    levels = list(eod_levels)

    for df in dfs:
        levels.extend(get_support_resistance(df))

    sorted_levels = sorted(levels, key=itemgetter("level"))

    if not offset_mean:
        offset_mean = np.mean(dfs[0]["high"] - dfs[0]["low"])

    unique_levels = _group_noise(sorted_levels, ltp, offset_mean)
    points = [group["point"] for group in unique_levels]
    min_, max_ = _shrink_list_index(points, ltp)
    return unique_levels[min_:max_]

def analysis_market_structure(levels):
    """Walk the levels in order and track the latest support/resistance.

    A swing low is the support that preceded a higher support; it is cleared
    as soon as a level at or below the latest support appears. A swing high
    is the mirror image for resistances.

    Returns:
        ``(last_support, last_resistance, swing_low, swing_high)``; the
        swings are 0 when none is active.
    """
    support = sys.maxsize
    resistance = 0
    swing_low = swing_high = 0

    for entry in levels:
        price = entry["level"]

        # A new low invalidates the upward swing, a new high the downward one.
        if price <= support:
            swing_low = 0
        if price >= resistance:
            swing_high = 0

        if not entry["is_support"]:
            if swing_high == 0 and resistance > price:
                swing_high = resistance
            resistance = price
            continue

        if swing_low == 0 and support < price:
            swing_low = support
        support = price

    return support, resistance, swing_low, swing_high


Data Collection

In [58]:
def get_stock_price(symbol, period="2y", interval="1d", start_date=None, end_date=None):
    """Download price history with yfinance and keep the OHLC columns.

    A "Date" column holding the index converted to matplotlib date numbers
    is added in front of "Open", "High", "Low" and "Close".
    """
    quotes = yf.download(
        tickers=symbol,
        interval=interval,
        period=period,
        start=start_date,
        end=end_date,
    )
    quotes["Date"] = pd.to_datetime(quotes.index)
    quotes["Date"] = quotes["Date"].apply(mpl_dates.date2num)
    wanted = ["Date", "Open", "High", "Low", "Close"]
    return quotes.loc[:, wanted]

In [59]:
# Instrument under test. Other tickers tried: '^NSEBANK', '^NSEI', 'MSFT'.
symbol = 'TCS.NS'

# One frame per (period, interval) combination used further down.
df_y = get_stock_price(symbol, period="max", interval="1d")
df_m = get_stock_price(symbol, period="2y", interval="1mo")
df_w = get_stock_price(symbol, period="2y", interval="1wk")
df_d = get_stock_price(symbol, period="1y", interval="1d")
df_h = get_stock_price(symbol, period="6mo", interval="1h")
df_15m = get_stock_price(symbol, period="1mo", interval="15m")
df_5m = get_stock_price(symbol, period="7d", interval="5m")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [60]:
# Normalize every downloaded frame (column names, dtypes, index name).
df_y, df_m, df_w, df_d, df_h, df_15m, df_5m = (
    sanitize(frame)
    for frame in (df_y, df_m, df_w, df_d, df_h, df_15m, df_5m)
)

In [None]:
# Reference price: close of the most recent 15-minute candle.
price = df_15m.iloc[-1]["close"]

# End-of-day levels from the full daily history plus the hourly, monthly,
# weekly and daily frames, then merged with the 5m/15m intraday pivots.
eod_levels = get_eod_sr_levels(df_y, [df_h, df_m, df_w, df_d])
r1 = get_intraday_sr_levels(eod_levels, [df_5m, df_15m], price)


print(r1)
# Notebook display: the representative price of each returned group.
[x["point"] for x in r1]



In [None]:
# The second argument is iterated frame by frame, so a single frame must be
# wrapped in a list; iterating a bare DataFrame yields its column labels.
eod_levels = get_eod_sr_levels(df_y, [df_h])

In [63]:
# Pivots of the daily frame in chronological order.
df = df_d
levels = get_support_resistance(df, n1=1, n2=2)
sorted_levels = sorted(levels, key=itemgetter("date"))

# Average candle size, used as the minimum distance between kept levels.
mean = np.mean(df['high'] - df['low'])

# Keep a pivot only when it is further than `mean` from the previously kept
# pivot of the same kind (support vs. resistance).
filtered_levels = []
previous_support_level = 0
previous_resistance_level = 0
for i in sorted_levels:
    level = i["level"]
    is_support = i["is_support"]
    if is_support:
        if abs(level - previous_support_level) > mean:
            filtered_levels.append(i)
            previous_support_level = level
    elif abs(level - previous_resistance_level) > mean:
        filtered_levels.append(i)
        previous_resistance_level = level

filtered_levels

# r = analysis_market_structure(filtered_levels)
# r


[{'type': 'SR_FCP',
  'date': datetime.datetime(2021, 11, 23, 0, 0),
  'level': 3407.8,
  'is_support': True},
 {'type': 'SR_FCP',
  'date': datetime.datetime(2021, 12, 3, 0, 0),
  'level': 3665.95,
  'is_support': False},
 {'type': 'SR_FCP',
  'date': datetime.datetime(2021, 12, 6, 0, 0),
  'level': 3522.0,
  'is_support': True},
 {'type': 'SR_FCP',
  'date': datetime.datetime(2021, 12, 30, 0, 0),
  'level': 3680.0,
  'is_support': True},
 {'type': 'SR_FCP',
  'date': datetime.datetime(2022, 1, 4, 0, 0),
  'level': 3889.15,
  'is_support': False},
 {'type': 'SR_FCP',
  'date': datetime.datetime(2022, 1, 6, 0, 0),
  'level': 3772.0,
  'is_support': True},
 {'type': 'SR_WSM',
  'date': datetime.datetime(2022, 1, 10, 0, 0),
  'level': 3978.0,
  'is_support': False},
 {'type': 'SR_FCP',
  'date': datetime.datetime(2022, 1, 17, 0, 0),
  'level': 4043.0,
  'is_support': False},
 {'type': 'SR_FCP',
  'date': datetime.datetime(2022, 1, 27, 0, 0),
  'level': 3625.1,
  'is_support': True},
 {'t

In [1]:
# Render a Markdown snippet (bold/italic text and a link) as cell output.
from IPython.display import display, Markdown
display(Markdown('**_some_ markdown** and an [internal reference](render/output/markdown)!'))

**_some_ markdown** and an [internal reference](render/output/markdown)!

In [2]:
import numpy as np
import pandas as pd

# Fixed seed so the random demo frame is reproducible.
np.random.seed(24)
# Column A: 1..10; columns B-E: standard-normal noise.
df = pd.DataFrame({'A': np.linspace(1, 10, 10)})
df = pd.concat([df, pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))],
               axis=1)
# Punch two holes into the data (row 3 / column D and row 0 / column C).
df.iloc[3, 3] = np.nan
df.iloc[0, 2] = np.nan

def color_negative_red(val):
    """
    Takes a scalar and returns the css property
    `'color: red'` for negative values and
    `'color: white'` otherwise.
    """
    if val < 0:
        color = 'red'
    else:
        color = 'white'
    return 'color: %s' % color

def highlight_max(s):
    '''
    Return one css string per element of a Series: a grey
    background for every element equal to the maximum,
    an empty string for the others.
    '''
    peak = s.max()
    styles = []
    for is_peak in s == peak:
        styles.append('background-color: grey' if is_peak else '')
    return styles

# Style the frame: per-cell text colour, per-column highlight of the maximum,
# and a smaller font on the rendered table.
# NOTE(review): newer pandas releases deprecate Styler.applymap in favour of
# Styler.map - confirm against the installed version before changing.
df.style.\
    applymap(color_negative_red).\
    apply(highlight_max).\
    set_table_attributes('style="font-size: 10px"')

Unnamed: 0,A,B,C,D,E
0,1.0,1.329212,,-0.31628,-0.99081
1,2.0,-1.070816,-1.438713,0.564417,0.295722
2,3.0,-1.626404,0.219565,0.678805,1.889273
3,4.0,0.961538,0.104011,,0.850229
4,5.0,1.453425,1.057737,0.165562,0.515018
5,6.0,-1.336936,0.562861,1.392855,-0.063328
6,7.0,0.121668,1.207603,-0.00204,1.627796
7,8.0,0.354493,1.037528,-0.385684,0.519818
8,9.0,1.686583,-1.325963,1.428984,-2.089354
9,10.0,-0.12982,0.631523,-0.586538,0.29072
