In [14]:
import yfinance as yf
import pandas as pd
from finta import TA
import numpy as np

In [26]:
def create_ta_features(ticker='^GSPC', start_='2010-01-01', end_='2022-12-31', interval_='1d', fillna=True, scale_to_std=True, fill_weekends=True):
    """
    Creates dataframe with technical analysis features

    Downloads price data with yfinance, lower-cases the price/volume column
    names, and adds one column per finta ``TA`` indicator that can be computed
    and stored as a single column. Indicators that raise are skipped.

    :param ticker: ticker symbol to download data for (default is S&P 500)
    :param start_: start date
    :param end_: end date
    :param interval_: data frequency
    :param fillna: whether to fill in missing values (back-fill first, then forward-fill)
    :param scale_to_std: whether to standardize every column to zero mean and unit
        standard deviation; the statistics are computed over the whole frame
    :param fill_weekends: whether to resample to calendar days and forward-fill,
        so every calendar day missing from the download repeats the previous row
    :return: dataframe with technical analysis features
    """
    # download data
    df = yf.download(ticker, start_, end_, interval=interval_)

    # Prefer the adjusted close as "close". When the download has no
    # "Adj Close" column (e.g. yfinance called with auto_adjust=True), fall
    # back to "Close" instead of dropping the only close price.
    if "Adj Close" in df.columns:
        df = df.drop(columns="Close")
    df = df.rename(columns={
        "Open": "open",
        "High": "high",
        "Low": "low",
        "Close": "close",
        "Adj Close": "close",
        "Volume": "volume",
    })

    # fill weekends
    if fill_weekends:
        df = df.resample('D').ffill()

    # get all public callables in finta
    finta_functions = [
        func for func in dir(TA)
        if callable(getattr(TA, func)) and not func.startswith("__")
    ]
    # Best effort: compute every indicator and append it as a column.
    # Indicators that need extra arguments, or return something that cannot be
    # stored as one column, raise and are skipped. Catch Exception rather than
    # everything so KeyboardInterrupt can still stop the loop.
    for func in finta_functions:
        try:
            df[func] = getattr(TA, func)(df)
        except Exception:
            continue

    # fill in missing values: back-fill covers indicator warm-up rows at the
    # start (using later values), forward-fill covers anything left over
    if fillna:
        df = df.bfill().ffill()

    # scale to standard deviation, by column
    if scale_to_std:
        column_std = df.std()
        # A constant column has std 0; dividing by it would turn the whole
        # column into NaN. Divide by 1 instead so it becomes all zeros.
        df = (df - df.mean()) / column_std.replace(0, 1)
    return df

In [27]:
# Function that adds the sine and cosine of weekday, day of month, and day of year to a dataframe.
# The frequency of the index is inferred: an exactly daily ('D') index is assumed to include weekends,
# while any other frequency (business-daily, hourly, minutely, etc.) is assumed not to include them.
# If the data does not include weekends, the week is assumed to be 5 days (not 7), the month 21 days (not 31), and the year 250 days (not 365).
def add_time_features(df):
    """
    Adds sine and cosine of weekday, monthday, yearday to dataframe

    The six columns ``sin_weekday``, ``cos_weekday``, ``sin_monthday``,
    ``cos_monthday``, ``sin_yearday`` and ``cos_yearday`` are added to ``df``
    itself (the input frame is modified) and the same frame is returned.
    No other column of ``df`` is touched.

    :param df: dataframe to add time features to; its index must expose
        ``dayofweek``, ``day`` and ``dayofyear`` (e.g. a DatetimeIndex)
    :return: dataframe with time features
    """
    # get frequency of data; infer_freq raises ValueError when it cannot infer
    # one (for instance with fewer than 3 timestamps), which is treated the
    # same as "no frequency found"
    try:
        freq = pd.infer_freq(df.index)
    except ValueError:
        freq = None

    # if frequency is daily, assume data includes weekends
    include_weekends = freq == 'D'

    # get number of days in week, month, year
    if include_weekends:
        days_in_week, days_in_month, days_in_year = 7, 31, 365
    else:
        days_in_week, days_in_month, days_in_year = 5, 21, 250

    # NOTE(review): in the no-weekend case the calendar day of month (1..31)
    # and day of year (1..366) are divided by trading-day periods (21, 250),
    # so the encoding wraps around within a month/year. Left as-is because a
    # correct trading-day ordinal needs a trading calendar.
    index = df.index
    calendar_parts = (
        ('weekday', index.dayofweek, days_in_week),
        ('monthday', index.day, days_in_month),
        ('yearday', index.dayofyear, days_in_year),
    )
    # Compute the angles straight from the index instead of via temporary
    # 'weekday'/'monthday'/'yearday' columns, so columns with those names that
    # already exist in df are neither overwritten nor dropped.
    for name, values, period in calendar_parts:
        angle = 2 * np.pi * np.asarray(values) / period
        df['sin_' + name] = np.sin(angle)
        df['cos_' + name] = np.cos(angle)
    return df

In [28]:
# a function that takes a dataframe and implements a sliding window mechanism with a given window size
# return a list of windows as dataframes
def sliding_window(df, window_size=10):
    """
    Splits a dataframe into overlapping windows of consecutive rows.

    Each window starts one row after the previous one and holds
    ``window_size`` rows; when the dataframe has fewer rows than
    ``window_size`` the result is an empty list.

    :param df: dataframe
    :param window_size: window size
    :return: list of windows as dataframes
    """
    # number of start positions that still leave room for a full window
    n_starts = len(df) - window_size + 1
    return [df.iloc[start:start + window_size] for start in range(n_starts)]

In [29]:
# Build the feature frame with the function defaults (ticker '^GSPC',
# 2010-01-01 to 2022-12-31, daily interval, NaN filling, scaling and weekend
# filling all enabled), then preview the first rows.
sp500_df = create_ta_features()
sp500_df.head(20)

[*********************100%***********************]  1 of 1 completed


  for x, y in zip(x.fillna(0).iteritems(), y.iteritems()):
  for x, y in zip(x.fillna(0).iteritems(), y.iteritems()):
  for x, y in zip(x.fillna(0).iteritems(), y.iteritems()):
  for x, y in zip(x.fillna(0).iteritems(), y.iteritems()):
  for x, y in zip(x.fillna(0).iteritems(), y.iteritems()):
  sc.iteritems(), sma.shift().iteritems(), ohlc[column].iteritems()


Unnamed: 0_level_0,open,high,low,close,volume,ADL,ADX,AO,ATR,BBWIDTH,...,VAMA,VBM,VFI,VPT,VWAP,VZO,WILLIAMS,WMA,WOBV,ZLEMA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-04,-1.299113,-1.286708,-1.293664,-1.282928,0.049746,-1.833595,6.432831,-0.703535,-0.730905,0.147278,...,-1.274752,0.604053,-0.437061,-1.758947,-1.302675,-0.247649,-0.179769,-1.272791,-1.100389,-1.275609
2010-01-05,-1.282826,-1.283933,-1.280318,-1.279357,-1.377868,-1.830133,6.432831,-0.703535,-0.730905,0.147278,...,-1.274752,0.604053,-0.437061,-1.755258,-1.296173,1.659163,-0.179769,-1.272791,-1.100389,-1.275609
2010-01-06,-1.279741,-1.281359,-1.275947,-1.27873,0.983417,-1.82858,6.432831,-0.703535,-0.730905,0.147278,...,-1.274752,0.604053,-0.437061,-1.75163,-1.288821,2.959055,-0.179769,-1.272791,-1.098638,-1.275609
2010-01-07,-1.279174,-1.278071,-1.278627,-1.274127,1.266984,-1.822062,6.432831,-0.703535,-0.730905,0.147278,...,-1.274752,0.604053,-0.437061,-1.744774,-1.284374,3.464771,-0.179769,-1.272791,-1.085015,-1.275609
2010-01-08,-1.274875,-1.275126,-1.273635,-1.270798,0.428625,-1.816325,6.432831,-0.703535,-0.730905,0.147278,...,-1.274752,0.604053,-0.437061,-1.739065,-1.280347,3.68797,-0.179769,-1.272791,-1.076811,-1.275609
2010-01-09,-1.274875,-1.275126,-1.273635,-1.270798,0.428625,-1.810589,6.432831,-0.703535,-0.730905,0.147278,...,-1.274752,0.604053,-0.437061,-1.733357,-1.277707,2.766435,-0.179769,-1.272791,-1.076811,-1.275609
2010-01-10,-1.274875,-1.275126,-1.273635,-1.270798,0.428625,-1.804853,6.432831,-0.703535,-0.730905,0.147278,...,-1.274752,0.604053,-0.437061,-1.727649,-1.275841,2.125317,-0.179769,-1.272791,-1.076811,-1.275609
2010-01-11,-1.269371,-1.270752,-1.267726,-1.268775,0.301304,-1.803112,6.432831,-0.703535,-0.730905,0.147278,...,-1.274752,0.604053,-0.437061,-1.726146,-1.27317,2.544684,-0.179769,-1.272791,-1.071975,-1.275609
2010-01-12,-1.271546,-1.276714,-1.278168,-1.27966,0.739357,-1.804878,6.432831,-0.703535,-0.730905,0.147278,...,-1.274752,0.604053,-0.437061,-1.734095,-1.273745,1.095195,-0.179769,-1.272791,-1.100803,-1.275609
2010-01-13,-1.278122,-1.272099,-1.276732,-1.27009,0.220027,-1.801032,6.432831,-0.703535,-0.730905,0.147278,...,-1.274752,0.604053,-0.437061,-1.727963,-1.272841,1.633398,-0.179769,-1.271787,-1.078391,-1.275609


In [30]:
# add_time_features writes its columns into the frame it is given, so pass a
# copy: sp500_df then stays exactly as the previous cell displayed it and this
# cell can be re-run without changing earlier state.
sp500_df_with_time = add_time_features(sp500_df.copy())
sp500_df_with_time.head(20)

Unnamed: 0_level_0,open,high,low,close,volume,ADL,ADX,AO,ATR,BBWIDTH,...,WILLIAMS,WMA,WOBV,ZLEMA,sin_weekday,cos_weekday,sin_monthday,cos_monthday,sin_yearday,cos_yearday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-04,-1.299113,-1.286708,-1.293664,-1.282928,0.049746,-1.833595,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.272791,-1.100389,-1.275609,0.0,1.0,0.724793,0.688967,0.068802,0.99763
2010-01-05,-1.282826,-1.283933,-1.280318,-1.279357,-1.377868,-1.830133,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.272791,-1.100389,-1.275609,0.781831,0.62349,0.848644,0.528964,0.085965,0.996298
2010-01-06,-1.279741,-1.281359,-1.275947,-1.27873,0.983417,-1.82858,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.272791,-1.098638,-1.275609,0.974928,-0.222521,0.937752,0.347305,0.103102,0.994671
2010-01-07,-1.279174,-1.278071,-1.278627,-1.274127,1.266984,-1.822062,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.272791,-1.085015,-1.275609,0.433884,-0.900969,0.988468,0.151428,0.120208,0.992749
2010-01-08,-1.274875,-1.275126,-1.273635,-1.270798,0.428625,-1.816325,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.272791,-1.076811,-1.275609,-0.433884,-0.900969,0.998717,-0.050649,0.137279,0.990532
2010-01-09,-1.274875,-1.275126,-1.273635,-1.270798,0.428625,-1.810589,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.272791,-1.076811,-1.275609,-0.974928,-0.222521,0.968077,-0.250653,0.154309,0.988023
2010-01-10,-1.274875,-1.275126,-1.273635,-1.270798,0.428625,-1.804853,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.272791,-1.076811,-1.275609,-0.781831,0.62349,0.897805,-0.440394,0.171293,0.98522
2010-01-11,-1.269371,-1.270752,-1.267726,-1.268775,0.301304,-1.803112,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.272791,-1.071975,-1.275609,0.0,1.0,0.790776,-0.612106,0.188227,0.982126
2010-01-12,-1.271546,-1.276714,-1.278168,-1.27966,0.739357,-1.804878,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.272791,-1.100803,-1.275609,0.781831,0.62349,0.651372,-0.758758,0.205104,0.97874
2010-01-13,-1.278122,-1.272099,-1.276732,-1.27009,0.220027,-1.801032,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.271787,-1.078391,-1.275609,0.974928,-0.222521,0.485302,-0.874347,0.221922,0.975065


In [31]:
# Cut the time-augmented frame into overlapping windows of 10 consecutive rows
# and show the first window.
sliding_window_split = sliding_window(sp500_df_with_time, window_size=10)
sliding_window_split[0]

Unnamed: 0_level_0,open,high,low,close,volume,ADL,ADX,AO,ATR,BBWIDTH,...,WILLIAMS,WMA,WOBV,ZLEMA,sin_weekday,cos_weekday,sin_monthday,cos_monthday,sin_yearday,cos_yearday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-04,-1.299113,-1.286708,-1.293664,-1.282928,0.049746,-1.833595,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.272791,-1.100389,-1.275609,0.0,1.0,0.724793,0.688967,0.068802,0.99763
2010-01-05,-1.282826,-1.283933,-1.280318,-1.279357,-1.377868,-1.830133,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.272791,-1.100389,-1.275609,0.781831,0.62349,0.848644,0.528964,0.085965,0.996298
2010-01-06,-1.279741,-1.281359,-1.275947,-1.27873,0.983417,-1.82858,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.272791,-1.098638,-1.275609,0.974928,-0.222521,0.937752,0.347305,0.103102,0.994671
2010-01-07,-1.279174,-1.278071,-1.278627,-1.274127,1.266984,-1.822062,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.272791,-1.085015,-1.275609,0.433884,-0.900969,0.988468,0.151428,0.120208,0.992749
2010-01-08,-1.274875,-1.275126,-1.273635,-1.270798,0.428625,-1.816325,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.272791,-1.076811,-1.275609,-0.433884,-0.900969,0.998717,-0.050649,0.137279,0.990532
2010-01-09,-1.274875,-1.275126,-1.273635,-1.270798,0.428625,-1.810589,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.272791,-1.076811,-1.275609,-0.974928,-0.222521,0.968077,-0.250653,0.154309,0.988023
2010-01-10,-1.274875,-1.275126,-1.273635,-1.270798,0.428625,-1.804853,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.272791,-1.076811,-1.275609,-0.781831,0.62349,0.897805,-0.440394,0.171293,0.98522
2010-01-11,-1.269371,-1.270752,-1.267726,-1.268775,0.301304,-1.803112,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.272791,-1.071975,-1.275609,0.0,1.0,0.790776,-0.612106,0.188227,0.982126
2010-01-12,-1.271546,-1.276714,-1.278168,-1.27966,0.739357,-1.804878,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.272791,-1.100803,-1.275609,0.781831,0.62349,0.651372,-0.758758,0.205104,0.97874
2010-01-13,-1.278122,-1.272099,-1.276732,-1.27009,0.220027,-1.801032,6.432831,-0.703535,-0.730905,0.147278,...,-0.179769,-1.271787,-1.078391,-1.275609,0.974928,-0.222521,0.485302,-0.874347,0.221922,0.975065
