In [1]:
import numpy as np
import pandas as pd
import Quandl
import os

In [2]:
def load(ticker):
    """Load a Quandl dataset into a dataframe with ticker-prefixed columns.

    The Quandl token is read from the ``QUANDL_TOKEN`` environment
    variable. When that variable is unset the user is prompted for the
    token without echo, so the secret does not end up in the notebook's
    saved output.

    Parameters
    ----------
    ticker : str
        Quandl dataset code, e.g. ``'YAHOO/INDEX_GSPC'``. The text after
        the last underscore becomes the column prefix (``'GSPC_'``).

    Returns
    -------
    The frame returned by ``Quandl.get`` (requested from 1990-01-01
    onward), with every column renamed to
    ``<prefix><original label without spaces>``.

    Raises
    ------
    ValueError
        If any cell of the frame fails the ``np.isreal`` check.
    """
    # Local import: only the interactive fallback path needs it
    from getpass import getpass

    # Grab the Quandl token
    token = os.environ.get('QUANDL_TOKEN')
    if token is None:
        token = getpass("Enter Quandl token: ")
    # Call Quandl module, trim input by default from 1990 forward
    df = Quandl.get(ticker, authtoken=token, trim_start='1990-01-01')
    # Use the ticker symbol as our new prefix
    ticker_tag = ticker.split('_')[-1] + '_'
    # Map every old label to prefix + label with the spaces dropped
    new_columns = {old: ticker_tag + old.replace(' ', '') for old in df.columns}
    df = df.rename(columns=new_columns)
    # Check for non-numeric values: collect rows where any cell is not real
    non_numeric = df[~df.applymap(np.isreal).all(1)]
    if len(non_numeric) > 0:
        raise ValueError('Dataframe contains non-numeric values')
    print('%d rows loaded into dataframe.' % len(df))
    return df

In [171]:
# Fetch the 'YAHOO/INDEX_GSPC' dataset (presumably the S&P 500 index -- confirm);
# load() prefixes every column with 'GSPC_'. Needs QUANDL_TOKEN in the
# environment, otherwise load() prompts for the token.
df = load('YAHOO/INDEX_GSPC')

6566 rows loaded into dataframe.


In [172]:
def sma(df, column, period, **kwargs):
    """Add a simple moving average of ``column`` to ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend. It is modified in place and also returned.
    column : str
        Name of the column to average.
    period : int
        Window length in rows. The first ``period - 1`` rows of the new
        column are NaN because the window is not yet full.
    label : str, optional (keyword only)
        Prefix for the new column name. Defaults to ``'SMA_' + column``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with a new column named
        ``<prefix>_<period>-day``.
    """
    if 'label' in kwargs:
        prefix = kwargs['label']
    else:
        prefix = 'SMA_' + column
    column_label = prefix + '_' + str(period) + '-day'
    # Series.rolling replaces the removed pd.stats.moments.rolling_mean and
    # keeps the same default of requiring a full window per value
    df[column_label] = df[column].rolling(window=period).mean()
    return df

# Add the slow (20-day) and then the fast (10-day) moving average of the close
for window in (20, 10):
    df = sma(df, 'GSPC_Close', window)
df.tail()

Unnamed: 0_level_0,GSPC_Open,GSPC_High,GSPC_Low,GSPC_Close,GSPC_Volume,GSPC_AdjClose,SMA_GSPC_Close_20-day,SMA_GSPC_Close_10-day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2016-01-14,1891.680054,1934.469971,1878.930054,1921.839966,5241110000,1921.839966,2005.36452,1960.316003
2016-01-15,1916.680054,1916.680054,1857.829956,1880.329956,5468460000,1880.329956,1995.727515,1943.955005
2016-01-19,1888.660034,1901.439941,1864.599976,1881.329956,4928350000,1881.329956,1987.699512,1930.821997
2016-01-20,1876.180054,1876.180054,1812.290039,1859.329956,6416070000,1859.329956,1980.388507,1915.083997
2016-01-21,1861.459961,1889.849976,1848.97998,1868.98999,5078810000,1868.98999,1972.780505,1902.956995


In [173]:
def diff(df, column_a, column_b, **kwargs):
    """Add a column holding ``column_a`` minus ``column_b``.

    The new column is named by the ``label`` keyword when it is given.
    Otherwise the name is ``Delta_<prefix>_<a>_<b>``, where ``<prefix>`` is
    the first two underscore-separated parts of ``column_b`` and
    ``<a>``/``<b>`` are the last underscore-separated part of each column
    name. The frame is modified in place and also returned.
    """
    tail_a = column_a.rsplit('_', 1)[-1]
    tail_b = column_b.rsplit('_', 1)[-1]
    stem = '_'.join(column_b.split('_')[:2])
    # An explicit label wins over the generated default name
    target = kwargs.get('label', '_'.join(['Delta', stem, tail_a, tail_b]))
    df[target] = df[column_a] - df[column_b]
    return df

# MACD-style spread: 20-day SMA minus 10-day SMA of the close, stored under
# an explicit label instead of the auto-generated 'Delta_...' name.
# NOTE(review): this is slow-minus-fast; the usual MACD convention is
# fast-minus-slow (and is normally built on EMAs), so the sign here may be
# inverted relative to what readers expect -- confirm this is intended.
df = diff(df, 'SMA_GSPC_Close_20-day', 'SMA_GSPC_Close_10-day', label='MACD_GSPC_Close')
df.tail()

Unnamed: 0_level_0,GSPC_Open,GSPC_High,GSPC_Low,GSPC_Close,GSPC_Volume,GSPC_AdjClose,SMA_GSPC_Close_20-day,SMA_GSPC_Close_10-day,MACD_GSPC_Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-01-14,1891.680054,1934.469971,1878.930054,1921.839966,5241110000,1921.839966,2005.36452,1960.316003,45.048517
2016-01-15,1916.680054,1916.680054,1857.829956,1880.329956,5468460000,1880.329956,1995.727515,1943.955005,51.77251
2016-01-19,1888.660034,1901.439941,1864.599976,1881.329956,4928350000,1881.329956,1987.699512,1930.821997,56.877515
2016-01-20,1876.180054,1876.180054,1812.290039,1859.329956,6416070000,1859.329956,1980.388507,1915.083997,65.30451
2016-01-21,1861.459961,1889.849976,1848.97998,1868.98999,5078810000,1868.98999,1972.780505,1902.956995,69.823511


In [174]:
# Smooth the MACD column with a 3-day SMA. Because label= is given, the new
# column is named 'SMA_MACD_GSPC_Close_3-day' (presumably used as a signal
# line -- confirm).
df = sma(df, 'MACD_GSPC_Close', 3, label='SMA_MACD_GSPC_Close')
df.tail()

Unnamed: 0_level_0,GSPC_Open,GSPC_High,GSPC_Low,GSPC_Close,GSPC_Volume,GSPC_AdjClose,SMA_GSPC_Close_20-day,SMA_GSPC_Close_10-day,MACD_GSPC_Close,SMA_MACD_GSPC_Close_3-day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-01-14,1891.680054,1934.469971,1878.930054,1921.839966,5241110000,1921.839966,2005.36452,1960.316003,45.048517,35.591172
2016-01-15,1916.680054,1916.680054,1857.829956,1880.329956,5468460000,1880.329956,1995.727515,1943.955005,51.77251,44.598678
2016-01-19,1888.660034,1901.439941,1864.599976,1881.329956,4928350000,1881.329956,1987.699512,1930.821997,56.877515,51.232847
2016-01-20,1876.180054,1876.180054,1812.290039,1859.329956,6416070000,1859.329956,1980.388507,1915.083997,65.30451,57.984845
2016-01-21,1861.459961,1889.849976,1848.97998,1868.98999,5078810000,1868.98999,1972.780505,1902.956995,69.823511,64.001845


In [138]:
# Keep an independent snapshot of the frame as it stands at this point.
# NOTE(review): `bu` is not referenced again in the visible cells -- confirm
# it is still needed.
bu = df.copy()

In [176]:
# Gap between the MACD column and its 3-day SMA (MACD minus SMA); the sign of
# this column is what flag_swings() inspects.
df = diff(df, 'MACD_GSPC_Close', 'SMA_MACD_GSPC_Close_3-day', label='MACD-Delta-3-day')
df.tail()

Unnamed: 0_level_0,GSPC_Open,GSPC_High,GSPC_Low,GSPC_Close,GSPC_Volume,GSPC_AdjClose,SMA_GSPC_Close_20-day,SMA_GSPC_Close_10-day,MACD_GSPC_Close,SMA_MACD_GSPC_Close_3-day,MACD-Delta-3-day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2016-01-14,1891.680054,1934.469971,1878.930054,1921.839966,5241110000,1921.839966,2005.36452,1960.316003,45.048517,35.591172,9.457345
2016-01-15,1916.680054,1916.680054,1857.829956,1880.329956,5468460000,1880.329956,1995.727515,1943.955005,51.77251,44.598678,7.173832
2016-01-19,1888.660034,1901.439941,1864.599976,1881.329956,4928350000,1881.329956,1987.699512,1930.821997,56.877515,51.232847,5.644668
2016-01-20,1876.180054,1876.180054,1812.290039,1859.329956,6416070000,1859.329956,1980.388507,1915.083997,65.30451,57.984845,7.319666
2016-01-21,1861.459961,1889.849976,1848.97998,1868.98999,5078810000,1868.98999,1972.780505,1902.956995,69.823511,64.001845,5.821665


In [182]:
def flag_swings(df, column, period):
    """Flag the rows where the sign of ``column`` flips after a steady run.

    Values ``>= 0`` count as sign ``+1`` and negative values as ``-1``.
    A row is flagged ``1`` (upward swing) when its sign is higher than the
    previous row's, ``-1`` (downward swing) when it is lower, and ``0``
    when neither condition is met. A flip only counts when the earlier
    signs were steady: the ``period - 1`` signs ending two rows back must
    all agree, and the ``period - 1`` signs ending one row back must not
    sum to zero. For ``period=3`` this means the three previous rows share
    one sign.

    NOTE(review): for ``period`` above 3 the second condition no longer
    forces the previous row to match the run before it -- confirm whether
    that is intended.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified; rows containing NaN in any column
        are dropped from the returned copy.
    column : str
        Name of the column whose sign is inspected.
    period : int
        Minimum swing period, at least 2.

    Returns
    -------
    pandas.DataFrame
        A new frame without the NaN rows and with an extra integer column
        named ``<period>-day_SwingFlag_<column>``.

    Raises
    ------
    ValueError
        If ``period`` is less than 2.
    """
    # Validate up front so a bad period fails before any work is done
    if period < 2:
        raise ValueError('The minimum swing period is 2.')

    # Trim null value artifacts in SMA columns; the explicit copy keeps the
    # column assignment below from writing into a view of the caller's frame
    df = df.dropna().copy()

    # Sign of each row: +1 for values >= 0, -1 otherwise
    signs = pd.Series(np.where(df[column] >= 0, 1, -1), index=df.index)
    prior_sign = signs.shift(1)

    # Sum of the signs of the period - 1 preceding rows (NaN near the top)
    prior_sum = sum(signs.shift(i) for i in range(1, period))
    # The same sum as seen from the previous row
    prior_run = prior_sum.shift(1)

    # Comparisons against NaN are False, so rows without enough history
    # always end up flagged 0
    steady = (prior_run.abs() >= period - 1) & (prior_sum != 0)
    upward = (signs > prior_sign) & steady
    downward = (signs < prior_sign) & steady

    column_label = str(period) + '-day_SwingFlag_' + column
    # upward and downward are mutually exclusive, so this yields 1, -1 or 0
    df[column_label] = (upward.astype('int64').values
                        - downward.astype('int64').values)
    return df

# Flag sign changes in the MACD delta. flag_swings() calls dropna() first, so
# the returned frame no longer contains rows with NaN in any column (e.g.
# the rows where the SMA columns are still null).
df = flag_swings(df, 'MACD-Delta-3-day', 3)
    
    

In [180]:
# Make sure the output directory exists so the export also works on a fresh
# checkout where data/ has not been created yet
os.makedirs('data', exist_ok=True)
df.to_csv('data/sma-diff-swingflag.csv')

In [181]:
# Show a bounded preview instead of dumping the whole frame into the output
df.head(10)

Unnamed: 0_level_0,GSPC_Open,GSPC_High,GSPC_Low,GSPC_Close,GSPC_Volume,GSPC_AdjClose,SMA_GSPC_Close_20-day,SMA_GSPC_Close_10-day,MACD_GSPC_Close,SMA_MACD_GSPC_Close_3-day,MACD-Delta-3-day,3-day_SwingFlag_MACD-Delta-3-day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1990-01-31,322.980011,329.079987,322.980011,329.079987,189660000,329.079987,338.046500,329.872998,8.173502,8.627669,-0.454167,0
1990-02-01,329.079987,329.859985,327.760010,328.790009,154580000,328.790009,336.702500,328.932999,7.769501,8.256168,-0.486667,0
1990-02-02,328.790009,332.100006,328.089996,330.920013,164400000,330.920013,335.638500,328.110001,7.528499,7.823834,-0.295335,0
1990-02-05,330.920013,332.160004,330.450012,331.850006,130950000,331.850006,334.541499,328.257001,6.284499,7.194166,-0.909667,0
1990-02-06,331.850006,331.859985,328.200012,329.660004,134070000,329.660004,333.543500,328.062003,5.481497,6.431498,-0.950001,0
1990-02-07,329.660004,333.760010,326.549988,333.750000,186710000,333.750000,332.865500,328.411002,4.454498,5.406831,-0.952333,0
1990-02-08,333.750000,336.089996,332.000000,332.959991,176240000,332.959991,332.087000,329.099002,2.987997,4.307998,-1.320000,0
1990-02-09,333.019989,334.600006,332.410004,333.619995,146910000,333.619995,331.771500,329.881003,1.890497,3.110998,-1.220501,0
1990-02-12,333.619995,333.619995,329.970001,330.079987,118390000,330.079987,331.425499,330.369000,1.056499,1.978331,-0.921832,0
1990-02-13,330.079987,331.609985,327.920013,331.019989,144490000,331.019989,330.938998,331.172998,-0.234000,0.904332,-1.138332,0
