In [87]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import (
    absolute_import, division, print_function, with_statement,
    unicode_literals
)

import numpy as np
import pandas as pd
import Quandl
import os
# from sqlalchemy import sqlalchemy  # from flask.ext.


class twitfin(object):
    """Template class for the twitfin module.

    Creating an instance stores the required argument on the object and
    echoes short messages to stdout; it has no other behaviour.
    """

    #: Class-level switch; while it is truthy, ``__init__`` prints a greeting.
    aClassVariable = True

    def __init__(self, argumentName, anOptionalArg=None):
        """Build a new :class:`twitfin` instance.

        :param argumentName: value kept on the instance as
            ``instanceVariable1``.
        :type argumentName: string
        :param anOptionalArg: optional value; printed when it is truthy.
        :type anOptionalArg: string
        :returns: New instance of :class:`twitfin`
        :rtype: twitfin
        """
        self.instanceVariable1 = argumentName

        if self.aClassVariable:
            print('Hello')

        # Nothing left to report when the optional argument is absent/falsy.
        if not anOptionalArg:
            return
        print('anOptionalArg: %s' % anOptionalArg)

def load(*args, **kwargs):
    """Interactively fetch a Quandl dataset into a dataframe.

    The Quandl token is taken from the ``QUANDL_TOKEN`` environment variable
    when it is set; otherwise it is requested with a hidden prompt so that the
    secret is not echoed into the notebook output. The ticker symbol and the
    start date are prompted for, each with a default used when the answer is
    empty.

    :param args: accepted for call compatibility; not used.
    :param kwargs: accepted for call compatibility; not used.
    :returns: the dataframe returned by ``Quandl.get``.
    :raises ValueError: if any row of the result holds a non-numeric value.
    """
    # Local import: getpass is only needed here and is not imported at file level.
    import getpass

    # Prefer the environment so the token never has to be typed into a cell.
    token = os.environ.get('QUANDL_TOKEN')
    if not token:
        # getpass hides the typed value, unlike input(), whose echo ends up
        # stored in the notebook's saved output.
        token = getpass.getpass("Enter Quandl token: ")

    ticker = input("Enter Quandl ticker symbol (or hit Enter for default of YAHOO/INDEX_GSPC): ").strip()
    if not ticker:
        ticker = 'YAHOO/INDEX_GSPC'
    print(ticker)

    start_date = input("Enter start date as YYYY-MM-DD (or hit ENTER for default of 1990-01-01): ").strip()
    if not start_date:
        start_date = '1990-01-01'
    print(start_date)

    # The start date is handed to Quandl as ``trim_start``.
    print('Pulling Quandl data...')
    df = Quandl.get(ticker, authtoken=token, trim_start=start_date)

    # Rows in which at least one cell fails np.isreal are treated as bad data.
    non_numeric_rows = df[~df.applymap(np.isreal).all(1)]
    if len(non_numeric_rows) > 0:
        raise ValueError('Dataframe contains non-numeric values')

    row_count = len(df)
    print('%d rows loaded into dataframe.' % row_count)
    return df

def long_sma(df, column, *args, **kwargs):
    """Append the "long" simple moving average (SMA) of a column to ``df``.

    :param df: target dataframe; the new column is added to it in place and
        the same object is returned.
    :param column: name of the column to average.
    :param args: optional; when given, ``args[0]`` is the period in days, e.g.
        ``long_sma(df, 'Close', 20)``.
    :param kwargs: optional ``period`` (period in days, used when no
        positional period is given) and ``label``. With ``label='Close'`` and
        a period of 20 the new column is named ``Close_20-day``; without a
        label it is named ``SMA_{column}_{period}-day``.
    :returns: ``df`` with the SMA column appended.
    :raises ValueError: if the period is not an integer of at least 1.

    When no period is passed in, it is requested interactively, which is the
    only behaviour the function had before.
    """
    if args:
        period = args[0]
    else:
        period = kwargs.get('period')
    if period is None:
        period = input("Enter the period in days for the long SMA: ")
    period = int(period)
    if period < 1:
        raise ValueError('The SMA period must be at least 1 day.')

    if 'label' in kwargs:
        column_label = kwargs['label'] + '_' + str(period) + '-day'
    else:
        column_label = 'SMA_' + column + '_' + str(period) + '-day'

    values = df[column]
    if hasattr(values, 'rolling'):
        # Series.rolling replaces pd.stats.moments.rolling_mean, which no
        # longer exists in current pandas releases.
        df[column_label] = values.rolling(window=period).mean()
    else:
        # Legacy pandas without Series.rolling.
        df[column_label] = pd.stats.moments.rolling_mean(values, period)
    return df

def short_sma(df, column, *args, **kwargs):
    """Append the "short" simple moving average (SMA) of a column to ``df``.

    :param df: target dataframe; the new column is added to it in place and
        the same object is returned.
    :param column: name of the column to average.
    :param args: optional; when given, ``args[0]`` is the period in days, e.g.
        ``short_sma(df, 'Close', 10)``.
    :param kwargs: optional ``period`` (period in days, used when no
        positional period is given) and ``label``. With a label the new column
        is named ``{label}_{period}-day``; without one it is named
        ``SMA_{column}_{period}-day``.
    :returns: ``df`` with the SMA column appended.
    :raises ValueError: if the period is not an integer of at least 1.

    When no period is passed in, it is requested interactively, which is the
    only behaviour the function had before.
    """
    if args:
        period = args[0]
    else:
        period = kwargs.get('period')
    if period is None:
        period = input("Enter the period in days for the short SMA: ")
    period = int(period)
    if period < 1:
        raise ValueError('The SMA period must be at least 1 day.')

    if 'label' in kwargs:
        column_label = kwargs['label'] + '_' + str(period) + '-day'
    else:
        column_label = 'SMA_' + column + '_' + str(period) + '-day'

    values = df[column]
    if hasattr(values, 'rolling'):
        # Series.rolling replaces pd.stats.moments.rolling_mean, which no
        # longer exists in current pandas releases.
        df[column_label] = values.rolling(window=period).mean()
    else:
        # Legacy pandas without Series.rolling.
        df[column_label] = pd.stats.moments.rolling_mean(values, period)
    return df

def diff(df, column_a, column_b, **kwargs):
    """Append the difference ``column_a - column_b`` to ``df`` as a new column.

    :param df: target dataframe; modified in place and returned.
    :param column_a: name of the column to subtract from.
    :param column_b: name of the column being subtracted.
    :param kwargs: optional ``label``; when present it is used verbatim as the
        new column's name.
    :returns: ``df`` with the difference column appended.

    Without a label the name is assembled from underscore-separated pieces of
    the inputs: ``Delta_``, the first two pieces of ``column_b``, the last
    piece of ``column_a`` and the last piece of ``column_b``.
    """
    # Last underscore-delimited piece of each column name.
    tail_a = column_a.rsplit('_', 1)[-1]
    tail_b = column_b.rsplit('_', 1)[-1]
    # Up to the first two pieces of column_b form the shared prefix.
    head_b = "_".join(column_b.split('_', 2)[:2])
    default_label = '_'.join(['Delta', head_b, tail_a, tail_b])

    target = kwargs['label'] if 'label' in kwargs else default_label
    df[target] = df[column_a] - df[column_b]
    return df

def macd(df, column, *args, **kwargs):
    """Append a simple moving average (SMA) of ``column`` to ``df``.

    The computation is the same rolling mean used by the other SMA helpers in
    this file; only the interactive prompt text differs ("SMA of the MACD").

    :param df: target dataframe; the new column is added to it in place and
        the same object is returned.
    :param column: name of the column to average.
    :param args: optional; when given, ``args[0]`` is the period in days.
    :param kwargs: optional ``period`` (period in days, used when no
        positional period is given) and ``label``. With a label the new column
        is named ``{label}_{period}-day``; without one it is named
        ``SMA_{column}_{period}-day``.
    :returns: ``df`` with the SMA column appended.
    :raises ValueError: if the period is not an integer of at least 1.

    When no period is passed in, it is requested interactively, which is the
    only behaviour the function had before.
    """
    if args:
        period = args[0]
    else:
        period = kwargs.get('period')
    if period is None:
        period = input("Enter the period in days for the SMA of the MACD: ")
    period = int(period)
    if period < 1:
        raise ValueError('The SMA period must be at least 1 day.')

    if 'label' in kwargs:
        column_label = kwargs['label'] + '_' + str(period) + '-day'
    else:
        column_label = 'SMA_' + column + '_' + str(period) + '-day'

    values = df[column]
    if hasattr(values, 'rolling'):
        # Series.rolling replaces pd.stats.moments.rolling_mean, which no
        # longer exists in current pandas releases.
        df[column_label] = values.rolling(window=period).mean()
    else:
        # Legacy pandas without Series.rolling.
        df[column_label] = pd.stats.moments.rolling_mean(values, period)
    return df

def flag_swings(df, column, *args, **kwargs):
    """Flag sign changes ("swings") of ``column`` that follow a run of one sign.

    :param df: source dataframe. It is not modified: rows containing nulls
        are dropped and the flags are added to a copy, which is returned.
    :param column: name of the column whose sign (``>= 0`` counts as
        positive) is inspected.
    :param args: optional; when given, ``args[0]`` is the minimum swing period
        in days.
    :param kwargs: optional ``period`` (used when no positional period is
        given) and ``label``. With a label the new column is named
        ``{label}_{period}-day``; otherwise ``SwingFlag_{period}-day``.
    :returns: a null-free copy of ``df`` with the flag column appended:
        ``1`` for an upward swing, ``-1`` for a downward swing, ``0``
        otherwise.
    :raises ValueError: if the period is smaller than 2.

    When no period is passed in, it is requested interactively. Errors raised
    while computing the flags now propagate instead of being printed and
    swallowed, so a failure can no longer return a frame that silently lacks
    the flag column.
    """
    if args:
        period = args[0]
    else:
        period = kwargs.get('period')
    if period is None:
        period = input("Enter the period in days to flag swings: ")
    period = int(period)
    # Validate before doing any work on the frame.
    if period < 2:
        raise ValueError('The minimum swing period should be 2 days.')

    if 'label' in kwargs:
        # Append custom label with period days
        column_label = kwargs['label'] + '_' + str(period) + '-day'
    else:
        column_label = 'SwingFlag_' + str(period) + '-day'

    # Trim null value artifacts in SMA columns; copy so the caller's frame
    # is left untouched and no SettingWithCopyWarning can arise.
    df = df.dropna().copy()

    # Sign of each row: +1 when the value is >= 0, otherwise -1.
    sign_now = pd.Series(
        [1 if x >= 0 else -1 for x in df[column]], index=df.index, dtype=float
    )
    # Sign of the previous row (NaN on the first row).
    sign_prior = sign_now.shift(1)
    # Sum of the signs of the previous (period - 1) rows; NaN until enough
    # history exists.
    prior_sum = sign_prior.copy()
    for lag in range(2, period):
        prior_sum = prior_sum + sign_now.shift(lag)
    # The same sum as seen one row earlier.
    prior_run = prior_sum.shift(1)

    def flagger(sign_now, sign_prior, sign_run, sign_sum, period):
        # A flag needs a long enough one-signed run in the shifted sum and a
        # non-zero current sum. Comparisons involving NaN are False, so rows
        # without enough history fall through to 0.
        # NOTE(review): the run test uses abs() on the sum shifted by one row,
        # so it does not by itself check the direction of the run - confirm
        # this matches the intended definition of a swing.
        run_is_long_enough = abs(sign_run) >= period - 1 and sign_sum != 0
        if sign_now > sign_prior and run_is_long_enough:
            # Sign went from negative to positive: upward swing / crossover.
            return 1
        if sign_now < sign_prior and run_is_long_enough:
            # Sign went from positive to negative: downward swing / crossover.
            return -1
        # Either no sign change, or the prior run criterion was not met.
        return 0

    df[column_label] = [
        flagger(n, p, r, s, period)
        for n, p, r, s in zip(sign_now, sign_prior, prior_run, prior_sum)
    ]
    return df

def sign_sequence(df, column, *args, **kwargs):
    """List, for every row, the signs of ``column`` over the last N rows.

    :param df: source dataframe. It is not modified; rows containing nulls
        are dropped and the result is a new frame.
    :param column: name of the column whose sign is inspected. A value
        ``>= 0`` (zero included) is written as ``'1'``, anything else as
        ``'-1'``.
    :param args: optional; when given, ``args[0]`` is the number of days N.
    :param kwargs: optional ``period`` (used when no positional period is
        given).
    :returns: a copy of the null-free frame without its first ``N - 1`` rows
        (they lack a full history), with a ``SignSequence_{N}-days`` column
        holding the comma-joined signs ordered oldest first, current row last.
    :raises ValueError: if the period is smaller than 1.

    When no period is passed in, it is requested interactively. Rows are
    trimmed by position (``.iloc``) rather than with the removed ``.ix``
    indexer, which also made the trim label-based on integer indexes. Errors
    now propagate instead of being printed and swallowed.
    """
    if args:
        period = args[0]
    else:
        period = kwargs.get('period')
    if period is None:
        period = input("Enter the days prior to list the signs: ")
    period = int(period)
    # A period of 0 would turn the positional trim below into "last row only".
    if period < 1:
        raise ValueError('The sign sequence period should be at least 1 day.')

    prior_signs_label = 'SignSequence_' + str(period) + '-days'

    # Trim null value artifacts in SMA columns
    df = df.dropna()

    # Sign of every row as text, so the pieces can be joined with commas.
    signs = pd.Series(
        ['1' if x >= 0 else '-1' for x in df[column]],
        index=df.index, dtype=object
    )

    # The first (period - 1) rows have incomplete history and are dropped.
    start = period - 1
    # One list per lag, oldest lag first and the current row (lag 0) last.
    lagged = [
        signs.shift(lag).tolist()[start:] for lag in range(period - 1, -1, -1)
    ]

    df2 = df.iloc[start:].copy()
    df2[prior_signs_label] = [','.join(parts) for parts in zip(*lagged)]
    return df2

def x_days(df):
    """Label every row with a countdown: ``x-(n-1)`` first, ``x-0`` last.

    :param df: target dataframe; the ``x-day`` column is written to it in
        place and the same object is returned.
    """
    total = len(df)
    # Row at position ``pos`` is ``total - 1 - pos`` rows away from the end.
    df['x-day'] = ['x-' + str(total - 1 - pos) for pos in range(total)]
    return df

def x_transpose(df):
    """Flip the frame so that the ``x-day`` labels become the column headers.

    :param df: dataframe that already has an ``x-day`` column; it is not
        modified.
    :returns: the transposed frame, indexed by the original column names.

    Side effect: sets the global pandas display float format to three
    decimal places.
    """
    flipped = df.set_index('x-day').T
    pd.options.display.float_format = '{:.3f}'.format
    return flipped

def read_csv(filename, *args, **kwargs):
    """Thin pass-through to ``pandas.read_csv``.

    :param filename: path or file-like object handed to ``pandas.read_csv``.
    :param args: extra positional arguments, forwarded unchanged.
    :param kwargs: extra keyword arguments (``sep``, ``index_col``, ...),
        forwarded unchanged. They were previously accepted but dropped.
    :returns: the dataframe produced by ``pandas.read_csv``.
    """
    return pd.read_csv(filename, *args, **kwargs)

def read_sql(table, db, *args, **kwargs):
    """Thin pass-through to ``pandas.read_sql``.

    :param table: first argument given to ``pandas.read_sql``.
    :param db: second argument given to ``pandas.read_sql``.
    :param args: extra positional arguments, forwarded unchanged.
    :param kwargs: extra keyword arguments, forwarded unchanged.
    :returns: whatever ``pandas.read_sql`` returns.
    """
    reader = pd.read_sql
    result = reader(table, db, *args, **kwargs)
    return result

def db_connection(uri):
    """Thin pass-through to ``sqlalchemy.create_engine``.

    :param uri: database URI handed to ``sqlalchemy.create_engine``.
    :returns: whatever ``sqlalchemy.create_engine`` returns.
    :raises ImportError: if SQLAlchemy is not installed.
    """
    # The file-level sqlalchemy import is commented out, so the name was
    # undefined here and every call raised NameError. Import it locally.
    import sqlalchemy
    return sqlalchemy.create_engine(uri)


In [62]:
# Fetch the data interactively: load() prompts for the Quandl token, the
# ticker symbol and the start date, and returns the resulting dataframe.
df = load()
# print(df.tail())  # Inspect the loaded data via terminal output

Enter Quandl token: [REDACTED]
Enter Quandl ticker symbol (or hit Enter for default of YAHOO/INDEX_GSPC): 
YAHOO/INDEX_GSPC
Enter start date as YYYY-MM-DD (or hit ENTER for default of 1990-01-01): 
1990-01-01
Pulling Quandl data...
6573 rows loaded into dataframe.


In [63]:
# Display the dataframe returned by load().
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1990-01-02,353.399994,359.690002,351.980011,359.690002,162070000,359.690002
1990-01-03,359.690002,360.589996,357.890015,358.760010,192330000,358.760010
1990-01-04,358.760010,358.760010,352.890015,355.670013,177000000,355.670013
1990-01-05,355.670013,355.670013,351.350006,352.200012,158530000,352.200012
1990-01-08,352.200012,354.239990,350.540009,353.790009,140110000,353.790009
1990-01-09,353.829987,354.170013,349.609985,349.619995,155210000,349.619995
1990-01-10,349.619995,349.619995,344.320007,347.309998,175990000,347.309998
1990-01-11,347.309998,350.140015,347.309998,348.529999,154390000,348.529999
1990-01-12,348.529999,348.529999,339.489990,339.929993,183880000,339.929993
1990-01-15,339.929993,339.940002,336.570007,337.000000,140590000,337.000000


In [64]:
# Append the long and then the short simple-moving-average column of 'Close'.
# Each helper prompts for its period; the newest column is always the last one.
df = long_sma(df, 'Close')
print('* Added column: ' + df.columns[-1])
df = short_sma(df, 'Close')
print('* Added column: ' + df.columns[-1])

Enter the period in days for the long SMA: 20
* Added column: SMA_Close_20-day
Enter the period in days for the short SMA: 10
* Added column: SMA_Close_10-day


In [65]:
# Display the dataframe with the long and short SMA columns appended.
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close,SMA_Close_20-day,SMA_Close_10-day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1990-01-02,353.399994,359.690002,351.980011,359.690002,162070000,359.690002,,
1990-01-03,359.690002,360.589996,357.890015,358.760010,192330000,358.760010,,
1990-01-04,358.760010,358.760010,352.890015,355.670013,177000000,355.670013,,
1990-01-05,355.670013,355.670013,351.350006,352.200012,158530000,352.200012,,
1990-01-08,352.200012,354.239990,350.540009,353.790009,140110000,353.790009,,
1990-01-09,353.829987,354.170013,349.609985,349.619995,155210000,349.619995,,
1990-01-10,349.619995,349.619995,344.320007,347.309998,175990000,347.309998,,
1990-01-11,347.309998,350.140015,347.309998,348.529999,154390000,348.529999,,
1990-01-12,348.529999,348.529999,339.489990,339.929993,183880000,339.929993,,
1990-01-15,339.929993,339.940002,336.570007,337.000000,140590000,337.000000,,350.250003


In [66]:
# Append 'MACD_Close': the difference of the two SMA columns added last.
# diff() computes first-minus-second, so with the long SMA added before the
# short one this is long SMA minus short SMA.
# NOTE(review): MACD is usually quoted as short minus long - confirm the
# intended sign before interpreting the crossover flags.
sma_columns = df.columns[-2:].tolist()
df = diff(df, sma_columns[0], sma_columns[1], label='MACD_Close')
print('* Added column: ' + df.columns[-1])

* Added column: MACD_Close


In [67]:
# Display the dataframe with the MACD_Close column appended.
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close,SMA_Close_20-day,SMA_Close_10-day,MACD_Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1990-01-02,353.399994,359.690002,351.980011,359.690002,162070000,359.690002,,,
1990-01-03,359.690002,360.589996,357.890015,358.760010,192330000,358.760010,,,
1990-01-04,358.760010,358.760010,352.890015,355.670013,177000000,355.670013,,,
1990-01-05,355.670013,355.670013,351.350006,352.200012,158530000,352.200012,,,
1990-01-08,352.200012,354.239990,350.540009,353.790009,140110000,353.790009,,,
1990-01-09,353.829987,354.170013,349.609985,349.619995,155210000,349.619995,,,
1990-01-10,349.619995,349.619995,344.320007,347.309998,175990000,347.309998,,,
1990-01-11,347.309998,350.140015,347.309998,348.529999,154390000,348.529999,,,
1990-01-12,348.529999,348.529999,339.489990,339.929993,183880000,339.929993,,,
1990-01-15,339.929993,339.940002,336.570007,337.000000,140590000,337.000000,,350.250003,


In [68]:
# Append a simple moving average of the most recently added (MACD) column;
# macd() prompts for the averaging period.
macd_column = df.columns[-1]
df = macd(df, macd_column, label='SMA_' + macd_column)
print('* Added column: ' + df.columns[-1])

Enter the period in days for the SMA of the MACD: 3
* Added column: SMA_MACD_Close_3-day


In [69]:
# Display the dataframe with the SMA-of-MACD column appended.
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close,SMA_Close_20-day,SMA_Close_10-day,MACD_Close,SMA_MACD_Close_3-day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1990-01-02,353.399994,359.690002,351.980011,359.690002,162070000,359.690002,,,,
1990-01-03,359.690002,360.589996,357.890015,358.760010,192330000,358.760010,,,,
1990-01-04,358.760010,358.760010,352.890015,355.670013,177000000,355.670013,,,,
1990-01-05,355.670013,355.670013,351.350006,352.200012,158530000,352.200012,,,,
1990-01-08,352.200012,354.239990,350.540009,353.790009,140110000,353.790009,,,,
1990-01-09,353.829987,354.170013,349.609985,349.619995,155210000,349.619995,,,,
1990-01-10,349.619995,349.619995,344.320007,347.309998,175990000,347.309998,,,,
1990-01-11,347.309998,350.140015,347.309998,348.529999,154390000,348.529999,,,,
1990-01-12,348.529999,348.529999,339.489990,339.929993,183880000,339.929993,,,,
1990-01-15,339.929993,339.940002,336.570007,337.000000,140590000,337.000000,,350.250003,,


In [70]:
# Append the difference of the last two columns (the MACD column minus its
# SMA), using diff()'s default 'Delta_...' label.
macd_sma_columns = df.columns[-2:].tolist()
df = diff(df, macd_sma_columns[0], macd_sma_columns[1])
print('* Added column: ' + df.columns[-1])

* Added column: Delta_SMA_MACD_Close_3-day


In [71]:
# Display the dataframe with the Delta column appended.
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close,SMA_Close_20-day,SMA_Close_10-day,MACD_Close,SMA_MACD_Close_3-day,Delta_SMA_MACD_Close_3-day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1990-01-02,353.399994,359.690002,351.980011,359.690002,162070000,359.690002,,,,,
1990-01-03,359.690002,360.589996,357.890015,358.760010,192330000,358.760010,,,,,
1990-01-04,358.760010,358.760010,352.890015,355.670013,177000000,355.670013,,,,,
1990-01-05,355.670013,355.670013,351.350006,352.200012,158530000,352.200012,,,,,
1990-01-08,352.200012,354.239990,350.540009,353.790009,140110000,353.790009,,,,,
1990-01-09,353.829987,354.170013,349.609985,349.619995,155210000,349.619995,,,,,
1990-01-10,349.619995,349.619995,344.320007,347.309998,175990000,347.309998,,,,,
1990-01-11,347.309998,350.140015,347.309998,348.529999,154390000,348.529999,,,,,
1990-01-12,348.529999,348.529999,339.489990,339.929993,183880000,339.929993,,,,,
1990-01-15,339.929993,339.940002,336.570007,337.000000,140590000,337.000000,,350.250003,,,


In [72]:
# Flag the crossovers (sign swings) of the newest column over a prompted
# period. delta_column is kept because a later cell reuses it.
delta_column = df.columns[-1]
df = flag_swings(df, delta_column)
print('* Added column: ' + df.columns[-1])

Enter the period in days to flag swings: 3
* Added column: SwingFlag_3-day


In [73]:
# Display the dataframe with the swing-flag column appended. flag_swings()
# drops rows containing nulls, so the rows without a full SMA history are gone.
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close,SMA_Close_20-day,SMA_Close_10-day,MACD_Close,SMA_MACD_Close_3-day,Delta_SMA_MACD_Close_3-day,SwingFlag_3-day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1990-01-31,322.980011,329.079987,322.980011,329.079987,189660000,329.079987,338.046500,329.872998,8.173502,8.627669,-0.454167,0
1990-02-01,329.079987,329.859985,327.760010,328.790009,154580000,328.790009,336.702500,328.932999,7.769501,8.256168,-0.486667,0
1990-02-02,328.790009,332.100006,328.089996,330.920013,164400000,330.920013,335.638500,328.110001,7.528499,7.823834,-0.295335,0
1990-02-05,330.920013,332.160004,330.450012,331.850006,130950000,331.850006,334.541499,328.257001,6.284499,7.194166,-0.909667,0
1990-02-06,331.850006,331.859985,328.200012,329.660004,134070000,329.660004,333.543500,328.062003,5.481497,6.431498,-0.950001,0
1990-02-07,329.660004,333.760010,326.549988,333.750000,186710000,333.750000,332.865500,328.411002,4.454498,5.406831,-0.952333,0
1990-02-08,333.750000,336.089996,332.000000,332.959991,176240000,332.959991,332.087000,329.099002,2.987997,4.307998,-1.320000,0
1990-02-09,333.019989,334.600006,332.410004,333.619995,146910000,333.619995,331.771500,329.881003,1.890497,3.110998,-1.220501,0
1990-02-12,333.619995,333.619995,329.970001,330.079987,118390000,330.079987,331.425499,330.369000,1.056499,1.978331,-0.921832,0
1990-02-13,330.079987,331.609985,327.920013,331.019989,144490000,331.019989,330.938998,331.172998,-0.234000,0.904332,-1.138332,0


In [88]:
# Append the comma-joined sign history of the delta column; sign_sequence()
# prompts for how many days to list. Relies on delta_column from an earlier cell.
df = sign_sequence(df, delta_column)
print('* Added column: ' + df.columns[-1])

Enter the days prior to list the signs: 3
* Added column: SignSequence_3-days


In [89]:
# Display the dataframe with the sign-sequence column appended.
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close,SMA_Close_20-day,SMA_Close_10-day,MACD_Close,SMA_MACD_Close_3-day,Delta_SMA_MACD_Close_3-day,SwingFlag_3-day,PriorSigns_3-days,SignSequence_3-days
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1990-02-12,333.619995,333.619995,329.970001,330.079987,118390000,330.079987,331.425499,330.369000,1.056499,1.978331,-0.921832,0,"-1,-1,-1","-1,-1,-1"
1990-02-13,330.079987,331.609985,327.920013,331.019989,144490000,331.019989,330.938998,331.172998,-0.234000,0.904332,-1.138332,0,"-1,-1,-1","-1,-1,-1"
1990-02-14,331.019989,333.200012,330.640015,332.010010,138530000,332.010010,330.669499,331.466000,-0.796501,0.008666,-0.805167,0,"-1,-1,-1","-1,-1,-1"
1990-02-15,332.010010,335.209991,331.609985,334.890015,174620000,334.890015,330.504500,332.076001,-1.571501,-0.867334,-0.704167,0,"-1,-1,-1","-1,-1,-1"
1990-02-16,334.890015,335.640015,332.420013,332.720001,166840000,332.720001,330.183000,332.256000,-2.073000,-1.480334,-0.592666,0,"-1,-1,-1","-1,-1,-1"
1990-02-20,332.720001,332.720001,326.260010,327.989990,147300000,327.989990,330.063499,331.869998,-1.806499,-1.817000,0.010501,1,"1,-1,-1","-1,-1,1"
1990-02-21,327.910004,328.170013,324.470001,327.670013,159240000,327.670013,329.866501,331.670999,-1.804498,-1.894665,0.090167,0,"1,1,-1",-111
1990-02-22,327.670013,330.980011,325.700012,325.700012,184320000,325.700012,329.638501,330.866000,-1.227499,-1.612832,0.385333,0,111,111
1990-02-23,325.700012,326.149994,322.100006,324.149994,148490000,324.149994,329.542001,329.985001,-0.442999,-1.158332,0.715333,0,111,111
1990-02-26,324.160004,328.670013,323.980011,328.670013,148900000,328.670013,329.685503,329.490002,0.195500,-0.491666,0.687166,0,111,111


In [90]:
print('\n')
print('* Current columns: ' + str(list(df.columns.values)))

del_columns = input("Enter the names of columns to delete (seperated by commas): ")

# The prompt asks for comma-separated names, so split on commas. Splitting on
# whitespace broke labels that contain a space, such as 'Adj Close'.
# Surrounding blanks and empty entries are ignored.
columns_to_drop = [name.strip() for name in del_columns.split(',') if name.strip()]

# Drop everything in one call and rebind df, so a mistyped name raises before
# any column has been removed instead of leaving the frame half-edited.
df = df.drop(columns_to_drop, axis=1)

file_name = input("Enter the file name to save as a csv: ")
# Keep only the last path component of whatever was typed.
file_name = file_name.split('/')[-1]
# NOTE(review): file_name is collected but nothing is written to disk yet;
# the save step (e.g. a to_csv call) still has to be added.



* Columns: ['Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close', 'SMA_Close_20-day', 'SMA_Close_10-day', 'MACD_Close', 'SMA_MACD_Close_3-day', 'Delta_SMA_MACD_Close_3-day', 'SwingFlag_3-day', 'PriorSigns_3-days', 'SignSequence_3-days']
