In [None]:
from __future__ import division

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from matplotlib import finance

%matplotlib inline

# Loading and Exploring the Data

In [None]:
data = pd.read_csv('WIKI_20160608.csv', names=['Ticker', 'Date', 'Open', 'High', 'Low', 'Close',
                                              'Volume', '7', '8', 'Adj-O', 'Adj-H', 'Adj-L', 'Adj-C',
                                              'Adj-Vol'])

In [None]:
data.head(5)

Create a new dataframe with only the adjusted data, date, & ticker symbol.

In [None]:
dat = data[['Ticker', 'Date', 'Adj-O', 'Adj-H', 'Adj-L', 'Adj-C', 'Adj-Vol']]

In [None]:
dat.head(5)

In [None]:
dat.shape

Create a dataframe for a specific ticker

In [None]:
agilent = dat[dat['Ticker'] == 'A']

In [None]:
agilent.head()

Pretty straightforward to get a ticker specific set of data.  Below I will create a function to automate the process.

In [None]:
def tick_gettr(ticker):
    """
    Return the rows of the module-level dat dataframe for a single ticker.

    ticker is a str that is matched exactly against the 'Ticker' column.

    An independent copy is returned so that callers can add columns to the
    result (e.g. 'RSI') without pandas raising a SettingWithCopyWarning.
    """
    return dat[dat['Ticker'] == ticker].copy()

I will also create an array for all tickers in the dataset in case that comes in handy.

In [None]:
#Distinct ticker symbols found in the dataset, and how many there are.
tickers = dat['Ticker'].unique()
len(tickers)

Now I will get a ticker specific set of data to work with.

In [None]:
aapl = tick_gettr('AAPL')

## Basic Calculations

I first need to create the RSI components and determine how many days my moving average will be built with.

I will begin by creating the relative strength component.  Instead of hardcoding a number of days for the moving average I want to be able to look at different averages.  Below I will write a couple of functions to create the RSI.

RS:

In [None]:
def rs(ticker, look_back):
    """
    Compute the relative strength of a stock: the rolling mean of its up
    moves divided by the rolling mean of its down moves.

    ticker is a dataframe with an 'Adj-C' column.
    look_back is an int, the number of sessions in the rolling window.
    """

    #Change between consecutive adjusted closes.  The first entry has no
    #previous close to compare against, so it is dropped.
    delta = ticker['Adj-C'].diff()[1:]

    #Up sessions keep the gains (losses become 0); down sessions keep the
    #size of the losses (gains become 0).
    gains = delta.mask(delta < 0, 0)
    losses = delta.mask(delta > 0, 0).abs()

    #look_back mean of the gains & of the losses.
    mean_gain = gains.rolling(window=look_back, center=False).mean()
    mean_loss = losses.rolling(window=look_back, center=False).mean()

    return mean_gain / mean_loss

RSI:

In [None]:
def rsi(ticker, look_back):
    """
    Compute the relative strength index of a specific stock, rounded to two
    decimals.

    ticker and look_back are passed straight through to rs().
    """

    #RSI = 100 - 100 / (1 + RS), with RS supplied by rs().
    strength = rs(ticker, look_back)
    index_values = 100.0 - 100.0 / (1.0 + strength)

    return index_values.round(decimals=2)

The Python code to get the RS calculation was greatly informed from this StackOverflow answer:

##### http://stackoverflow.com/a/29400434/1094634

Also important to note that some sources will calculate some elements of the RSI differently.  Yahoo Finance & ThinkorSwim, for instance, calculate it the same way as each other (and differently from the above calculation) while Freestockcharts.com (Worden) calculates it the same way as the above functions.

### Buy/Sell Rule:

I will need some rules to determine when to buy & when to sell.  I will begin with a simple buy/sell rule.  Buy @ the closing price when RSI < 30 & sell @ the closing price when RSI > 70.  The 30 & 70 thresholds are the traditional demarcation points for 'oversold' (<30) & 'overbought' (>70) while using the RSI.  These thresholds are ripe for fine tuning.  Initially, I will be using the 14 day average.  The other average that I want to look at is the 9 day.  As with the oversold/overbought level, the optimal average needs to be tested/tuned as well.

Simply, what I want to know is if the RSI predicts higher prices from the buy point.  The actual price itself is not very important (although it will be useful.)  If the sell price is consistently higher than the buy price, then I will interpret that as the RSI having some predictive value (whether or not the RSI strategy is more profitable than simply buying & holding the S&P 500 or the individual issues I use is a separate question.)


First, I will get Apple's (AAPL) RSI using the 14 day average.

In [None]:
aapl_rsi = rsi(aapl, 14)

Now I want to add a new column to the dataset to include the RSI

In [None]:
aapl['RSI'] = aapl_rsi

In [None]:
aapl.head(2)

Next I want to remove the first 14 rows, which have NaNs for the RSI.

In [None]:
aapl = aapl[14:]

In [None]:
aapl.head(2)

Now I want to create some more columns that will show the prices for days where the RSI is < 30 & > 70.  Since I am going to do this repeatedly, I want to create a function to handle this process.

In [None]:
def price_columns(data, overbot, oversold):
    """
    A function that adds and populates the 'Sell Price' and 'Buy Price' columns for
    the dataset that is being used.

    data is a dataframe with 'RSI' and 'Adj-C' columns.  It is modified in
    place and also returned.
    overbot & oversold are ints

    'Sell Price' holds the adjusted close on rows where RSI > overbot and
    'Buy Price' holds it on rows where RSI < oversold.  Every other row is 0.
    """
    close = data['Adj-C']
    rsi_values = data['RSI']

    #Build each column directly from the close with a scalar 0 as the filler.
    #Assigning a Series indexed 0..n-1 would be aligned on the index labels
    #and leave NaNs instead of 0s whenever data's own index is not 0..n-1.
    #This also avoids the deprecated .ix indexer.
    data['Sell Price'] = close.where(rsi_values > overbot, 0.0)
    data['Buy Price'] = close.where(rsi_values < oversold, 0.0)

    return data

In [None]:
aapl = price_columns(aapl, 70, 30)

In [None]:
aapl.head(2)

Since I will also be prepping the data for multiple datasets repeatedly I want to do the RSI prep work above automatically as well.  I will write a function to handle prepping the data to include the RSI column.

In [None]:
def rsi_add(data, ave_length):
    """
    Add an 'RSI' column, computed by rsi(), to the dataset and return the
    rows from position ave_length onward.

    ave_length is an int
    """
    #The column is written onto the dataframe that was passed in.
    data['RSI'] = rsi(data, ave_length)

    return data[ave_length:]

I will also create several columns that represent holding periods of 1, 3, 5, & 10 days.  I think it will be beneficial to see whether or not arbitrary holding periods show better returns than the RSI > 70 sell point.  These holding periods can be played with and they are picked to represent the shorter term nature of this type of trade.

In [None]:
base = aapl.copy()
base = base['Adj-C']

day_1 = base.shift(-1)
day_3 = base.shift(-3)
day_5 = base.shift(-5)
day_10 = base.shift(-10)

In [None]:
aapl['Day 1'] = day_1
aapl['Day 3'] = day_3
aapl['Day 5'] = day_5
aapl['Day 10'] = day_10

In [None]:
aapl.head(2)

Now I have a dataset to begin working with.

# Analyzing the Data

### A simple and naive regression

First I am going to run a simple regression of the RSI column against the adjusted close column to see what that looks like.

In [None]:
aapl_prices = aapl[['Adj-C']]
aapl_rsi_d = aapl[['RSI']]

### Splitting the training & test data.

I will do this automatically as well, but since the historical order of the data is important I will write a function to handle this.  I want to train the model on the older data and test it on the newer data to see if there is any predictive ability.  A **caveat** here, however: this is where many of these types of strategies and analytical approaches end up overfitting and producing models that have *disastrous* results in the real world.

In [None]:
def train_test(data, features, prediction, train):
    """
    Split the dataset into training & testing sets, keeping the rows in
    their existing (chronological) order.

    features = a list of strs naming the columns used to train the models.
    prediction = a list of a str naming the column the model will predict.
    train = a float that represents the % of the dataset used for the training set.

    Returns train predictions, train features, test predictions, test features.
    """
    #Row position where the training set ends and the testing set begins.
    cutoff = int(round(len(data) * train))

    targets = data[prediction]
    inputs = data[features]

    #Everything before the cutoff trains the model; the rest tests it.
    return targets[:cutoff], inputs[:cutoff], targets[cutoff:], inputs[cutoff:]

In [None]:
aapl_preds, aapl_feats, aapl_testp, aapl_testf = train_test(aapl, ['RSI'], ['Adj-C'], .8)

Now I will create a simple, naive linear regression model

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
simple_linear = LinearRegression()
simple_linear.fit(aapl_feats, aapl_preds)

In [None]:
predicted_price = simple_linear.predict(aapl_testf)

In [None]:
simple_linear.score(aapl_testf, aapl_testp)

Not a very good score.

Let's see what the RSS is.

In [None]:
np.sum((predicted_price - aapl_testp) ** 2)

That's a pretty large number, again not a very good result for this very simplistic regression model.

I want to plot this out to see what these predicted results look like.

In [None]:
plt.scatter(aapl_testf.index, aapl_testp, color='black')
plt.plot(aapl_testf.index, predicted_price, color='blue', linewidth=.5)

Obviously simply using the RSI values without any type of filter to predict prices is not effective.

Now I will use a number of features to see what the result is.

In [None]:
multi_naive_features = ['Adj-O', 'Adj-H', 'Adj-L', 'Adj-Vol', 'RSI']

In [None]:
aapl_preds, aapl_feats, aapl_testp, aapl_testf = train_test(aapl, multi_naive_features, ['Adj-C'], .8)

In [None]:
multi_simple_linear = LinearRegression()
multi_simple_linear.fit(aapl_feats, aapl_preds)

In [None]:
predicted_price = multi_simple_linear.predict(aapl_testf)

Check the regression score:

In [None]:
multi_simple_linear.score(aapl_testf, aapl_testp)

Now calculate the RSS:

In [None]:
np.sum((predicted_price - aapl_testp) ** 2)

This seems a little too good, so now I will plot my predictions.

In [None]:
plt.scatter(aapl_testf.index, aapl_testp, color='black')
plt.plot(aapl_testf.index, predicted_price, color='blue', linewidth=.5)

Where in the first naive regression I had no correlation whatsoever, this one seems to be too good to be true.

Now I want to use the RSI buy/sell thresholds in order to see if these thresholds are predictive.  I will use linear regression and decision tree regression.

First I want to create another column that will show the post buy sell signal price.  I want to keep a sell column as well, since it is theoretically possible to use the overbought signal as a signal to short.  Anecdotally the short signal using RSI has not been considered very successful and any number of theories as to why have been floated; however, I have never tested this and want to keep that option open.

I will create another new column, 'Sell Signal Price', which will hold the closing price when the sell signal is generated.  I will again create a function to create this column and then populate it.

In [None]:
def sell_signal_prices(data, overbot, oversold):
    """
    This function will create the 'Sell Signal Price' column and populate it with data.

    For every row whose RSI is above 0 and below oversold (a buy signal), the
    column holds the adjusted close of the first row at or after it whose RSI
    is above overbot (the sell signal).  Rows without a buy signal, and buy
    signals that are never followed by a sell signal, hold 0.

    data is a dataframe with 'Adj-C' and 'RSI' columns.
    overbot & oversold are ints that represent the relevant thresholds for the RSI.

    Returns a new dataframe in which every NaN has been replaced by 0 and
    'Sell Signal Price' is the last column.  data itself is not modified.
    """

    #Replace the NaNs with 0s.  fillna returns a new dataframe, so the
    #dataframe that was passed in is left alone.
    data = data.fillna(0)

    close = data['Adj-C']
    rsi_values = data['RSI']

    #Keep the close on sell signal rows only, then back-fill so that every
    #row sees the close of the nearest sell signal at or after it.  This
    #replaces a nested row-by-row scan that wrote into the rows yielded by
    #iterrows(), which is not guaranteed to update the dataframe.
    next_sell_close = close.where(rsi_values > overbot).bfill()

    #Only buy signal rows receive a price.  The RSI > 0 test skips rows whose
    #RSI was NaN before the fillna above.  Buy signals with no later sell
    #signal are still NaN after the back-fill and end up as 0.
    buy_signal = (rsi_values > 0) & (rsi_values < oversold)
    data['Sell Signal Price'] = next_sell_close.where(buy_signal).fillna(0)

    return data

Now I will test the new function with the aapl dataset.  I know the location of the first RSI < 30 observation so I will use that observation to see if the function works.

In [None]:
aapl.iloc[9]

In [None]:
aapl = sell_signal_prices(aapl, 70, 30)

In [None]:
aapl.iloc[9]

Success!

Now I want to use this dataset to run some regressions using linear regression and decision tree regression.

## Running the Regressions

First I will choose which features I want to use and then I will split up my train/test data.

In [None]:
rsi_features = ['RSI', 'Buy Price']

aapl_preds, aapl_feats, aapl_testp, aapl_testf = train_test(aapl, rsi_features, ['Sell Signal Price'], .8)

### Linear Regression

In [None]:
rsi_30_model = LinearRegression()
rsi_30_model.fit(aapl_feats, aapl_preds)

In [None]:
predicted30_prices = rsi_30_model.predict(aapl_testf)

Score & RSS:

In [None]:
rsi_30_model.score(aapl_testf, aapl_testp)

In [None]:
np.sum((predicted30_prices - aapl_testp) ** 2)

In [None]:
plt.scatter(aapl_testf.index, aapl_testp, color='black')
plt.plot(aapl_testf.index, predicted30_prices, color='blue', linewidth=.5)

### Decision Tree Regression

I am going to use three different max depths (2, 5, 20.)

In [None]:
from sklearn.tree import DecisionTreeRegressor

In [None]:
trereg_2 = DecisionTreeRegressor(max_depth=2)
trereg_5 = DecisionTreeRegressor(max_depth=5)
trereg_20 = DecisionTreeRegressor(max_depth=20)

In [None]:
trereg_2.fit(aapl_feats, aapl_preds)
trereg_5.fit(aapl_feats, aapl_preds)
trereg_20.fit(aapl_feats, aapl_preds)

In [None]:
predict_2 = trereg_2.predict(aapl_testf)
predict_5 = trereg_5.predict(aapl_testf)
predict_20 = trereg_20.predict(aapl_testf)

Now I want to see the scores & RSSs.

First I have to clean up the test prices.

In [None]:
#Flatten the single test price column into a 1-D array so it lines up with
#the 1-D predictions.  The length comes from the data itself rather than a
#hard-coded row count, and .values replaces the deprecated as_matrix().
testp_matrix = aapl_testp['Sell Signal Price'].values

Max depth 2:

In [None]:
trereg_2.score(aapl_testf, aapl_testp)

In [None]:
np.sum((predict_2 - testp_matrix) ** 2)

Max depth 5:

In [None]:
trereg_5.score(aapl_testf, aapl_testp)

In [None]:
np.sum((predict_5 - testp_matrix) ** 2)

Max depth 20:

In [None]:
trereg_20.score(aapl_testf, aapl_testp)

In [None]:
#RSS for the max depth 20 tree (this cell previously reused predict_2).
np.sum((predict_20 - testp_matrix) ** 2)

Better than the naive regression I ran further up, but fairly mediocre.

Now I will plot these to see what they look like.

First, max depth 2:

In [None]:
plt.scatter(aapl_testf.index, aapl_testp, color='black')
plt.plot(aapl_testf.index, predict_2, color='blue', linewidth=.5)

Max depth 5:

In [None]:
plt.scatter(aapl_testf.index, aapl_testp, color='black')
plt.plot(aapl_testf.index, predict_5, color='blue', linewidth=.5)

Max depth 20:

In [None]:
plt.scatter(aapl_testf.index, aapl_testp, color='black')
plt.plot(aapl_testf.index, predict_20, color='blue', linewidth=.5)

Not very impressive results.

## Using holding periods

I also want to see if there is some predictive outcome by simply holding onto a position after a buy signal is generated.  In order to avoid training each possibility by hand, I'll now write a function to automate the process.  This idea is inspired by the University of Washington's Linear Regression course on Coursera.

In [None]:
periods = ['Day 1', 'Day 3', 'Day 5', 'Day 10']
hold_features = ['RSI', 'Buy Price']

aapl_hold = aapl[['RSI', 'Buy Price', 'Day 1', 'Day 3', 'Day 5', 'Day 10']]

In [None]:
def predictor(data, periods, features, train):
    """
    A function to automate the modeling for linear regression.

    One LinearRegression model is trained per entry in periods and a line
    with its score and RSS on the test rows is printed.  Nothing is returned.

    data = the data set that the models will be trained on and predictions made from.
    periods = contains a list of the columns with the relevant price data.
    features = a list of strs that are will be used to train the models.
    train = a float that represents the % of the dataset used for the training set.
    """

    observations = len(data)
    train_percent = int(round(observations * train))

    #Create and prepare the feature data set (NaNs become 0s).
    feature_frame = data[features].fillna(0)
    train_f = feature_frame[:train_percent]
    test_f = feature_frame[train_percent:]

    for period in periods:
        #Create and prepare the price data set as an (observations, 1) array.
        #Series.reshape is no longer available, so the underlying array is
        #reshaped instead.
        prices = data[period].fillna(0).values.reshape((observations, 1))

        train_p = prices[:train_percent]
        test_p = prices[train_percent:]

        #Train model & make predictions.
        model = LinearRegression()
        model.fit(train_f, train_p)
        predictions = model.predict(test_f)

        #Get RSS.
        rss = np.sum((predictions - test_p) ** 2)

        #Model score.
        score = model.score(test_f, test_p)

        #Single-argument print() behaves the same on Python 2 and Python 3.
        print("The score, {}, and RSS, {}, of the model for {}".format(score, rss, period))
    

In [None]:
predictor(aapl_hold, periods, hold_features, .8)

As we can see here, simply using holding periods does not perform better than the first naive regression attempts.

I also want to see if the actual "returns" (defined here simply as the sum of the difference between the buy price & sell signal price) were positive.  This is not an actual backtest, but more of a quick look to see whether or not a simple strategy of buy & selling using the RSI 30/70 generated a positive or negative result.

In [None]:
aapl.shape

In [None]:
aapl.head(2)

In [None]:
#Difference between the sell signal price and the adjusted close for every
#row that produced a trade (sell signal price > 0).  Columns are selected by
#name instead of by position so the cell keeps working if the column order
#changes.
traded = aapl[aapl['Sell Signal Price'] > 0]
comp_list = (traded['Sell Signal Price'] - traded['Adj-C']).tolist()

sum(comp_list)

Again, this isn't meant to replace a legitimate backtest, but simply a quick look to see if buying & selling over the life (or at least our dataset) of a security produced a positive result.  When we start to factor in commissions, slippage, risk management, and position size the actual results would be different from this "back of the envelope" calculation.


# Using Different Thresholds & Timeframes

I will begin by using the 9 day moving average mentioned above.  The timeframes of moving averages tend to be arbitrary and rely upon customs that have been used by technical analysts & chartists over the decades.  No doubt, with the increased quantification of trading & finance, many of these moving averages (m.a.) have come under scrutiny, but there is still heavy reliance on the traditional metrics (e.g. the 50 & 200 day m.a.)

I will once again use Apple as the test stock.  I will now use the 9 day RSI.

In [None]:
aapl9 = tick_gettr('AAPL')

In [None]:
aapl9_rsi = rsi(aapl9, 9)
aapl9['RSI'] = aapl9_rsi

In [None]:
aapl9 = aapl9[9:]

In [None]:
aapl9.head(2)

In [None]:
aapl9 = price_columns(aapl9, 70, 30)

In [None]:
aapl9.head(2)

I will also add the holding period columns in order to have consistent datasets that are used for analysis.

In [None]:
base9 = aapl9.copy()
base9 = base9['Adj-C']

day_1 = base9.shift(-1)
day_3 = base9.shift(-3)
day_5 = base9.shift(-5)
day_10 = base9.shift(-10)

In [None]:
aapl9['Day 1'] = day_1
aapl9['Day 3'] = day_3
aapl9['Day 5'] = day_5
aapl9['Day 10'] = day_10

In [None]:
aapl9.head(2)

Finally, I will add the Sell Signal Price column.

In [None]:
aapl9 = sell_signal_prices(aapl9, 70, 30)

In [None]:
aapl9.iloc[13]

The above observation is the first instance of the RSI falling below 30 in the first Apple dataset, that's why it's used here as well to check and see if I've successfully added the sell signal price.

Now I will move onto running the regressions.  I will forgo running the models on the holding periods since they were no better than the first set of "naive" regressions.

First I will create my train/test splits.  I will use the same features as above.

In [None]:
aapl_preds, aapl_feats, aapl_testp, aapl_testf = train_test(aapl9, rsi_features, ['Sell Signal Price'], .8)

## Linear Regression

In [None]:
rsi_309_model = LinearRegression()
rsi_309_model.fit(aapl_feats, aapl_preds)

In [None]:
aapl309_predictions = rsi_309_model.predict(aapl_testf)

In [None]:
rsi_309_model.score(aapl_testf, aapl_testp)

In [None]:
np.sum((aapl309_predictions - aapl_testp) ** 2)

In [None]:
plt.scatter(aapl_testf.index, aapl_testp, color='black')
plt.plot(aapl_testf.index, aapl309_predictions, color='blue', linewidth=.5)

It looks as though I am fitting the curve very tightly.  This is somewhat expected actually.  One of the most persistent issues with machine learning and trading is the overfitting problem.  Here we see with more trading events, the model is in fact fitting the data almost perfectly.

Now I will run the decision tree regression.

In [None]:
trereg_2309 = DecisionTreeRegressor(max_depth=2)
trereg_5309 = DecisionTreeRegressor(max_depth=5)
trereg_20309 = DecisionTreeRegressor(max_depth=20)

In [None]:
trereg_2309.fit(aapl_feats, aapl_preds)
trereg_5309.fit(aapl_feats, aapl_preds)
trereg_20309.fit(aapl_feats, aapl_preds)

In [None]:
predict_2309 = trereg_2309.predict(aapl_testf)
predict_5309 = trereg_5309.predict(aapl_testf)
predict_20309 = trereg_20309.predict(aapl_testf)

**Scores & RSS**

In [None]:
#Single-argument print() behaves the same on Python 2 and Python 3.
print("Max depth 2 score: {}".format(trereg_2309.score(aapl_testf, aapl_testp)))
print("Max depth 5 score: {}".format(trereg_5309.score(aapl_testf, aapl_testp)))
print("Max depth 20 score: {}".format(trereg_20309.score(aapl_testf, aapl_testp)))

In [None]:
#Flatten the single test price column into a 1-D array.  The length comes
#from the data itself rather than a hard-coded row count, and .values
#replaces the deprecated as_matrix().
rsi_9_actual = aapl_testp['Sell Signal Price'].values

In [None]:
#Single-argument print() behaves the same on Python 2 and Python 3.
print("Max depth 2 RSS: {}".format(np.sum((predict_2309 - rsi_9_actual) ** 2)))
print("Max depth 5 RSS: {}".format(np.sum((predict_5309 - rsi_9_actual) ** 2)))
print("Max depth 20 RSS: {}".format(np.sum((predict_20309 - rsi_9_actual) ** 2)))

**Charts**

Max depth 2:

In [None]:
plt.scatter(aapl_testf.index, aapl_testp, color='black')
plt.plot(aapl_testf.index, predict_2309, color='blue', linewidth=.5)

Max depth 5:

In [None]:
plt.scatter(aapl_testf.index, aapl_testp, color='black')
plt.plot(aapl_testf.index, predict_5309, color='blue', linewidth=.5)

Max depth 20:

In [None]:
plt.scatter(aapl_testf.index, aapl_testp, color='black')
plt.plot(aapl_testf.index, predict_20309, color='blue', linewidth=.5)

As with the decision tree with the 14 day RSI dataset, we see predictions that don't really correlate with the price data.  It seems pretty self evident that the decision tree is being overly impacted by the early data and is running into problems with predicting accurate prices.

I also want to make the same back of the envelope calculation as I did above to see what effect more trades had on this strategy.

In [None]:
#Difference between the sell signal price and the buy price for every row
#that produced a trade (sell signal price > 0).  Columns are selected by
#name instead of by position.
traded_9 = aapl9[aapl9['Sell Signal Price'] > 0]
comp_list_9 = (traded_9['Sell Signal Price'] - traded_9['Buy Price']).tolist()

sum(comp_list_9)

In [None]:
#Single-argument print() behaves the same on Python 2 and Python 3.
print("RSI 14 day # of trades: {}, RSI 9 day # of trades: {}".format(len(comp_list), len(comp_list_9)))

This is an interesting outcome, since typically strategies with more trades have worse performance.  My expectation, even with this simplistic calculation, was that this sum total would be lower.  However, given that we are looking at only 905 & 1436 trades since 1980, this probably does not qualify as overtrading and my assumptions that the performance is worse for the 9 day RSI is probably unfounded.

Finally I want to consider a lower threshold for the buy signal.  I will use 25 as the lower threshold.  I will also do so on both of the 9 & 14 day datasets that I already have.


**14 Day**

In [None]:
aapl14_25 = tick_gettr('AAPL')
aapl14_25_rsi = rsi(aapl14_25, 14)
aapl14_25['RSI'] = aapl14_25_rsi
aapl14_25 = aapl14_25[14:]
aapl14_25.head(2)

In [None]:
aapl14_25 = price_columns(aapl14_25, 70, 25)
aapl14_25.head(2)

In [None]:
base14_25 = aapl14_25.copy()
base14_25 = base14_25['Adj-C']

day_1 = base14_25.shift(-1)
day_3 = base14_25.shift(-3)
day_5 = base14_25.shift(-5)
day_10 = base14_25.shift(-10)

aapl14_25['Day 1'] = day_1
aapl14_25['Day 3'] = day_3
aapl14_25['Day 5'] = day_5
aapl14_25['Day 10'] = day_10

In [None]:
aapl14_25.head(2)

In [None]:
aapl14_25 = sell_signal_prices(aapl14_25, 70, 25)
aapl14_25.iloc[26]

In [None]:
aapl_preds, aapl_feats, aapl_testp, aapl_testf = train_test(aapl14_25, rsi_features, ['Sell Signal Price'], .8)

In [None]:
rsi_1425_model = LinearRegression()
rsi_1425_model.fit(aapl_feats, aapl_preds)
predictions_1425 = rsi_1425_model.predict(aapl_testf)

In [None]:
#Single-argument print() behaves the same on Python 2 and Python 3.
print("Score: {} & RSS: {}".format(
    rsi_1425_model.score(aapl_testf, aapl_testp),
    np.sum((predictions_1425 - aapl_testp) ** 2)))

In [None]:
plt.scatter(aapl_testf.index, aapl_testp, color='black')
plt.plot(aapl_testf.index, predictions_1425, color='blue', linewidth=.5)

In [None]:
#Difference between the sell signal price and the buy price for every row
#that produced a trade (sell signal price > 0).  Columns are selected by
#name instead of by position.
traded_1425 = aapl14_25[aapl14_25['Sell Signal Price'] > 0]
comp_list1425 = (traded_1425['Sell Signal Price'] - traded_1425['Buy Price']).tolist()

#Formatting into one string keeps the "count total" output identical on
#Python 2 and Python 3.
print("{} {}".format(len(comp_list1425), sum(comp_list1425)))

**9 Day**

In [None]:
aapl9_25 = tick_gettr('AAPL')
aapl9_25_rsi = rsi(aapl9_25, 9)
aapl9_25['RSI'] = aapl9_25_rsi
aapl9_25 = aapl9_25[9:]
aapl9_25.head(2)

In [None]:
aapl9_25 = price_columns(aapl9_25, 70, 25)
aapl9_25.head(2)

In [None]:
base9_25 = aapl9_25.copy()
base9_25 = base9_25['Adj-C']

day_1 = base9_25.shift(-1)
day_3 = base9_25.shift(-3)
day_5 = base9_25.shift(-5)
day_10 = base9_25.shift(-10)

aapl9_25['Day 1'] = day_1
aapl9_25['Day 3'] = day_3
aapl9_25['Day 5'] = day_5
aapl9_25['Day 10'] = day_10

aapl9_25.head(2)

In [None]:
aapl9_25 = sell_signal_prices(aapl9_25, 70, 25)
aapl9_25.iloc[26]

In [None]:
aapl_preds, aapl_feats, aapl_testp, aapl_testf = train_test(aapl9_25, rsi_features, ['Sell Signal Price'], .8)

In [None]:
rsi_925_model = LinearRegression()
rsi_925_model.fit(aapl_feats, aapl_preds)
predictions_925 = rsi_925_model.predict(aapl_testf)

In [None]:
#Single-argument print() behaves the same on Python 2 and Python 3.
print("Score: {} & RSS: {}".format(
    rsi_925_model.score(aapl_testf, aapl_testp),
    np.sum((predictions_925 - aapl_testp) ** 2)))

In [None]:
plt.scatter(aapl_testf.index, aapl_testp, color='black')
plt.plot(aapl_testf.index, predictions_925, color='blue', linewidth=.5)

In [None]:
#Difference between the sell signal price and the buy price for every row
#that produced a trade (sell signal price > 0).  Columns are selected by
#name instead of by position.
traded_925 = aapl9_25[aapl9_25['Sell Signal Price'] > 0]
comp_list925 = (traded_925['Sell Signal Price'] - traded_925['Buy Price']).tolist()

#Formatting into one string keeps the "count total" output identical on
#Python 2 and Python 3.
print("{} {}".format(len(comp_list925), sum(comp_list925)))

MSFT, ZUMZ, IBM, X