In [4]:
import pandas as pd
import numpy as np
import pandas_datareader
from pandas_datareader import data as pdr
from datetime import datetime, timedelta
import fix_yahoo_finance as yf


def _add_technical_indicators(prices, symbol):
    """Return a copy of one ticker's daily price frame with indicator and target columns added.

    prices must provide the 'Adj Close', 'Close', 'High', 'Low' and 'Volume' columns;
    symbol is stored in a 'Symbol' column as an identifier. Rows for which a trailing
    window is not yet full are left as NaN so the caller can drop them.
    """
    temp = prices.copy()

    #add an identifier
    temp['Symbol'] = symbol
    #the conversion result has to be assigned back, otherwise the call has no effect
    temp.index = pd.to_datetime(temp.index)

    #Subtract day's close by prior day's close
    temp['price_change'] = temp['Adj Close'] - temp['Adj Close'].shift()

    #next day price movement (target variable), also expressed relative to today's close
    temp['price_chg_nextday'] = temp['price_change'].shift(-1)
    temp['perc_change'] = temp['price_chg_nextday'] / temp['Adj Close']

    #Three-way label: next-day move of at least +1% (1), at most -1% (-1), or in between (0).
    #Rows where perc_change is NaN match none of the masks and stay NaN.
    temp.loc[temp['perc_change'] >= .01, 'size_of_change'] = 1
    temp.loc[temp['perc_change'] <= -.01, 'size_of_change'] = -1
    temp.loc[(temp['perc_change'] < .01) & (temp['perc_change'] > -.01), 'size_of_change'] = 0

    #Binary label: 1 when the next day closes higher, 0 when it is flat or lower
    temp.loc[temp['perc_change'] > 0, 'stock_up_down'] = 1
    temp.loc[temp['perc_change'] <= 0, 'stock_up_down'] = 0

    #numpy sign function gives 1 for rise, 0 for no change and -1 for decline. makes it easy to multiply volumes
    temp['sign'] = np.sign(temp['price_change'])

    #next day sign (target variable)
    temp['sign_nextday'] = temp['sign'].shift(-1)

    #1 when the day closed above the previous day, otherwise 0 (a NaN sign also maps to 0)
    temp['Rise'] = (temp['sign'] == 1).astype(int)

    #On-Balance Volume (OBV): running total of volume signed by the day's direction
    temp['OBV'] = (temp['Volume'] * temp['sign']).cumsum()

    #Psychological Line (PSY): percentage of rising days over a specified period, 12 in this case
    temp['PSY12'] = (temp['Rise'].rolling(window=12).sum() / 12) * 100

    #BIAS: percentage deviation of the adjusted close from its 6-day rolling mean
    SMA6 = temp['Adj Close'].rolling(window=6).mean()
    temp['BIAS6'] = ((temp['Adj Close'] - SMA6) / SMA6) * 100

    #average price change over the last 1..5 periods
    for i in range(1, 6):
        temp['ASY' + str(i)] = temp['price_change'].rolling(window=i).sum() / i

    #Ticknor indicators: https://parsproje.com/tarjome/modiriyat/492.pdf
    #Simple and exponential moving averages of the adjusted close over 10 and 5 periods
    temp['SMA10'] = temp['Adj Close'].rolling(window=10).mean()
    temp['EMA10'] = temp['Adj Close'].ewm(span=10, adjust=False, min_periods=10).mean()
    temp['SMA5'] = temp['Adj Close'].rolling(window=5).mean()
    temp['EMA5'] = temp['Adj Close'].ewm(span=5, adjust=False, min_periods=5).mean()

    #Relative Strength Index: smoothed gains versus smoothed losses (com=13, i.e. alpha = 1/14)
    up = temp['price_change'].clip(lower=0)
    down = temp['price_change'].clip(upper=0)
    rUp = up.ewm(com=13, adjust=False, min_periods=13).mean()
    rDown = down.ewm(com=13, adjust=False, min_periods=13).mean().abs()
    temp['RSI'] = 100 - 100 / (1 + rUp / rDown)

    #Williams%R and Stochastic K% share the same 14-period high/low range
    high = temp['High'].rolling(window=14).max()
    low = temp['Low'].rolling(window=14).min()
    temp['WilliamsR%'] = (high - temp['Close']) / (high - low) * -100
    temp['stochasticK%'] = (temp['Close'] - low) / (high - low) * 100

    #Stochastic D%: 3-period mean of K%
    temp['stochasticD%'] = temp['stochasticK%'].rolling(window=3).mean()

    return temp


def stockData(sList, stDate, endDate):
    """Download daily prices for each ticker and build a table of technical indicators.

    Parameters
    ----------
    sList : list of str (a single ticker string is also accepted)
        Ticker symbols passed one by one to ``pdr.get_data_yahoo``.
    stDate, endDate : str
        First and last day to keep, in the format 'YYYY-MM-DD'.

    Returns
    -------
    pandas.DataFrame
        One row per ticker and trading day inside [stDate, endDate] for which every
        computed column is available. The columns are the sixteen indicators followed
        by the binary target 'stock_up_down' (1 if the next day's adjusted close is
        higher, else 0). An empty ticker list yields an empty frame with these columns.

    Raises
    ------
    ValueError
        If stDate does not match 'YYYY-MM-DD'.
    """
    feature_columns = ['OBV', 'PSY12', 'BIAS6', 'ASY1', 'ASY2', 'ASY3', 'ASY4', 'ASY5', 'SMA10', 'EMA10',
                       'SMA5', 'EMA5', 'RSI', 'WilliamsR%', 'stochasticK%', 'stochasticD%', 'stock_up_down']

    #a bare string would otherwise be iterated character by character
    if isinstance(sList, str):
        sList = [sList]

    #trailing measures require a certain number of days to calculate. Requesting 30 extra
    #calendar days makes sure the necessary datapoints exist before stDate
    started = datetime.strptime(stDate, "%Y-%m-%d")
    backdate = started - timedelta(days=30)

    data = []
    for element in sList:
        raw = pdr.get_data_yahoo(element, start=backdate, end=endDate)
        data.append(_add_technical_indicators(raw, element))

    #pd.concat raises on an empty list, so return an empty table with the expected columns
    if not data:
        return pd.DataFrame(columns=feature_columns)

    #stack all tickers and drop rows for which not every data point could be calculated
    df = pd.concat(data)
    df = df.dropna()

    #Keep only the dates the user asked for. The stacked index repeats dates and is not
    #sorted, so a boolean mask is used instead of a label slice; the end day is inclusive.
    tz = getattr(df.index, 'tz', None)
    window_start = pd.Timestamp(stDate, tz=tz)
    window_end = pd.Timestamp(endDate, tz=tz).normalize() + pd.Timedelta(days=1)
    in_window = (df.index >= window_start) & (df.index < window_end)

    #order and limit columns to those needed
    df = df.loc[in_window, feature_columns]

    return df

#https://dataanalysiswithpandas.blogspot.com/2016/08/technical-indicator-with-pandas-and.html

In [5]:
# Load the automaker ticker list from the project repository and preview it
ticker_csv_url = "https://raw.githubusercontent.com/Capstone-CUNY/Capstone-Main/master/Car_Ticks.csv"
companies = pd.read_csv(ticker_csv_url)
companies.head()

Unnamed: 0,Ticker,Name,Country,parent,Comments
0,TSLA,Tesla,USA,,
1,F,Ford,USA,Ford Motor Co,owns Ford and Lincoln
2,FCAU,Fiat Chrysler,USA,Fiat Chrysler Automobiles,"owns Alfa Romeo, Chrysler, Dodge, Fiat, Jeep, ..."
3,GM,GM,USA,General Motors,"owns Buick, Cadillac, Chevrolet, and GMC"
4,DAI.DE,Mercedes,GER,Daimler AG,owns Mercedes-Benz and Smart


In [8]:
# Build the indicator table for every ticker in the list over the study window
tickers = list(companies['Ticker'])
df = stockData(tickers, '2013-10-19', '2018-10-19')
df.shape

(21939, 17)

In [9]:
# Preview the first five rows of the indicator table
df.head(n=5)

Unnamed: 0_level_0,OBV,PSY12,BIAS6,ASY1,ASY2,ASY3,ASY4,ASY5,SMA10,EMA10,SMA5,EMA5,RSI,WilliamsR%,stochasticK%,stochasticD%,stock_up_down
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2013-10-21,1119400.0,58.333333,-4.642637,-10.799988,-5.099998,-3.65333,-2.834999,-1.423999,178.115999,179.619015,181.260001,178.975876,55.554792,-63.402554,36.597446,56.017997,0.0
2013-10-22,-10267300.0,50.0,-4.509021,-1.060013,-5.93,-3.753337,-3.005001,-2.480002,177.796999,178.150102,178.779999,176.497248,54.556087,-63.209983,36.790017,46.766181,0.0
2013-10-23,-23587700.0,41.666667,-6.746031,-7.039993,-4.050003,-6.299998,-4.575001,-3.812,177.368999,175.668265,174.967999,172.498166,48.34062,-84.811431,15.188569,29.525344,1.0
2013-10-24,-12762000.0,50.0,-0.867377,8.649994,0.805,0.183329,-2.5625,-1.930002,177.390999,175.210398,173.037997,172.715442,55.108097,-54.608938,45.391062,32.45655,0.0
2013-10-25,-20357500.0,50.0,-1.632118,-3.48999,2.580002,-0.626663,-0.735001,-2.747998,176.487,174.201235,170.289999,171.696962,52.140236,-66.794658,33.205342,31.261658,0.0


In [10]:
# Count how many rows fall into each class of the binary target
df.loc[:, 'stock_up_down'].value_counts()

0.0    12200
1.0     9739
Name: stock_up_down, dtype: int64

In [11]:
#Check number of up days compared to down or flat days. Adjust for imbalance
bal = df['stock_up_down'].value_counts()
mult = bal.loc[0,]/bal.loc[1]
mult

1.2526953485984187

In [12]:
# Per-class weights: class 0 keeps a weight of one, class 1 is scaled by the count ratio
weight = {0: 1.0, 1: mult}

In [13]:
# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Predictors are every column except the last; the target is the last column,
# kept two-dimensional with shape (rows, 1)
dataset = df.values
n_features = len(df.columns) - 1
X = dataset[:, :n_features].astype(float)
Y = dataset[:, n_features:].astype(float)

# Hold out 33% of the rows for testing, with a fixed seed for the shuffle
# Reference: https://www.datacamp.com/community/tutorials/deep-learning-python
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=42)

# Standardize the features with statistics learned from the training rows only,
# then apply the same transformation to the test rows
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [14]:
import numpy as np
from keras.datasets import imdb
from keras.preprocessing.text import Tokenizer
from keras import models
from keras import layers



def neural(X, Y, class_weight=None, epochs=50, batch_size=25):
    """Build, train and return a small feed-forward binary classifier.

    The network is Dense(16, relu) -> Dropout(0.2) -> Dense(8, relu) -> Dropout(0.2)
    -> Dense(1, sigmoid), compiled with binary cross-entropy, RMSprop and accuracy.

    Parameters
    ----------
    X : 2-D array of features, one row per sample.
    Y : array of 0/1 targets aligned with X.
    class_weight : dict mapping class label to weight, optional
        Defaults to the notebook-level ``weight`` dictionary when omitted.
    epochs, batch_size : int
        Training schedule passed to ``fit``.

    Returns
    -------
    The fitted Keras model. As a side effect the accuracy measured on the same
    (X, Y) used for training is printed.
    """
    #set seed
    #NOTE(review): this only seeds NumPy; the Keras backend's own random generator is not
    #seeded here, so repeated runs may still differ - confirm if exact reproducibility matters
    seed = 123
    np.random.seed(seed)

    #keep the original behaviour of using the notebook's class weights unless told otherwise
    if class_weight is None:
        class_weight = weight

    #take the input width from the data actually passed in rather than from a global frame
    number_of_features = X.shape[1]

    network = models.Sequential()
    network.add(layers.Dense(units=16, activation='relu', input_shape=(number_of_features,)))
    network.add(layers.Dropout(0.2))
    network.add(layers.Dense(units=8, activation='relu'))
    network.add(layers.Dropout(0.2))
    network.add(layers.Dense(units=1, activation='sigmoid'))

    #Compile neural network
    network.compile(loss='binary_crossentropy', # Cross-entropy
                    optimizer='rmsprop', # Root Mean Square Propagation
                    metrics=['accuracy']) # Accuracy performance metric

    #Training model
    network.fit(X,
                Y,
                epochs=epochs,
                verbose=0,
                batch_size=batch_size,
                class_weight=class_weight)

    #scores holds [loss, accuracy] on the training data; report the accuracy as a percentage
    scores = network.evaluate(X, Y)
    print("\n%s: %.2f%%" % (network.metrics_names[1], scores[1]*100))
    return network





#https://chrisalbon.com/deep_learning/keras/adding_dropout/

In [15]:
# Train the network on the scaled training split
nnet = neural(X=X_train, Y=Y_train)


acc: 54.64%


In [16]:
# Evaluate the trained network on the held-out split; the result lists the loss
# followed by the accuracy metric
score = nnet.evaluate(x=X_test, y=Y_test, verbose=1)

print(score)

[0.6620489341119377, 0.5488950276243094]
