In [49]:
from yahoo_fin.stock_info import get_data
import numpy as np


def get_ml_formatted_data(ticker, start_date='01/01/2015', end_date='01/01/2020'):
    # Step 1: get data
    x = get_data(ticker=ticker,start_date=start_date, end_date=end_date)
    # Step 2: process data
    del x['ticker']
    del x['adjclose']
    del x['volume']
    y = x.pct_change()
    y = y.iloc[1:]

    def cust_filter(row):
        if (row.name + 1) % 6 == 0:
            return int(row['open'] > 0)
        else:
            return float('NaN')

    y.reset_index(drop=True, inplace=True)
    y['opened_up'] = y.apply(cust_filter, axis=1)
    labels = y['opened_up'].dropna().values.astype(int)
    del y['opened_up']

    # flatten() makes it so we can put it through our model
    chunks = [y[a:a+5].values.flatten() for a in range(0, len(y), 6)]

    if len(chunks) > len(labels):
        chunks.pop(-1)
    data = np.asarray(chunks)
    return labels, data
def get_ml_data_multiple_tickers(tickers_list, start_date='01/01/2015', end_date='01/01/2020'):
    data_list = []
    labels_list = []
    for ticker in tickers_list:
        labels, data = get_ml_formatted_data(ticker)
        labels_list.append(labels)
        data_list.extend(data)
    return np.append([], labels_list), np.asarray(data_list) 
    
from sklearn.neural_network import MLPClassifier
from sklearn import metrics
from sklearn.model_selection import train_test_split

labels, data = get_ml_formatted_data('AAPL')
# lbfgs converges fast for small datasets
# hidden layers are basically the neurons in between the input and output, I made them 2x # features
# which is 4 (1-Open 2-High 3-Low 4-Close)
# activation using tanh prevents neuron deaths. neuron deaths cause all outputs to become 0 or all become 1
model = MLPClassifier(solver='lbfgs',hidden_layer_sizes=(8,8,8),max_iter=1000, activation='tanh')
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=.5)
model.fit(X_train, y_train)

predicted = model.predict(X_test)
print(metrics.classification_report(y_test, predicted))




             precision    recall  f1-score   support

          0       0.76      0.62      0.68        50
          1       0.70      0.82      0.76        55

avg / total       0.73      0.72      0.72       105



In [46]:
from get_all_tickers.get_tickers import get_tickers_filtered

get_tickers_filtered(mktcap_min=)

['XRF',
 'JFKKW',
 'AGNCN',
 'SONGW',
 'AMZN',
 'ASRVP',
 'AAPL',
 'DCTH',
 'ESSCW',
 'ESGRO',
 'ESGRP',
 'MSFT',
 'NBACW',
 'ECOW',
 'SHIPZ']

In [25]:
from yahoo_fin.stock_info import get_data
import numpy as np



# Step 1: get data
x = get_data(ticker='AAPL',start_date='12/15/2019', end_date='01/01/2020')
# Step 2: process data
del x['ticker']
del x['adjclose']
del x['volume']
y = x.pct_change()
y = y.iloc[1:]

def cust_filter(row):
    if (row.name + 1) % 6 == 0:
        return int(row['open'] > 0)
    else:
        return float('NaN')

y.reset_index(drop=True, inplace=True)
y['opened_up'] = y.apply(cust_filter, axis=1)
print(y)
labels = y['opened_up'].dropna().values
del y['opened_up']

# flatten() makes it so we can put it through our model
chunks = [y[a:a+5].values.flatten() for a in range(0, len(y), 6)]
if len(chunks > len(labels)):
    chunks.pop(-1)

data = np.asarray(chunks)
print(len(chunks))
print(len(labels))


       open      high       low     close  opened_up
0  0.009278  0.003490  0.006571  0.001965        NaN
1  0.000823  0.000461  0.001148 -0.002389        NaN
2 -0.001072 -0.002554 -0.000609  0.001001        NaN
3  0.009767  0.005228 -0.001398 -0.002071        NaN
4 -0.006023  0.005661  0.006498  0.016318        NaN
5  0.014829  0.002252  0.009095  0.000951        1.0
6  0.000457  0.017867  0.006292  0.019840        NaN
7  0.022119  0.013760  0.012013 -0.000379        NaN
8 -0.005702 -0.004354 -0.010065  0.005935        NaN
9  0.001624  0.003382  0.015076  0.007307        NaN
2
1


In [29]:
y = [t for t in range(0, 10, 6)]
y.pop(-1)

6