In [20]:
import pandas as pd
import numpy as np
from datetime import timedelta, datetime, date
from sklearn.linear_model import LogisticRegression as LR
import pandas_datareader as pdr
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [21]:
yesterday = (date.today() - timedelta(days = 1)).strftime("%Y-%m-%d")
trail_700 = (date.today() - timedelta(days = 700)).strftime("%Y-%m-%d")

stock = 'A'

df = pdr.DataReader(stock, 'yahoo', trail_700, yesterday)

# List where we will keep track of long and short average points
indicators = pd.DataFrame(index=df.index)

# Exponential moving averages using the closing data
short = 5
long = 20
indicators['short_avg'] = df['Close'].ewm(span=short, adjust=False).mean()
indicators['long_avg'] = df['Close'].ewm(span=long, adjust=False).mean()

df = df.merge(indicators, on = df.index)

df["diff"] = df['short_avg'] - df['long_avg']

df['indicator'] = np.where(
    abs(df['diff']) < 0.2,
    1.0,
    0.0
)

df['chart_indicator'] = np.where(
    abs(df['diff']) < 0.2,
    df['diff'].index,
    0
)

fig = go.Figure()

# Create and style traces
fig.add_trace(go.Scatter(x=df['key_0'], y=df['Close'], name='Close',
                         line=dict(color='royalblue', width=2.5)))

fig.add_trace(go.Scatter(x=df['key_0'], y=df['short_avg'], name = f'{short} Moving Avg.',
                         line=dict(color='firebrick', width=1.5, dash = 'dash')))

fig.add_trace(go.Scatter(x=df['key_0'], y=df['long_avg'], name=f'{long} Moving Avg.',
                         line=dict(color='green', width=1.5,
                              dash='dash') # dash options include 'dash', 'dot', and 'dashdot'
))

fig.update_layout(title=f'{stock} Price Trend',
                   xaxis_title='Day',
                   yaxis_title='Price')


In [22]:
df['daily_change'] = df['Close'] - df['Open']

df['gains'] = np.where(
    df['daily_change'] > 0,
    df['daily_change'],
    0
)

df['loss'] = np.where(
    df['daily_change'] < 0,
    df['daily_change'],
    0
)

df['up_down'] = np.where(
    df['daily_change'] < 0,
    0.0,
    1.0
)

df = df.drop('chart_indicator', axis = 1)
dates = df['key_0']
df = df.drop('key_0', axis = 1)

scaler = MinMaxScaler()

test_df = df.copy()

test_df = pd.DataFrame(scaler.fit_transform(test_df))
test_df.columns = list(df.columns)

In [23]:
df_clean = test_df[['Adj Close', 'long_avg', 'daily_change', 'loss', 'up_down']]

adj_X = df_clean.drop('up_down', axis = 1)
adj_y = df_clean["up_down"]

x_train, x_test, y_train, y_test = train_test_split(adj_X, adj_y, test_size=0.3, random_state=42)

In [24]:
clf = LR(random_state=0).fit(x_train,y_train)

clf.score(x_test,y_test)

0.8827586206896552

In [25]:
predictions = clf.predict(x_test)
pred_probs = clf.predict_proba(x_test)
predictions

array([1., 1., 1., 1., 0., 1., 0., 0., 0., 1., 1., 1., 0., 1., 1., 1., 1.,
       1., 1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 1., 1., 1., 0.,
       1., 1., 0., 1., 1., 0., 0., 1., 1., 0., 1., 1., 0., 0., 0., 1., 1.,
       1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1.,
       1., 1., 0., 1., 1., 1., 0., 1., 1., 0., 1., 0., 1., 1., 1., 0., 1.,
       1., 0., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1., 1., 1., 1., 1., 1.,
       0., 1., 1., 1., 1., 0., 1., 1., 1., 0., 1., 0., 1., 1., 0., 0., 0.,
       0., 1., 0., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 0., 1., 0.,
       0., 1., 0., 1., 0., 1., 1., 0., 0.])

In [26]:
probabilities = []

for x,i in enumerate(predictions):
    
    if i == 0:
        
#         print(f"For an outcome of {i} there is a {pred_probs[x][0]}")
        probabilities.append(pred_probs[x][0])
        
    else:
        
#         print(f"For an outcome of {i} there is a {pred_probs[x][1]}")
        probabilities.append(pred_probs[x][1])


In [27]:
x_test['preds'] = predictions
x_test['reals'] = y_test
x_test['prob'] = probabilities

x_test



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Adj Close,long_avg,daily_change,loss,preds,reals,prob
73,0.392501,0.324411,0.467312,1.000000,1.0,1.0,0.653139
415,0.653274,0.638775,0.438257,1.000000,1.0,1.0,0.603579
392,0.686266,0.551782,0.595641,1.000000,1.0,1.0,0.797300
278,0.522186,0.467640,0.503632,1.000000,1.0,1.0,0.695694
400,0.599812,0.598649,0.261502,0.600001,0.0,0.0,0.929323
...,...,...,...,...,...,...,...
16,0.237440,0.096848,0.219129,0.502779,0.0,0.0,0.961667
444,0.848087,0.884737,0.453995,1.000000,1.0,1.0,0.616926
419,0.680201,0.656609,0.661017,1.000000,1.0,1.0,0.847880
3,0.019825,0.017979,0.294189,0.675000,0.0,0.0,0.876092


In [28]:
x_test[x_test['prob'] > 0.8]

Unnamed: 0,Adj Close,long_avg,daily_change,loss,preds,reals,prob
400,0.599812,0.598649,0.261502,0.600001,0.0,0.0,0.929323
376,0.422464,0.326175,0.226392,0.519443,0.0,0.0,0.958135
77,0.376556,0.351006,0.306295,0.702778,0.0,0.0,0.855568
30,0.067544,0.052748,0.751816,1.0,1.0,1.0,0.90958
247,0.313798,0.282214,0.282082,0.647223,0.0,0.0,0.898901
408,0.563828,0.615294,0.274818,0.630555,0.0,0.0,0.913746
322,0.373445,0.527992,0.102905,0.236111,0.0,0.0,0.995101
307,0.471333,0.586064,0.297821,0.683333,0.0,0.0,0.878212
378,0.464289,0.346074,0.720338,1.0,1.0,1.0,0.892291
393,0.668508,0.566666,0.223971,0.513889,0.0,0.0,0.960686
