In [1]:
import numpy as np
import datetime as dt
import pandas as pd
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import math
import warnings
warnings.filterwarnings("ignore")

In [2]:
ticker1 = 'C=F'
start = '2015-01-01'
end = '2020-05-10'
#end = dt.datetime.now()
df1 = web.DataReader(ticker1,'yahoo',start,end)
df1[[f'{ticker1} Adj Close']] = df1[['Adj Close']]
df1 = df1[[f'{ticker1} Adj Close']]
df1

Unnamed: 0_level_0,C=F Adj Close
Date,Unnamed: 1_level_1
2015-01-02,395.75
2015-01-05,406.00
2015-01-06,405.00
2015-01-07,396.25
2015-01-08,394.25
...,...
2020-05-18,320.75
2020-05-19,321.50
2020-05-20,319.75
2020-05-21,317.75


In [3]:
#Small-Cap Gold Miners (ADM, BG, GPRE, INGR, MGPI)
ticker = 'MGPI'
start = '2015-01-01'
end = '2020-05-10'
#end = dt.datetime.now()
df = web.DataReader(ticker,'yahoo',start,end)
df = df[['Open','High','Low','Close','Adj Close','Volume']]

In [4]:
def RSI(ticker):
    '''Coolest function... Can see the RSI values fluctuate (usually) from 20/30 to 70/80/90.
    Stocks that fluctuate and have good credit (or part of big indices) would be good picks 
    for trading strategy... below 50 is a buy indicator'''
    #end = dt.datetime.now()
    end = '2020-05-10'
    start = '2015-01-01'
    df = web.DataReader(ticker,'yahoo',start,end)
    df = df[['Adj Close']]
    # create a column called daily return
    df['Daily Return'] = df['Adj Close'].pct_change()*100
    # making a dataframe only for the percent return of each day
    df['Up/Down'] = np.sign(df['Daily Return'])
    #drop the nan values
    df.dropna(inplace=True)
    df['Up']=(df['Up/Down']>0).astype(int)
    df['Down']=(df['Up/Down']<0).astype(int)
    up = df['Up']
    down = df['Down']
    gain = pd.Series(up).rolling(window=14).sum()
    loss = pd.Series(down).rolling(window=14).sum()
    df['Gain'] = gain/14
    df['Loss'] = loss/14
    df['RS'] = df['Gain']/df['Loss']
    df['RSI'] = 100 - (100/(1+df['RS']))
    df.dropna(inplace=True)
    df = df[['RSI']]
    return(df)

In [5]:
df3 = RSI(ticker)

In [6]:
def EPS(symbol):
    from yahoo_earnings_calendar import YahooEarningsCalendar
    yec = YahooEarningsCalendar()
    ticker = (yec.get_earnings_of(symbol))
    data = pd.DataFrame(ticker)
    df = data.dropna()
    df['Date'] = pd.to_datetime(df['startdatetime']).dt.date
    df = df[['Date','ticker','epsestimate','epsactual','epssurprisepct']]
    df['Date'] = df['Date'].apply(pd.to_datetime)
    df = df.drop_duplicates(subset='Date')
    df = df.head(43)
    df = df.set_index('Date')
    df[['epssurprisepct']]
    data1 = web.DataReader(symbol,'yahoo',start,end)
    df1 = data1[['Adj Close']]
    df1 = df1.sort_index(ascending=False)
    df_combined = pd.concat([df,df1],axis=1)
    df_combined = df_combined.fillna(method='ffill')
    df_combined.dropna(inplace=True)
    df_combined=df_combined[['epssurprisepct']]
    return(df_combined)

In [7]:
df2 = EPS(ticker)
df2

Unnamed: 0_level_0,epssurprisepct
Date,Unnamed: 1_level_1
2015-01-02,71.78
2015-01-05,71.78
2015-01-06,71.78
2015-01-07,71.78
2015-01-08,71.78
...,...
2020-05-18,68.04
2020-05-19,68.04
2020-05-20,68.04
2020-05-21,68.04


In [8]:
df = pd.concat([df,df1,df2,df3],axis=1)
df = df[['Open','High','Low','Close','Adj Close','Volume',f'{ticker1} Adj Close','epssurprisepct','RSI']]
df = df.dropna()
df = df.reset_index()
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,C=F Adj Close,epssurprisepct,RSI
0,2015-01-23,16.100000,16.299999,15.610000,15.760000,15.059680,30600.0,386.75,71.78,50.000000
1,2015-01-26,15.500000,15.970000,15.500000,15.890000,15.183904,38900.0,384.00,71.78,50.000000
2,2015-01-27,15.850000,16.170000,15.800000,16.059999,15.346348,25000.0,381.25,71.78,57.142857
3,2015-01-28,15.990000,16.160000,15.700000,15.890000,15.183904,33400.0,373.25,71.78,50.000000
4,2015-01-29,15.860000,16.440001,15.700000,16.080000,15.365461,34200.0,371.50,71.78,57.142857
...,...,...,...,...,...,...,...,...,...,...
1328,2020-05-18,35.830002,36.820000,35.470001,36.000000,35.879833,115800.0,320.75,68.04,50.000000
1329,2020-05-19,35.840000,36.450001,35.340000,35.389999,35.271870,92000.0,321.50,68.04,42.857143
1330,2020-05-20,36.160000,36.660000,35.480000,35.950001,35.830002,92800.0,319.75,68.04,50.000000
1331,2020-05-21,35.720001,35.990002,35.259998,35.340000,35.340000,90200.0,317.75,68.04,42.857143


In [9]:
X = df.drop(['Date','Close','Open'], axis=1, inplace=True)
y = df[['Adj Close']]

In [10]:
df = np.asmatrix(df)
df

matrix([[ 16.29999924,  15.60999966,  15.05967999, ..., 386.75      ,
          71.78      ,  50.        ],
        [ 15.97000027,  15.5       ,  15.18390369, ..., 384.        ,
          71.78      ,  50.        ],
        [ 16.17000008,  15.80000019,  15.34634781, ..., 381.25      ,
          71.78      ,  57.14285714],
        ...,
        [ 36.65999985,  35.47999954,  35.83000183, ..., 319.75      ,
          68.04      ,  50.        ],
        [ 35.99000168,  35.25999832,  35.34000015, ..., 317.75      ,
          68.04      ,  42.85714286],
        [ 35.88999939,  34.70000076,  35.36999893, ..., 318.25      ,
          68.04      ,  42.85714286]])

In [11]:
from sklearn.model_selection import train_test_split
# Split X and y into X_
X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.25,  random_state=0)

In [12]:
from sklearn.linear_model import LinearRegression
regression_model = LinearRegression()
regression_model.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [13]:
intercept = regression_model.intercept_[0]

In [14]:
regression_model.score(X_test, y_test)

1.0

In [15]:
from sklearn.metrics import mean_squared_error
y_predict = regression_model.predict(X_test)
regression_model_mse = mean_squared_error(y_predict, y_test)
regression_model_mse

2.85456936966606e-24

In [16]:
math.sqrt(regression_model_mse)

1.689547090100202e-12

In [17]:
# input the latest Open, High, Low, Close, Volume
# predicts the next day price
data2 = web.DataReader(ticker1,'yahoo',start,dt.datetime.now())
data1 = web.DataReader(ticker,'yahoo',start,dt.datetime.now())
eps = df2['epssurprisepct'].iloc[-1]
High = data1['High'].iloc[-1]
Low = data1['Low'].iloc[-1]
Close = data1['Adj Close'].iloc[-1]
Volume = data1['Volume'].iloc[-1]
Gold = data2['Adj Close'].iloc[-1]
RSI = df3['RSI'].iloc[-1]
prediction = regression_model.predict([[eps, High, Low, Close, Volume, Gold, RSI]])
print(f'1. {ticker} will close at ${prediction.mean().round(3)} next time window.')
print(f'2. The projected daily change is {(((prediction - Close) / Close)*100).mean()}%.')
if prediction < Close:
    print(f'3. {ticker} will be down from the previous closing of {Close}.')
elif prediction == Close:
    print(f'3. {ticker} will be the same as the previous closing of {Close}.')
else:
    print(f'3. {ticker} will be the up from the previous closing of {Close}.')

1. MGPI will close at $34.7 next time window.
2. The projected daily change is -1.894255553240318%.
3. MGPI will be down from the previous closing of 35.369998931884766.
