In [1]:
import numpy as np
import datetime as dt
import pandas as pd
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import math
import warnings
warnings.filterwarnings("ignore")

In [2]:
ticker1 = 'SI=F'
start = '2015-01-01'
end = '2020-05-10'
#end = dt.datetime.now()
df1 = web.DataReader(ticker1,'yahoo',start,end)
df1[[f'{ticker1} Adj Close']] = df1[['Adj Close']]
df1 = df1[[f'{ticker1} Adj Close']]
df1

Unnamed: 0_level_0,SI=F Adj Close
Date,Unnamed: 1_level_1
2015-01-02,15.734000
2015-01-05,16.179001
2015-01-06,16.603001
2015-01-07,16.510000
2015-01-08,16.351000
...,...
2020-05-19,17.895000
2020-05-20,17.990000
2020-05-21,17.424999
2020-05-22,17.690001


In [3]:
#Small-Cap Gold Miners (AG, EXK, PAAS, FSM, HL, CDE
ticker = 'CDE'
start = '2015-01-01'
end = '2020-05-10'
#end = dt.datetime.now()
df = web.DataReader(ticker,'yahoo',start,end)
df = df[['Open','High','Low','Close','Adj Close','Volume']]

In [4]:
def RSI(ticker):
    '''Coolest function... Can see the RSI values fluctuate (usually) from 20/30 to 70/80/90.
    Stocks that fluctuate and have good credit (or part of big indices) would be good picks 
    for trading strategy... below 50 is a buy indicator'''
    #end = dt.datetime.now()
    end = '2020-05-10'
    start = '2015-01-01'
    df = web.DataReader(ticker,'yahoo',start,end)
    df = df[['Adj Close']]
    # create a column called daily return
    df['Daily Return'] = df['Adj Close'].pct_change()*100
    # making a dataframe only for the percent return of each day
    df['Up/Down'] = np.sign(df['Daily Return'])
    #drop the nan values
    df.dropna(inplace=True)
    df['Up']=(df['Up/Down']>0).astype(int)
    df['Down']=(df['Up/Down']<0).astype(int)
    up = df['Up']
    down = df['Down']
    gain = pd.Series(up).rolling(window=14).sum()
    loss = pd.Series(down).rolling(window=14).sum()
    df['Gain'] = gain/14
    df['Loss'] = loss/14
    df['RS'] = df['Gain']/df['Loss']
    df['RSI'] = 100 - (100/(1+df['RS']))
    df.dropna(inplace=True)
    df = df[['RSI']]
    return(df)

In [5]:
df3 = RSI(ticker)

In [6]:
def EPS(symbol):
    from yahoo_earnings_calendar import YahooEarningsCalendar
    yec = YahooEarningsCalendar()
    ticker = (yec.get_earnings_of(symbol))
    data = pd.DataFrame(ticker)
    df = data.dropna()
    df['Date'] = pd.to_datetime(df['startdatetime']).dt.date
    df = df[['Date','ticker','epsestimate','epsactual','epssurprisepct']]
    df['Date'] = df['Date'].apply(pd.to_datetime)
    df = df.drop_duplicates(subset='Date')
    df = df.head(43)
    df = df.set_index('Date')
    df[['epssurprisepct']]
    data1 = web.DataReader(symbol,'yahoo',start,end)
    df1 = data1[['Adj Close']]
    df1 = df1.sort_index(ascending=False)
    df_combined = pd.concat([df,df1],axis=1)
    df_combined = df_combined.fillna(method='ffill')
    df_combined.dropna(inplace=True)
    df_combined=df_combined[['epssurprisepct']]
    return(df_combined)

In [7]:
df2 = EPS(ticker)
df2

Unnamed: 0_level_0,epssurprisepct
Date,Unnamed: 1_level_1
2015-01-02,24.09
2015-01-05,24.09
2015-01-06,24.09
2015-01-07,24.09
2015-01-08,24.09
...,...
2020-05-18,100.00
2020-05-19,100.00
2020-05-20,100.00
2020-05-21,100.00


In [8]:
df = pd.concat([df,df1,df2,df3],axis=1)
df = df[['Open','High','Low','Close','Adj Close','Volume',f'{ticker1} Adj Close','epssurprisepct','RSI']]
df = df.dropna()
df = df.reset_index()
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,SI=F Adj Close,epssurprisepct,RSI
0,2015-01-23,6.24,6.25,5.93,5.99,5.99,2234000.0,18.284000,24.09,57.142857
1,2015-01-26,5.85,6.13,5.69,6.11,6.11,2149800.0,17.966999,24.09,57.142857
2,2015-01-27,6.14,6.42,6.12,6.32,6.32,3697700.0,18.068001,24.09,57.142857
3,2015-01-28,6.23,6.36,5.89,5.99,5.99,2668300.0,18.068001,24.09,57.142857
4,2015-01-29,5.83,6.00,5.66,5.94,5.94,2988900.0,16.757999,24.09,57.142857
...,...,...,...,...,...,...,...,...,...,...
1327,2020-05-18,5.30,5.43,4.88,5.29,5.29,10476300.0,17.475000,100.00,57.142857
1328,2020-05-19,5.42,6.08,5.38,5.86,5.86,13049700.0,17.895000,100.00,57.142857
1329,2020-05-20,5.97,6.05,5.69,5.97,5.97,7667400.0,17.990000,100.00,64.285714
1330,2020-05-21,5.75,5.80,5.44,5.70,5.70,6193400.0,17.424999,100.00,64.285714


In [9]:
X = df.drop(['Date','Close','Open'], axis=1, inplace=True)
y = df[['Adj Close']]

In [10]:
df = np.asmatrix(df)
df

matrix([[  6.25      ,   5.92999983,   5.98999977, ...,  18.2840004 ,
          24.09      ,  57.14285714],
        [  6.13000011,   5.69000006,   6.11000013, ...,  17.96699905,
          24.09      ,  57.14285714],
        [  6.42000008,   6.11999989,   6.32000017, ...,  18.06800079,
          24.09      ,  57.14285714],
        ...,
        [  6.05000019,   5.69000006,   5.96999979, ...,  17.98999977,
         100.        ,  64.28571429],
        [  5.80000019,   5.44000006,   5.69999981, ...,  17.42499924,
         100.        ,  64.28571429],
        [  5.86999989,   5.44000006,   5.51000023, ...,  17.69000053,
         100.        ,  57.14285714]])

In [11]:
from sklearn.model_selection import train_test_split
# Split X and y into X_
X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.25,  random_state=0)

In [12]:
from sklearn.linear_model import LinearRegression
regression_model = LinearRegression()
regression_model.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [13]:
intercept = regression_model.intercept_[0]

In [14]:
regression_model.score(X_test, y_test)

1.0

In [15]:
from sklearn.metrics import mean_squared_error
y_predict = regression_model.predict(X_test)
regression_model_mse = mean_squared_error(y_predict, y_test)
regression_model_mse

4.1213571605912256e-27

In [16]:
math.sqrt(regression_model_mse)

6.419779716307427e-14

In [17]:
# input the latest Open, High, Low, Close, Volume
# predicts the next day price
data2 = web.DataReader(ticker1,'yahoo',start,dt.datetime.now())
data1 = web.DataReader(ticker,'yahoo',start,dt.datetime.now())
eps = df2['epssurprisepct'].iloc[-1]
High = data1['High'].iloc[-1]
Low = data1['Low'].iloc[-1]
Close = data1['Adj Close'].iloc[-1]
Volume = data1['Volume'].iloc[-1]
Gold = data2['Adj Close'].iloc[-1]
RSI = df3['RSI'].iloc[-1]
prediction = regression_model.predict([[eps, High, Low, Close, Volume, Gold, RSI]])
print(f'1. {ticker} will close at ${prediction.mean().round(3)} next time window.')
print(f'2. The projected daily change is {(((prediction - Close) / Close)*100).mean()}%.')
if prediction < Close:
    print(f'3. {ticker} will be down from the previous closing of {Close}.')
elif prediction == Close:
    print(f'3. {ticker} will be the same as the previous closing of {Close}.')
else:
    print(f'3. {ticker} will be the up from the previous closing of {Close}.')

1. CDE will close at $5.44 next time window.
2. The projected daily change is -1.2704204088744682%.
3. CDE will be down from the previous closing of 5.510000228881836.
