In [1]:
import numpy as np
import datetime as dt
import pandas as pd
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import math
import warnings
warnings.filterwarnings("ignore")

In [3]:
ticker1 = 'GC=F'
start = '2015-01-01'
end = '2020-05-10'
#end = dt.datetime.now()
df1 = web.DataReader(ticker1,'yahoo',start,end)
df1[[f'{ticker1} Adj Close']] = df1[['Adj Close']]
df1 = df1[[f'{ticker1} Adj Close']]
df1

Unnamed: 0_level_0,GC=F Adj Close
Date,Unnamed: 1_level_1
2015-01-02,1186.000000
2015-01-05,1203.900024
2015-01-06,1219.300049
2015-01-07,1210.599976
2015-01-08,1208.400024
...,...
2020-05-19,1749.000000
2020-05-20,1749.699951
2020-05-21,1725.300049
2020-05-22,1734.699951


In [3]:
#Small-Cap Gold Miners ('AUY','HMY','AU','GOLD','KGC')
ticker = 'GOLD'
start = '2015-01-01'
end = '2020-05-10'
#end = dt.datetime.now()
df = web.DataReader(ticker,'yahoo',start,end)
df = df[['Open','High','Low','Close','Adj Close','Volume']]

In [4]:
def RSI(ticker):
    '''Coolest function... Can see the RSI values fluctuate (usually) from 20/30 to 70/80/90.
    Stocks that fluctuate and have good credit (or part of big indices) would be good picks 
    for trading strategy... below 50 is a buy indicator'''
    #end = dt.datetime.now()
    end = '2020-05-10'
    start = '2015-01-01'
    df = web.DataReader(ticker,'yahoo',start,end)
    df = df[['Adj Close']]
    # create a column called daily return
    df['Daily Return'] = df['Adj Close'].pct_change()*100
    # making a dataframe only for the percent return of each day
    df['Up/Down'] = np.sign(df['Daily Return'])
    #drop the nan values
    df.dropna(inplace=True)
    df['Up']=(df['Up/Down']>0).astype(int)
    df['Down']=(df['Up/Down']<0).astype(int)
    up = df['Up']
    down = df['Down']
    gain = pd.Series(up).rolling(window=14).sum()
    loss = pd.Series(down).rolling(window=14).sum()
    df['Gain'] = gain/14
    df['Loss'] = loss/14
    df['RS'] = df['Gain']/df['Loss']
    df['RSI'] = 100 - (100/(1+df['RS']))
    df.dropna(inplace=True)
    df = df[['RSI']]
    return(df)

In [5]:
df3 = RSI(ticker)

In [6]:
def EPS(symbol):
    from yahoo_earnings_calendar import YahooEarningsCalendar
    yec = YahooEarningsCalendar()
    ticker = (yec.get_earnings_of(symbol))
    data = pd.DataFrame(ticker)
    df = data.dropna()
    df['Date'] = pd.to_datetime(df['startdatetime']).dt.date
    df = df[['Date','ticker','epsestimate','epsactual','epssurprisepct']]
    df['Date'] = df['Date'].apply(pd.to_datetime)
    df = df.drop_duplicates(subset='Date')
    df = df.head(43)
    df = df.set_index('Date')
    df[['epssurprisepct']]
    data1 = web.DataReader(symbol,'yahoo',start,end)
    df1 = data1[['Adj Close']]
    df1 = df1.sort_index(ascending=False)
    df_combined = pd.concat([df,df1],axis=1)
    df_combined = df_combined.fillna(method='ffill')
    df_combined.dropna(inplace=True)
    df_combined=df_combined[['epssurprisepct']]
    return(df_combined)

In [7]:
df2 = EPS(ticker)
df2

Unnamed: 0_level_0,epssurprisepct
Date,Unnamed: 1_level_1
2015-01-02,15.15
2015-01-05,15.15
2015-01-06,15.15
2015-01-07,15.15
2015-01-08,15.15
...,...
2020-05-18,0.63
2020-05-19,0.63
2020-05-20,0.63
2020-05-21,0.63


In [8]:
df = pd.concat([df,df1,df2,df3],axis=1)
df = df[['Open','High','Low','Close','Adj Close','Volume',f'{ticker1} Adj Close','epssurprisepct','RSI']]
df = df.dropna()
df = df.reset_index()
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,GC=F Adj Close,epssurprisepct,RSI
0,2015-01-23,12.660000,12.790000,12.340000,12.430000,6.720088,19713500.0,1292.599976,15.15,57.142857
1,2015-01-26,12.100000,12.760000,12.000000,12.690000,6.860652,15653600.0,1279.400024,15.15,64.285714
2,2015-01-27,12.890000,13.250000,12.840000,13.110000,7.087718,23691300.0,1291.699951,15.15,64.285714
3,2015-01-28,12.920000,13.120000,12.490000,12.610000,6.817401,19198600.0,1285.900024,15.15,64.285714
4,2015-01-29,12.280000,12.430000,12.070000,12.290000,6.644399,18368200.0,1254.599976,15.15,64.285714
...,...,...,...,...,...,...,...,...,...,...
1327,2020-05-18,28.080000,28.230000,26.930000,27.200001,27.200001,17293500.0,1736.500000,0.63,64.285714
1328,2020-05-19,27.520000,28.250000,27.410000,28.100000,28.100000,17810600.0,1749.000000,0.63,71.428571
1329,2020-05-20,28.290001,28.360001,27.219999,27.250000,27.250000,16291200.0,1749.699951,0.63,71.428571
1330,2020-05-21,26.959999,26.969999,25.610001,26.260000,26.260000,22018800.0,1725.300049,0.63,64.285714


In [9]:
X = df.drop(['Date','Close','Open'], axis=1, inplace=True)
y = df[['Adj Close']]

In [10]:
df = np.asmatrix(df)
df

matrix([[1.27900000e+01, 1.23400002e+01, 6.72008801e+00, ...,
         1.29259998e+03, 1.51500000e+01, 5.71428571e+01],
        [1.27600002e+01, 1.20000000e+01, 6.86065245e+00, ...,
         1.27940002e+03, 1.51500000e+01, 6.42857143e+01],
        [1.32500000e+01, 1.28400002e+01, 7.08771753e+00, ...,
         1.29169995e+03, 1.51500000e+01, 6.42857143e+01],
        ...,
        [2.83600006e+01, 2.72199993e+01, 2.72500000e+01, ...,
         1.74969995e+03, 6.30000000e-01, 7.14285714e+01],
        [2.69699993e+01, 2.56100006e+01, 2.62600002e+01, ...,
         1.72530005e+03, 6.30000000e-01, 6.42857143e+01],
        [2.69599991e+01, 2.62299995e+01, 2.63199997e+01, ...,
         1.73469995e+03, 6.30000000e-01, 6.42857143e+01]])

In [11]:
from sklearn.model_selection import train_test_split
# Split X and y into X_
X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.25,  random_state=0)

In [12]:
from sklearn.linear_model import LinearRegression
regression_model = LinearRegression()
regression_model.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [13]:
intercept = regression_model.intercept_[0]

In [14]:
regression_model.score(X_test, y_test)

1.0

In [15]:
from sklearn.metrics import mean_squared_error
y_predict = regression_model.predict(X_test)
regression_model_mse = mean_squared_error(y_predict, y_test)
regression_model_mse

1.871824936679949e-24

In [16]:
math.sqrt(regression_model_mse)

1.3681465333362317e-12

In [17]:
# input the latest Open, High, Low, Close, Volume
# predicts the next day price
data2 = web.DataReader(ticker1,'yahoo',start,dt.datetime.now())
data1 = web.DataReader(ticker,'yahoo',start,dt.datetime.now())
eps = df2['epssurprisepct'].iloc[-1]
High = data1['High'].iloc[-1]
Low = data1['Low'].iloc[-1]
Close = data1['Adj Close'].iloc[-1]
Volume = data1['Volume'].iloc[-1]
Gold = data2['Adj Close'].iloc[-1]
RSI = df3['RSI'].iloc[-1]
prediction = regression_model.predict([[eps, High, Low, Close, Volume, Gold, RSI]])
print(f'1. {ticker} will close at ${prediction.mean().round(3)} next time window.')
print(f'2. The projected daily change is {(((prediction - Close) / Close)*100).mean()}%.')
if prediction < Close:
    print(f'3. {ticker} will be down from the previous closing of {Close}.')
elif prediction == Close:
    print(f'3. {ticker} will be the same as the previous closing of {Close}.')
else:
    print(f'3. {ticker} will be the up from the previous closing of {Close}.')

1. GOLD will close at $26.23 next time window.
2. The projected daily change is -0.3419458780034378%.
3. GOLD will be down from the previous closing of 26.31999969482422.
