In [1]:
import numpy as np
import datetime as dt
import pandas as pd
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import math
import warnings
warnings.filterwarnings("ignore")

In [2]:
ticker1 = 'CL=F'
start = '2015-01-01'
end = '2020-05-10'
#end = dt.datetime.now()
df1 = web.DataReader(ticker1,'yahoo',start,end)
df1[[f'{ticker1} Adj Close']] = df1[['Adj Close']]
df1 = df1[[f'{ticker1} Adj Close']]
df1

Unnamed: 0_level_0,CL=F Adj Close
Date,Unnamed: 1_level_1
2015-01-02,52.689999
2015-01-05,50.040001
2015-01-06,47.930000
2015-01-07,48.650002
2015-01-08,48.790001
...,...
2020-05-19,32.360001
2020-05-20,33.599998
2020-05-21,33.889999
2020-05-22,33.560001


In [3]:
#OIL COMPANIES ('COP','XOM','BP','RDS-A','CVX','PSX')
ticker = 'PSX'
start = '2015-01-01'
end = '2020-05-10'
#end = dt.datetime.now()
df = web.DataReader(ticker,'yahoo',start,end)
df = df[['Open','High','Low','Close','Adj Close','Volume']]

In [4]:
def RSI(ticker):
    '''Coolest function... Can see the RSI values fluctuate (usually) from 20/30 to 70/80/90.
    Stocks that fluctuate and have good credit (or part of big indices) would be good picks 
    for trading strategy... below 50 is a buy indicator'''
    #end = dt.datetime.now()
    end = '2020-05-10'
    start = '2015-01-01'
    df = web.DataReader(ticker,'yahoo',start,end)
    df = df[['Adj Close']]
    # create a column called daily return
    df['Daily Return'] = df['Adj Close'].pct_change()*100
    # making a dataframe only for the percent return of each day
    df['Up/Down'] = np.sign(df['Daily Return'])
    #drop the nan values
    df.dropna(inplace=True)
    df['Up']=(df['Up/Down']>0).astype(int)
    df['Down']=(df['Up/Down']<0).astype(int)
    up = df['Up']
    down = df['Down']
    gain = pd.Series(up).rolling(window=14).sum()
    loss = pd.Series(down).rolling(window=14).sum()
    df['Gain'] = gain/14
    df['Loss'] = loss/14
    df['RS'] = df['Gain']/df['Loss']
    df['RSI'] = 100 - (100/(1+df['RS']))
    df.dropna(inplace=True)
    df = df[['RSI']]
    return(df)

In [5]:
df3 = RSI(ticker)

In [6]:
def EPS(symbol):
    from yahoo_earnings_calendar import YahooEarningsCalendar
    yec = YahooEarningsCalendar()
    ticker = (yec.get_earnings_of(symbol))
    data = pd.DataFrame(ticker)
    df = data.dropna()
    df['Date'] = pd.to_datetime(df['startdatetime']).dt.date
    df = df[['Date','ticker','epsestimate','epsactual','epssurprisepct']]
    df['Date'] = df['Date'].apply(pd.to_datetime)
    df = df.drop_duplicates(subset='Date')
    df = df.head(43)
    df = df.set_index('Date')
    df[['epssurprisepct']]
    data1 = web.DataReader(symbol,'yahoo',start,end)
    df1 = data1[['Adj Close']]
    df1 = df1.sort_index(ascending=False)
    df_combined = pd.concat([df,df1],axis=1)
    df_combined = df_combined.fillna(method='ffill')
    df_combined.dropna(inplace=True)
    df_combined=df_combined[['epssurprisepct']]
    return(df_combined)

In [7]:
df2 = EPS(ticker)
df2

Unnamed: 0_level_0,epssurprisepct
Date,Unnamed: 1_level_1
2015-01-02,15.36
2015-01-05,15.36
2015-01-06,15.36
2015-01-07,15.36
2015-01-08,15.36
...,...
2020-05-18,61.14
2020-05-19,61.14
2020-05-20,61.14
2020-05-21,61.14


In [8]:
df = pd.concat([df,df1,df2,df3],axis=1)
df = df[['Open','High','Low','Close','Adj Close','Volume',f'{ticker1} Adj Close','epssurprisepct','RSI']]
df = df.dropna()
df = df.reset_index()
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,CL=F Adj Close,epssurprisepct,RSI
0,2015-01-23,67.470001,69.190002,66.970001,68.519997,57.359859,4667600.0,45.590000,15.36,50.000000
1,2015-01-26,68.639999,69.040001,67.610001,68.400002,57.259403,4149700.0,45.150002,15.36,50.000000
2,2015-01-27,67.699997,69.779999,67.519997,69.599998,58.263947,4407900.0,46.230000,15.36,57.142857
3,2015-01-28,69.800003,70.059998,68.320000,68.370003,57.234295,6039300.0,44.450001,15.36,50.000000
4,2015-01-29,69.949997,70.690002,68.650002,70.480003,59.000629,5810100.0,44.529999,18.98,50.000000
...,...,...,...,...,...,...,...,...,...,...
1328,2020-05-18,75.000000,77.739998,74.739998,76.279999,76.279999,4532600.0,33.230000,61.14,57.142857
1329,2020-05-19,76.000000,76.809998,74.110001,75.220001,75.220001,3736700.0,32.360001,61.14,50.000000
1330,2020-05-20,76.809998,79.110001,76.650002,78.250000,78.250000,3501500.0,33.599998,61.14,57.142857
1331,2020-05-21,78.650002,79.099998,76.820000,77.089996,77.089996,2299500.0,33.889999,61.14,57.142857


In [9]:
X = df.drop(['Date','Close','Open'], axis=1, inplace=True)
y = df[['Adj Close']]

In [10]:
df = np.asmatrix(df)
df

matrix([[69.19000244, 66.97000122, 57.35985947, ..., 45.59000015,
         15.36      , 50.        ],
        [69.04000092, 67.61000061, 57.25940323, ..., 45.15000153,
         15.36      , 50.        ],
        [69.77999878, 67.51999664, 58.26394653, ..., 46.22999954,
         15.36      , 57.14285714],
        ...,
        [79.11000061, 76.65000153, 78.25      , ..., 33.59999847,
         61.14      , 57.14285714],
        [79.09999847, 76.81999969, 77.08999634, ..., 33.88999939,
         61.14      , 57.14285714],
        [76.83999634, 75.15000153, 76.45999908, ..., 33.56000137,
         61.14      , 50.        ]])

In [11]:
from sklearn.model_selection import train_test_split
# Split X and y into X_
X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.25,  random_state=0)

In [12]:
from sklearn.linear_model import LinearRegression
regression_model = LinearRegression()
regression_model.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [13]:
intercept = regression_model.intercept_[0]

In [14]:
regression_model.score(X_test, y_test)

1.0

In [15]:
from sklearn.metrics import mean_squared_error
y_predict = regression_model.predict(X_test)
regression_model_mse = mean_squared_error(y_predict, y_test)
regression_model_mse

8.699199060209672e-28

In [16]:
math.sqrt(regression_model_mse)

2.9494404656154147e-14

In [17]:
# input the latest Open, High, Low, Close, Volume
# predicts the next day price
data2 = web.DataReader(ticker1,'yahoo',start,dt.datetime.now())
data1 = web.DataReader(ticker,'yahoo',start,dt.datetime.now())
eps = df2['epssurprisepct'].iloc[-1]
High = data1['High'].iloc[-1]
Low = data1['Low'].iloc[-1]
Close = data1['Adj Close'].iloc[-1]
Volume = data1['Volume'].iloc[-1]
Gold = data2['Adj Close'].iloc[-1]
RSI = df3['RSI'].iloc[-1]
prediction = regression_model.predict([[eps, High, Low, Close, Volume, Gold, RSI]])
print(f'1. {ticker} will close at ${prediction.mean().round(3)} next time window.')
print(f'2. The projected daily change is {(((prediction - Close) / Close)*100).mean()}%.')
if prediction < Close:
    print(f'3. {ticker} will be down from the previous closing of {Close}.')
elif prediction == Close:
    print(f'3. {ticker} will be the same as the previous closing of {Close}.')
else:
    print(f'3. {ticker} will be the up from the previous closing of {Close}.')

1. PSX will close at $75.15 next time window.
2. The projected daily change is -1.7133109785433671%.
3. PSX will be down from the previous closing of 76.45999908447266.
