In [None]:
!pip install statsmodels>=0.9.0
!pip install yfinance --upgrade --no-cache-dir
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from IPython.display import display
import matplotlib.pyplot as plt
from pylab import rcParams
import statsmodels.api as sm
from statsmodels.tsa.holtwinters import ExponentialSmoothing, SimpleExpSmoothing, Holt

import pandas_datareader as pdr
import datetime 
import math
import yfinance as yf
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_squared_error
from math import sqrt

#from sklearn.preprocessing import MinMaxScaler
#from keras.models import Sequential
#from keras.layers import Dense, Dropout, LSTM

In [None]:
#df = pd.DataFrame()
#data = yf.Ticker("BTC")
#df = data.history(period="max", interval= '1m', start="2021-05-10", end="2021-05-30")
#df = yf.download("AAPL", start="2021-05-01", end="2021-05-30")

In [None]:
# Fetch quotes for the ticker 'JPM' from Yahoo through pandas-datareader,
# requesting the range 2021-01-01 to 2021-05-31.
# NOTE(review): this comment used to describe BTC-USD (bitcoin priced in US
# dollars), but the call below requests 'JPM' - confirm which symbol is intended.

df = pdr.get_data_yahoo('JPM', start=datetime.datetime(2021, 1, 1), end=datetime.datetime(2021, 5, 31))

In [None]:
# Show the first and the last rows of the downloaded frame, in that order.
display(df.head(), df.tail())

In [None]:
# DataFrame.info() prints its report itself and returns None, so it is called
# bare; wrapping it in display() only appended a stray "None" to the output.
df.info()
display(df.columns)

***
### One of the most important steps is to check the quality of your data, so let's check whether our dataset has any missing values.
***

In [None]:
# Per column: True when at least one value is missing.
df.isna().any()

***
## Now let's visualize some charts!
***

In [None]:
rcParams['figure.figsize'] = 22, 12

# (column, line colour) for each panel; panels are filled row by row on the
# 3x2 grid, so the order here is the on-screen order.
raw_panels = [
    ('Open', 'red'),
    ('Close', 'blue'),
    ('High', 'green'),
    ('Low', 'grey'),
    ('Adj Close', 'purple'),
    ('Volume', 'black'),
]

fig, axs = plt.subplots(3, 2)

# One loop instead of six copy-pasted stanzas: each panel plots its column
# against the frame's index and is titled with the column name.
for ax, (column, colour) in zip(axs.flat, raw_panels):
    ax.plot(df.index, df[column], color=colour)
    ax.set_title(column)



***
### ***What if we create some features?***
#### Candidate features include the Relative Strength Index, Moving Averages, the Money Flow Index, Stochastics, MACD, Bollinger Bands, and any other indicators we can derive.
***

### To create the features, we'll use the Close column. Let's create the ***Moving Average, Bollinger Bands, Stochastic index, RSI, MACD and EMA***:

In [None]:
# Testing this function
def RSI(prices, n):
    """Return the Relative Strength Index of ``prices`` for look-back ``n``.

    The average gain and average loss are seeded from the first ``n + 1``
    price changes (each sum divided by ``n``) and afterwards updated with the
    recursive smoothing ``avg = (avg * (n - 1) + current) / n``.  The first
    ``n`` output entries all carry the seed value.

    Parameters
    ----------
    prices : array-like
        One-dimensional sequence of prices (list, ndarray or pandas Series).
        It is converted to a float array, so integer prices no longer cause
        the result to be truncated to whole numbers.
    n : int
        Look-back period; must be at least 1.

    Returns
    -------
    numpy.ndarray
        Float array with one value per input price.  A value is 100 when the
        average loss is zero but the average gain is positive, and NaN when
        both averages are zero (no price movement).

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")

    def _rsi_value(avg_gain, avg_loss):
        # Guard the division explicitly instead of relying on inf/NaN
        # arithmetic (which works but emits RuntimeWarnings).
        if avg_loss == 0:
            return 100.0 if avg_gain > 0 else np.nan
        return 100. - 100. / (1. + avg_gain / avg_loss)

    prices = np.asarray(prices, dtype=float)
    deltas = np.diff(prices)

    # NOTE(review): the seed window spans n + 1 changes and the loop below
    # starts at deltas[n - 1], so two changes are counted twice.  This is kept
    # so that results for float input stay the same as before.
    seed = deltas[:n + 1]
    up = seed[seed >= 0].sum() / n
    down = -seed[seed < 0].sum() / n

    rsi = np.zeros(len(prices), dtype=float)
    rsi[:n] = _rsi_value(up, down)

    for i in range(n, len(prices)):
        delta = deltas[i - 1]  # deltas is one element shorter than prices

        if delta > 0:
            upval, downval = delta, 0.
        else:
            upval, downval = 0., -delta

        up = (up * (n - 1) + upval) / n
        down = (down * (n - 1) + downval) / n
        rsi[i] = _rsi_value(up, down)

    return rsi


In [None]:
# Simple moving averages of the close
df['MA6'] = df['Close'].rolling(window=6).mean()
df['MA12'] = df['Close'].rolling(window=12).mean()
df['MA26'] = df['Close'].rolling(window=26).mean()

# Moving average convergence divergence.
# MACD is defined as the 12-period EMA minus the 26-period EMA; it was
# previously computed from the simple moving averages above, which is a
# different (and laggier) series. The signal line is the 9-period EMA of MACD.
df['MACD'] = df['Close'].ewm(span = 12).mean() - df['Close'].ewm(span = 26).mean()
df['MACDSIG'] = df['MACD'].ewm(span = 9).mean()

# Exponential moving averages of the close
df['EMA6'] = df['Close'].ewm(span = 6).mean()
df['EMA12'] = df['Close'].ewm(span = 12).mean()
df['EMA24'] = df['Close'].ewm(span = 24).mean()


In [None]:
# Relative strength index for each look-back period; every call receives its
# own copy of the Close column.
for rsi_period in (6, 12, 24):
    df['RSI%d' % rsi_period] = RSI(df['Close'].copy(), rsi_period)

In [None]:
def ulcer_index(df):
  """Return the Ulcer Index of the ``Close`` column of ``df``.

  For every row the percentage drawdown is measured against the highest
  close seen up to and including that row (the running peak).  The result is
  the square root of the mean squared drawdown.  Previously the drawdown was
  measured against the maximum of the whole column, which compared early
  prices with a peak that had not happened yet.

  Parameters:
    df: DataFrame with a numeric "Close" column. It is not modified.

  Returns:
    float: the Ulcer Index, or NaN when ``df`` has no rows.
  """
  close = df["Close"]
  if len(close) == 0:
    # Nothing to average; avoids the ZeroDivisionError of dividing by len 0.
    return float("nan")

  running_peak = close.cummax()
  pct_drawdown = ((close - running_peak) / running_peak) * 100

  # Sum divided by the row count, as before.
  square_avg = (pct_drawdown ** 2.0).sum() / float(len(pct_drawdown))
  return math.sqrt(square_avg)

In [None]:
# Compute the Ulcer Index on a copy of the frame, so the computation cannot
# add anything to df itself, then show the resulting number.
tmp = ulcer_index(df.copy())
tmp

In [None]:
# Last rows of df, including the feature columns added so far.
df.tail()

In [None]:
# Stochastic oscillator: %K over a 14-row high/low range, %D as the 3-row mean of %K

highest_high = df['High'].rolling(14).max()
lowest_low = df['Low'].rolling(14).min()

df['STOCH_14high'] = highest_high
df['STOCH_14low'] = lowest_low
# Position of the close inside the 14-row range, scaled by 100.
df['STOCH_%K'] = (df['Close'] - lowest_low)*100/(highest_high - lowest_low)
df['STOCH_%D'] = df['STOCH_%K'].rolling(3).mean()
df.head()

In [None]:
# Bollinger Bands: 20-row rolling mean of the close, plus/minus two rolling
# standard deviations.

close_window = df['Close'].rolling(window=20)
df['MA20'] = close_window.mean()
df['20STD'] = close_window.std()

band_offset = df['20STD'] * 2
df['Upper'] = df['MA20'] + band_offset
df['Lower'] = df['MA20'] - band_offset

In [None]:
# Remove every row that has a missing value in any column. This modifies df
# in place, so re-running earlier cells is needed to get the dropped rows back.
df.dropna(inplace=True, axis=0)

## Let's have a look at it.

In [None]:
# Close price on its own figure, legend placed outside the axes on the right.
plt.figure(figsize=(18, 5), dpi=80)
plt.plot(df.Close, label = 'Close')
plt.title('Close')
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.grid()

# One stacked panel per indicator family. legend() is called once per panel,
# after its lines are drawn: calling it on an empty axes first only produced a
# "no labelled artists" warning and was immediately superseded.
fig, axs = plt.subplots(6, 1)

# Moving average
axs[0].plot(df.MA6, color = 'red', label = 'MA6')
axs[0].plot(df.MA12, color = 'green', label = 'MA12')
axs[0].plot(df.MA26, color = 'blue', label = 'MA26')
axs[0].legend(loc='center left', bbox_to_anchor=(1, 0.5))

# MACD and its signal line
axs[1].plot(df.MACD, color = 'blue', label = 'MACD')
axs[1].plot(df.MACDSIG, color = 'red', label = 'SIGNAL')
axs[1].legend(loc='center left', bbox_to_anchor=(1, 0.5))

# EMA
axs[2].plot(df.EMA6, color = 'red', label = 'EMA6')
axs[2].plot(df.EMA12, color = 'green', label = 'EMA12')
axs[2].plot(df.EMA24, color = 'blue', label = 'EMA24')
axs[2].legend(loc='center left', bbox_to_anchor=(1, 0.5))

# RSI with reference lines at 70 and 30
axs[3].plot(df.RSI6, color = 'red', label = 'RSI6')
axs[3].plot(df.RSI12, color = 'green', label = 'RSI12')
axs[3].plot(df.RSI24, color = 'blue', label = 'RSI24')
axs[3].axhline(y=70,linestyle='--',color='gray', label = '70')
axs[3].axhline(y=30,linestyle='--',color='gray', label = '30')
axs[3].legend(loc='center left', bbox_to_anchor=(1, 0.5))

# Stochastic with reference lines at 80 and 20
axs[4].plot(df['STOCH_%K'], color = 'blue', label = '%K')
axs[4].plot(df['STOCH_%D'], color = 'red', label = '%D')
axs[4].axhline(y=80,linestyle='--',color='gray', label = '80')
axs[4].axhline(y=20,linestyle='--',color='gray', label = '20')
axs[4].legend(loc='center left', bbox_to_anchor=(1, 0.5))

# Bollinger Bands around the close
axs[5].plot(df['Close'], color = 'blue', label = 'Close')
axs[5].plot(df['Upper'], color = 'red', label = 'Upper')
axs[5].plot(df['Lower'], color = 'green', label = 'Lower')
axs[5].legend(loc='center left', bbox_to_anchor=(1, 0.5))


The traditional interpretation of the RSI is that a value of 70 or above indicates that a security is becoming overbought or overvalued and may be primed for a trend reversal or a corrective pullback in price. An RSI reading of 30 or below indicates an oversold or undervalued condition.



## **Time to train some models...**

In [None]:
def test_stationarity(timeseries):
    """Plot rolling statistics of ``timeseries`` and print a Dickey-Fuller summary.

    Draws the series together with its 12-row rolling mean and rolling
    standard deviation, then runs ``adfuller`` with ``autolag='AIC'`` and
    prints the first four result fields followed by the critical values.
    Nothing is returned.
    """
    # Rolling statistics over a 12-row window
    window = timeseries.rolling(window=12)
    rolling_mean = window.mean()
    rolling_std = window.std()

    # Series and its rolling statistics on one chart
    plt.plot(timeseries, color='blue',label='Original')
    plt.plot(rolling_mean, color='red', label='Rolling Mean')
    plt.plot(rolling_std, color='black', label = 'Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)

    # Dickey-Fuller test: collect labels and values first, build the Series once
    print('Results of Dickey-Fuller Test:')
    adf_result = adfuller(timeseries, autolag='AIC')
    labels = ['Test Statistic','p-value','#Lags Used','Number of Observations Used']
    values = list(adf_result[0:4])
    for level, critical_value in adf_result[4].items():
        labels.append('Critical Value (%s)'%level)
        values.append(critical_value)
    print(pd.Series(values, index=labels))

In [None]:
# Stationarity check on the untransformed closing price.
test_stationarity(df['Close'])

In [None]:
# Split point at 80% of the rows.
split_at = int(df.shape[0] * 0.8)
train = df[0:split_at]
# NOTE(review): test starts one row BEFORE the split point, so the last
# training row is also the first test row - presumably so the two plotted
# lines join up. Drop the "- 1" before using these frames to evaluate a model.
test = df[split_at - 1:]

In [None]:
# Draw the close of both partitions with the same figure size and font size.
for partition in (train, test):
    partition.Close.plot(figsize=(15,6), fontsize=14)
plt.show()

## **Making it stationary**

In [None]:
rcParams['figure.figsize'] = (15, 6)
# Natural log of the closing price; the later differencing steps work on this series.
ts_log = np.log(df['Close'])
plt.plot(ts_log)

In [None]:
rcParams['figure.figsize'] = (15, 6)
# 12-row rolling mean of the log series, drawn in red over the series itself.
moving_avg = ts_log.rolling(window=12).mean()
plt.plot(ts_log)
plt.plot(moving_avg, color='red')

In [None]:
# Log series minus its rolling mean; the first 11 rows are NaN because the
# 12-row window is not yet full there.
ts_moving_avg_diff = ts_log.sub(moving_avg)
ts_moving_avg_diff.head(12)

In [None]:
# Drop the missing values in place, then test the remaining series.
ts_moving_avg_diff.dropna(inplace=True)
test_stationarity(ts_moving_avg_diff)

In [None]:
# Exponentially weighted mean of the log series (halflife of 12), drawn in
# red over the series itself.
expweighted_mavg = ts_log.ewm(halflife = 12).mean()
plt.plot(ts_log)
plt.plot(expweighted_mavg, color='red')

In [None]:
# Log series minus its exponentially weighted mean, then the stationarity check.
ts_log_ewma_diff = ts_log - expweighted_mavg
test_stationarity(ts_log_ewma_diff)

In [None]:
# First difference of the log series (each value minus the previous one).
ts_log_diff = ts_log.diff()
plt.plot(ts_log_diff)

In [None]:
# Differencing leaves the first value missing; drop it in place, then test.
ts_log_diff.dropna(inplace=True)
test_stationarity(ts_log_diff)

## **Forecasting**

In [None]:
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.arima_model import ARIMA

In [None]:
# Autocorrelation and partial autocorrelation of the differenced log series,
# both computed up to the same maximum lag.
max_lag = 20
lag_acf = acf(ts_log_diff, nlags=max_lag)
lag_pacf = pacf(ts_log_diff, nlags=max_lag, method='ols')

In [None]:
plt.plot(lag_acf, label='acf')
plt.plot(lag_pacf, label='pacf')
plt.legend(loc='upper right')
# Dashed reference lines at -1.96/sqrt(N) and +1.96/sqrt(N), N = series length.
band = 1.96/np.sqrt(len(ts_log_diff))
for level in (-band, band):
    plt.axhline(y=level,linestyle='--',color='gray')
plt.xlim([0, 8])

## **Arima**

In [None]:
# Fit an ARIMA model of order (3, 1, 3) to the log series.
# NOTE(review): ARIMA is imported from statsmodels.tsa.arima_model and fit()
# is called with disp=-1; the fitted values are compared with the differenced
# series below, so they are presumably on the differenced scale - confirm
# against the installed statsmodels version.
model = ARIMA(ts_log, order = (3,1,3))
results_ARIMA = model.fit(disp=-1)
plt.plot(ts_log_diff)
plt.plot(results_ARIMA.fittedvalues, color='red')
# Title shows the sum of squared differences between fitted values and ts_log_diff.
plt.title('RSS: %.4f'% sum((results_ARIMA.fittedvalues-ts_log_diff)**2))

In [None]:
# Copy of the model's fitted values as a Series of its own.
predicted_ARIMA_diff = pd.Series(results_ARIMA.fittedvalues, copy = True)
predicted_ARIMA_diff.head()

In [None]:
# Running total of the fitted values.
predicted_ARIMA_diff_cumsum = predicted_ARIMA_diff.cumsum()
predicted_ARIMA_diff_cumsum.head()

In [None]:
# Start every date at the first log value, then add the running total of the
# fitted values; dates missing from the running total contribute 0.
base_level = ts_log.iloc[0]
predictions_ARIMA_log = pd.Series(base_level, index=ts_log.index).add(
    predicted_ARIMA_diff_cumsum, fill_value=0
)
predictions_ARIMA_log.head()

In [None]:
# Back from log scale to price scale, then compare with the actual close.
predictions_ARIMA = np.exp(predictions_ARIMA_log)
plt.plot(df.Close)
plt.plot(predictions_ARIMA)
# Root of the mean squared difference between prediction and close.
squared_error = (predictions_ARIMA-df.Close)**2
rmse = np.sqrt(sum(squared_error)/len(df))
plt.title('RMSE: %.4f'% rmse)

In [None]:
# Plot the fitted model's predictions, passing 1 and 100 as the start and end
# arguments of plot_predict.
# NOTE(review): 100 may lie beyond the rows left after the earlier dropna -
# confirm this is meant to include an out-of-sample forecast.
results_ARIMA.plot_predict(1,100)
#plt.plot(df.Close, color='red')
plt.legend(loc='upper left')

## **Naive prediction**

## **Holt-Winters Method**

## **LSTM**