# Autoregressive Moving Average (ARMA) Model on Stock Prices

### Imports

In [None]:
import pandas as pd
import numpy as np
import itertools
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_model import ARMA
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime, timedelta 
from tqdm import tqdm_notebook as tqdm
plt.style.use('bmh')

In [None]:
# Load the price history from the CSV file sitting next to the notebook
df = pd.read_csv(filepath_or_buffer='SPY.csv')

# Show the raw frame
df

## Preprocessing Data

### Datetime Conversion

In [None]:
# Parse the Date column into timestamps
df['Date'] = pd.to_datetime(df['Date'])

# Use the dates as the index, then discard every row that has a missing value
df.set_index('Date', inplace=True)
df.dropna(inplace=True)

In [None]:
# Preview only the Close column (kept as a one-column DataFrame)
df.loc[:, ['Close']]

### Scaling

In [None]:
scaler = MinMaxScaler()

# From here on df holds nothing but the rescaled Close column.
# The scaler is fitted here and reused later to map values back to prices.
# NOTE(review): the scaler is fitted on the full history, before the
# train/test split further down - confirm that this is intended.
close_only = df[['Close']]
df = pd.DataFrame(
    scaler.fit_transform(close_only),
    index=close_only.index,
    columns=close_only.columns,
)

## Visualizing the Data

In [None]:
# Line chart of the (scaled) closing prices; the trailing semicolon hides the repr
df.plot(title='Closing Prices', figsize=(17, 8));

## Detrending

### Differencing the Data

In [None]:
# First-order differencing: each value becomes the change from the previous
# row. The first row has no predecessor, so its NaN is dropped.
df_diff = df.diff(1).dropna()

# Plotting
df_diff.plot(figsize=(16,4), title="Daily Changes in Closing Price")
# The Close column was min-max scaled earlier, so these changes are in
# scaled units rather than dollars.
plt.ylabel("Change in Scaled Closing Price")
plt.show()

# Stationarity testing with the Augmented Dickey-Fuller test;
# element 1 of the result is the p-value that gets printed.
results = adfuller(df_diff.iloc[:,0].values)
print(f"Testing for Stationarity\n\tP-value: {results[1]}")

### ACF and PACF

In [None]:
# Two stacked panels: autocorrelation on top, partial autocorrelation below,
# both computed on the differenced series for the first 40 lags
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(16, 8))
plot_acf(df_diff, lags=40, ax=ax1)
plot_pacf(df_diff, lags=40, ax=ax2)
plt.show()

## Parameter Search
Using a function to find the best parameters for the ARMA model

In [None]:
# Candidate AR (p) and MA (q) orders: 0 through 4 for each
p = range(5)
q = p

# Every (p, q) pairing, with q varying fastest
pq = [(ar_order, ma_order) for ar_order in p for ma_order in q]


def param_search(model, data, pq):
    """
    Fits the model once per order combination and returns the best performer.

    Args:
        model: Model class, called as ``model(data, order=comb)``. The
            instance must provide ``fit()``, and the fit result must expose
            an ``aic`` attribute.
        data: The series handed unchanged to ``model``.
        pq: Iterable of order combinations to try.

    Returns:
        A two-item list ``[combination, aic]`` for the combination with the
        lowest AIC. On ties the earliest combination in ``pq`` wins.

    Raises:
        ValueError: If no combination could be fitted with a finite AIC.
    """

    # List of [combination, AIC score] pairs for every successful fit
    scores = []
    last_error = None

    # Loop to find the best combination
    for comb in tqdm(pq):
        try:
            # Instantiating and fitting the model
            aic = model(data, order=comb).fit().aic
        except Exception as err:
            # Some orders simply cannot be fitted; skip them. Catching
            # Exception (not a bare except) keeps Ctrl-C working during
            # a long search.
            last_error = err
            continue

        # A NaN/inf score cannot be ranked meaningfully, so leave it out
        if np.isfinite(aic):
            scores.append([comb, aic])

    if not scores:
        raise ValueError(
            "param_search: no order combination could be fitted"
        ) from last_error

    # Returning the combination with the lowest score
    return min(scores, key=lambda item: item[1])

## Modeling

#### Train/Test Split

In [None]:
# Chronological 80/20 split: the earliest 80% of rows train the model,
# the remaining rows are held out for testing
index = round(.80 * len(df))

train, test = df.iloc[:index], df.iloc[index:]

In [None]:
# Grid-search the (p, q) orders on the training split only
best_parameter = param_search(model=ARMA, data=train, pq=pq)

print("Best p, q combination:", best_parameter[0])

### Fitting the Model

In [None]:
# ARMA model on the training split, using the order chosen by the search
model = ARMA(train, order=best_parameter[0])

# Estimate the model parameters
output = model.fit()

### Summary and Diagnostics

In [None]:
# Text report for the model fitted on the training split
train_fit_summary = output.summary()
print(train_fit_summary)

### Forecasting

In [None]:
# Getting Forecast and Confidence Intervals for the whole test span.
# forecast() returns a tuple: the point forecasts come first and the
# confidence-interval array comes last.
forecast = output.forecast(len(test))
fc       = forecast[0]
conf     = forecast[-1]

# Putting the Data into a Pandas Series, aligned to the test dates.
# Column 0 of the interval array is the lower bound, column 1 the upper bound.
fc_series = pd.Series(fc, index=test.index)
low_conf  = pd.Series(conf[:, 0], index=test.index)
up_conf   = pd.Series(conf[:, 1], index=test.index)

### Validating

In [None]:
# In-sample predictions for every training observation (positions 0..N-1).
# Starting at 1 and ending at len(train) would return forecasts for
# observations 1..N and then label them with the dates of 0..N-1, shifting
# every prediction one day early.
predictions = output.predict(start=0, end=len(train)-1)

# Re-label with the training dates so the series lines up on the plot
preds = pd.Series(predictions.values, index=train.index)

### Plotting

In [None]:
ax = plt.figure(figsize=(17, 6)).gca()

# Out-of-sample forecast with its confidence band shaded in grey
ax.plot(fc_series, label='Forecast')
ax.fill_between(low_conf.index, low_conf, up_conf, color='k', alpha=.15)

# In-sample predictions, drawn over the training period
ax.plot(preds, label='Predictions', alpha=.5)

# Actual values for both splits
ax.plot(test, label='Test')
ax.plot(train, label='Train', alpha=.5)

# Legend and render
ax.legend()
plt.show()

## Future Modeling

### Finding the Best Parameters for the Entire Dataset

In [None]:
# Searching for the optimum parameters
best_param = param_search(ARMA, df, pq)

print("Best p, q combination for the entire dataset:", best_parameter[0])

### Fitting the Model to the Entire Dataset

In [None]:
# ARMA model on the complete dataset, using the order from the full-data search
model = ARMA(df, order=best_param[0])

# Estimate the model parameters
output = model.fit()

### Summary and Diagnostics

In [None]:
# Text report for the model fitted on the complete dataset
full_fit_summary = output.summary()
print(full_fit_summary)

### Forecasting the Future

In [None]:
# Number of Periods to forecast
periods = 90

# Getting Forecast and Confidence Intervals
forecast = output.forecast(periods)
fc       = forecast[0].reshape(-1,1)
conf     = forecast[-1]

# Putting the Data into a Pandas Series
fc_series = pd.DataFrame(scaler.inverse_transform(fc), 
                         index=pd.date_range(start=df.index[-1]+timedelta(days=1), periods=periods, freq="B"))

up_conf   = pd.DataFrame(scaler.inverse_transform(conf[:, 0].reshape(-1,1)), 
                         index=pd.date_range(start=df.index[-1]+timedelta(days=1), periods=periods, freq="B"))

low_conf  = pd.DataFrame(scaler.inverse_transform(conf[:, 1].reshape(-1,1)), 
                         index=pd.date_range(start=df.index[-1]+timedelta(days=1), periods=periods, freq="B"))

# Unscaling the DF
df = pd.DataFrame(scaler.inverse_transform(df).reshape(1,-1)[0],
                  index=df.index)

### Plotting the Forecast

In [None]:
ax = plt.figure(figsize=(17, 6)).gca()

# Forecast line with its confidence band shaded in grey
ax.plot(fc_series, label='Forecast')
ax.fill_between(low_conf.index, low_conf[0], up_conf[0], color='k', alpha=.15)

# Observed prices
ax.plot(df, label="Real Values")

# Legend and render
ax.legend()
plt.show()

### Zooming In on the Forecast

In [None]:
zoom_ax = plt.figure(figsize=(17, 6)).gca()

# Forecast line with its confidence band shaded in grey
zoom_ax.plot(fc_series, label='Forecast')
zoom_ax.fill_between(low_conf.index, low_conf[0], up_conf[0], color='k', alpha=.15)

# Observed prices
zoom_ax.plot(df, label="Real Values")

# Restrict the x-axis to the stretch around the forecast
zoom_ax.set_xlim("2020", "2020-10")

# Legend and render
zoom_ax.legend()
plt.show()

## Trading the Model
Create a simple trading algorithm based on the model's predictions.

In [None]:
# Give the column labelled 0 its proper name again
df.rename({0: 'Close'}, axis='columns', inplace=True)

In [None]:
# Day-over-day percentage change of the closing price. Computed from the
# Close column explicitly so the cell still works once df has more than
# one column (for example when it is run a second time).
df['DailyReturns'] = df['Close'].pct_change()

In [None]:
# Display the frame, which now includes the DailyReturns column
df

In [None]:
# Buy-and-hold baseline: spend the starting cash on whole shares at the
# first close, then let the position follow each day's return.
cash = 1000

# Plain arrays give true positional access. Integer keys on a date-indexed
# Series (df.Close[i]) depend on a deprecated positional fallback in pandas.
closes = df.Close.to_numpy()
daily_returns = df.DailyReturns.to_numpy()

balance = []

if len(closes):
    # Value of the whole shares affordable at the first closing price
    balance.append(closes[0] * (cash // closes[0]))

    # Compound every later day's return onto the previous balance
    for day_return in daily_returns[1:]:
        balance.append((day_return + 1) * balance[-1])

In [None]:
# Store the simulated balance alongside the prices, one value per row
df.loc[:, 'CashBalance'] = balance

In [None]:
# Line chart of the simulated balance over time
df['CashBalance'].plot()

### Run the ARMA model for predictions over an expanding or sliding window

In [None]:
def validater(n_per_in, n_per_out):
    """
    Walks over the module-level `df` in rolling windows and collects a prediction for each window.

    n_per_in:  number of rows in each input window
    n_per_out: step between consecutive window positions

    Returns a DF indexed like `df`, with a single column named after the first column of `df`,
    holding the predicted values written at business-day dates following each window.

    NOTE(review): `n_features` and `close_scaler` are not defined anywhere in the visible
    source, and the `model.predict(...)` call passes a 3-D array, which does not match how
    the ARMA objects are used elsewhere in this file. This function looks carried over from
    a different workflow and would presumably raise a NameError if called - confirm before use.
    """
    
    # Creating an empty DF to store the predictions
    predictions = pd.DataFrame(index=df.index, columns=[df.columns[0]])

    # Window offsets are counted back from the end of df, moving n_per_out rows per step
    for i in range(1, len(df)-n_per_in, n_per_out):
        # Creating rolling intervals to predict off of: the n_per_in rows ending i rows before the end
        x = df[-i - n_per_in:-i]

        # Predicting using rolling intervals
        # NOTE(review): relies on a module-level `model` and on `n_features`, which is not defined here
        yhat = model.predict(np.array(x).reshape(1, n_per_in, n_features))

        # Transforming values back to their normal prices
        # NOTE(review): `close_scaler` is not defined in the visible source (the scaler above is named `scaler`)
        yhat = close_scaler.inverse_transform(yhat)[0]

        # DF to store the values and append later, frequency uses business days
        pred_df = pd.DataFrame(yhat, 
                               index=pd.date_range(start=x.index[-1]+timedelta(days=1), 
                                                   periods=len(yhat), 
                                                   freq="B"),
                               columns=[x.columns[0]])

        # Updating the predictions DF in place; only matching index labels and columns are written
        predictions.update(pred_df)
        
    return predictions

In [None]:
# Days to predict in the future
pred_periods = 5

# How many periods in the past to use to make the prediction
past_periods = 300

# Predicted prices, aligned to the real dates. The 'Close' column has to
# exist up front: DataFrame.update only writes into columns that are
# already present, so a column-less frame would stay empty forever.
pred_prices = pd.DataFrame(index=df.index, columns=['Close'], dtype=float)

# Assigning variables to the p and q values
p = q = range(0,5)

# Finding all possible combinations of p and q
pq = list(itertools.product(p, q))

# Start at the first position that has a full look-back window behind it,
# and stop early enough to leave room for the forecast horizon.
for i in tqdm(range(past_periods, len(df.Close)-pred_periods)):

    # Trailing window: the past_periods prices strictly before position i,
    # so the model never sees the values it is asked to predict
    x = df.Close.iloc[i-past_periods:i]

    # Finding the best parameters for this window
    best_param = param_search(ARMA, x, pq)

    # Fitting the model
    model = ARMA(x, order=best_param[0])

    output = model.fit()

    # forecast() returns a tuple whose first element holds the point
    # forecasts; only those are stored
    predictions = output.forecast(pred_periods)[0]

    # Creating a DF that contains the predictions, dated on the business
    # days following the window
    pred_df = pd.DataFrame(predictions,
                           index=pd.date_range(start=x.index[-1]+timedelta(days=1),
                                               periods=len(predictions),
                                               freq='B'),
                           columns=['Close'])

    # Updating the DF. Dates that are absent from df.index are ignored,
    # and a later window overwrites earlier forecasts for the same date.
    pred_prices.update(pred_df)