# ARIMA Forecasting on Stock Price Data

This notebook performs time series analysis using an ARIMA(0,1,0) model on stock price data. It includes missing value handling, technical indicator computation, and forecast visualization.

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA

## Function: `dt(loc)`
Reads a CSV file containing stock data and parses dates.

In [None]:
def dt(loc):
    """Load stock data from a CSV file, parsing the 'Date' column if present.

    Args:
        loc: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        DataFrame with the CSV's columns. A 'Date' column, when it exists
        and can be parsed, is converted to datetime; otherwise the frame is
        returned exactly as read.
    """
    stock = pd.read_csv(loc)
    # ``parse_dates=True`` only asks pandas to parse the *index*; with no
    # ``index_col`` that is a no-op, so convert the date column explicitly.
    if 'Date' in stock.columns:
        try:
            stock['Date'] = pd.to_datetime(stock['Date'])
        except (ValueError, TypeError):
            # Leave unparseable dates untouched rather than failing the load.
            pass
    return stock

## Function: `fillin(loc)`
Handles missing values in the 'Close' column using linear interpolation and backfilling.

In [None]:
def fillin(loc):
    """Load the stock data and fill missing 'Close' values.

    Runs of at most 3 consecutive missing values are filled by linear
    interpolation; longer runs are backfilled from the next valid value
    (and forward-filled if the run sits at the very end of the series).

    Args:
        loc: Path or file-like object passed through to ``dt``.

    Returns:
        DataFrame as returned by ``dt`` with the 'Close' column filled.
    """
    stock = dt(loc)
    close = stock['Close']
    missing = close.isna()
    # notna().cumsum() stays constant across a run of consecutive NaNs, so
    # every NaN in the same gap shares one id. (isna().cumsum() would give
    # each NaN its own id, making every gap look like length 1.)
    gap_id = close.notna().cumsum().where(missing)
    gap_len = gap_id.map(gap_id.value_counts())
    # Keep the NaNs of long gaps (> 3); everywhere else take the
    # interpolated value (which equals the original where it was present).
    stock['Close'] = close.where(gap_len > 3, close.interpolate(method='linear'))
    # Backfill the long gaps; ffill covers a long gap at the tail, which has
    # no later value to backfill from.
    stock['Close'] = stock['Close'].bfill().ffill()
    return stock

## Function: `markers(loc)`
Computes lagged (previous-day) 7-day and 30-day moving averages, daily percentage returns, and a 14-day RSI for the stock data.

In [None]:
def markers(loc):
    """Add lagged technical indicators to the filled stock data.

    Every indicator is computed from values shifted by one row, so the
    value on a given row does not use that row's own 'Close'.

    Args:
        loc: Path or file-like object passed through to ``fillin``.

    Returns:
        The DataFrame from ``fillin`` with '7DMA', '30DMA', 'Return' and
        'RSI' columns added.
    """
    stock = fillin(loc)
    prev_close = stock['Close'].shift(1)

    # Moving averages over the previous 7 and 30 closes.
    stock['7DMA'] = prev_close.rolling(window=7, min_periods=0).mean()
    stock['30DMA'] = prev_close.rolling(window=30, min_periods=0).mean()

    # Percent change of the previous close relative to the one before it.
    stock['Return'] = prev_close.pct_change() * 100

    # RSI over a 14-row window: share of the summed moves that were gains.
    delta = stock['Close'].diff()
    gains = delta.where(delta > 0, 0)
    losses = delta.where(delta < 0, 0)
    mean_gain = gains.shift(1).rolling(window=14, min_periods=1).sum() / 14
    mean_loss = losses.shift(1).rolling(window=14, min_periods=1).sum().abs() / 14
    stock['RSI'] = mean_gain.div(mean_gain + mean_loss).mul(100)
    return stock

## Function: `ins(loc)`
Extracts the 'Close' column for modeling purposes.

In [None]:
def ins(loc):
    """Return a single-column DataFrame holding the 'Close' series.

    Args:
        loc: Path or file-like object passed through to ``markers``.

    Returns:
        DataFrame with one column, 'Close', taken from the output of
        ``markers``.
    """
    enriched = markers(loc)
    return pd.DataFrame({'Close': enriched['Close']})

## Function: `arima(loc)`
Fits an ARIMA(0,1,0) model to the training data, generates forecasts, and visualizes the results with confidence intervals.

In [None]:
def arima(loc):
    """Fit ARIMA(0,1,0) on the first 80% of 'Close' and plot the forecast.

    The model is fitted on the leading 80% of rows and forecast over the
    remaining 20%. The mean absolute error against the held-out values is
    printed, and a figure with the training data, actual test values,
    forecast and its confidence band is shown.

    Args:
        loc: Path or file-like object passed through to ``ins``.

    Returns:
        None.

    Raises:
        ValueError: If the data has too few rows to form non-empty train
            and test splits.
    """
    inp = ins(loc)
    # Chronological 80/20 split (no shuffling: order matters for time series).
    split = int(len(inp) * 0.8)
    dtrain, dtest = inp.iloc[:split], inp.iloc[split:]
    if dtrain.empty or dtest.empty:
        raise ValueError(
            f'Need at least 2 rows for an 80/20 train/test split, got {len(inp)}'
        )
    # ARIMA(0,1,0) is a random walk. No trend argument is passed, so with
    # d=1 statsmodels fits no drift term and the forecast mean stays flat
    # at the last training value.
    makem = ARIMA(dtrain, order=(0, 1, 0))
    fitm = makem.fit()
    # Forecast for the length of the test set
    pred = fitm.get_forecast(steps=len(dtest))
    predmean = pred.predicted_mean
    confint = pred.conf_int()
    # Calculate and print MAE (Mean Absolute Error)
    mae = mean_absolute_error(dtest['Close'], predmean)
    print(f'MAE: {mae:.2f}')
    # Plot training data, actual test values, and forecast with confidence intervals
    plt.figure(figsize=(12, 6))
    plt.plot(dtrain.index, dtrain['Close'], label='Train')
    plt.plot(dtest.index, dtest['Close'], label='Actual')
    plt.plot(dtest.index, predmean, color='red', label='Prediction')
    # conf_int() columns are used positionally: first = lower bound, second = upper bound.
    plt.fill_between(dtest.index, confint.iloc[:, 0], confint.iloc[:, 1], color='pink', alpha=0.3)
    plt.xlabel('Index')
    plt.ylabel('Close Price')
    plt.title('ARIMA(0,1,0) Forecast vs Actual')
    plt.legend()
    plt.show()

## Running the ARIMA Model

In [None]:
# Run the full pipeline on the Amazon price file under the user's Downloads folder.
arima(Path.home().joinpath('Downloads', 'Amazon', 'amzn.us.csv'))