In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns




In [2]:
from sklearn.metrics import r2_score, median_absolute_error, mean_absolute_error, mean_squared_error, mean_squared_log_error

In [4]:
from scipy.optimize import minimize
import statsmodels.tsa.api as smt
import statsmodels.api as sm

AttributeError: module 'numpy' has no attribute 'MachAr'

In [None]:
from tqdm import tqdm_notebook
from itertools import product

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (in percent).

    Parameters
    ----------
    y_true : array-like
        Observed values. Entries equal to zero lead to a division by zero
        (inf/nan in the result), as with any percentage error.
    y_pred : array-like
        Predicted values, same length as ``y_true``.

    Returns
    -------
    float
        mean(|(y_true - y_pred) / y_true|) * 100
    """
    # Coerce to arrays so plain lists work and Series are compared by
    # position rather than aligned by index label.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # The denominator was misspelled (`y_trye`), which raised NameError on every call.
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the raw price sample; the path is relative to the notebook's working directory.
df = pd.read_csv("stock_prices_sample.csv")

In [None]:
df.head(10)

# Cleaning the data

In [None]:
# Use the DATE column as the row index.
df = df.set_index("DATE")

In [None]:
# Drop every row belonging to the GEF ticker and every intraday record.
df = df[(df.TICKER != 'GEF') & (df.TYPE != 'Intraday')]

In [None]:
# Only a few columns are needed for the analysis, so the rest are dropped.
# Columns to remove:
drop_cols = [
    'SPLIT_RATIO', 'EX_DIVIDEND', 'ADJ_FACTOR', 'ADJ_VOLUME',
    'ADJ_CLOSE', 'ADJ_LOW', 'ADJ_HIGH', 'ADJ_OPEN',
    'VOLUME', 'FREQUENCY', 'TYPE', 'FIGI',
]
df = df.drop(columns=drop_cols)

In [None]:
df

In [None]:
# Reverse the row order of the whole frame rather than only the CLOSE values.
# Flipping CLOSE alone left the DATE index and the remaining price columns in
# their original order, so every date ended up paired with another day's close.
# The resulting CLOSE sequence is the same as before; it now stays aligned
# with its dates.
# NOTE(review): like the original, this flips the order each time the cell is
# run — run it once per fresh `df`.
df = df.iloc[::-1]

In [None]:
df

In [None]:
# Line chart of the closing price over the whole sample.
fig, ax = plt.subplots(figsize=(17, 8))
ax.plot(df.CLOSE)
ax.set_title('Closing price of New Germany Fund Inc (GF)')
ax.set_ylabel('Closing price ($)')
ax.set_xlabel('Trading day')
ax.grid(True)
plt.show()

# Moving average

In [None]:

def plot_moving_average(series, window, plot_intervals=False, scale=1.96):
    """Plot a rolling mean of ``series`` together with the actual values.

    Parameters
    ----------
    series : pandas.Series
        Values to smooth.
    window : int
        Number of observations in each rolling-mean window.
    plot_intervals : bool, default False
        When True, also draw a band of
        rolling mean +/- (MAE + scale * std of the residuals), where both
        statistics are computed from position ``window`` onward.
    scale : float, default 1.96
        Multiplier applied to the residual standard deviation.
    """
    smoothed = series.rolling(window=window).mean()

    plt.figure(figsize=(17,8))
    plt.title('Moving average\n window size = {}'.format(window))
    plt.plot(smoothed, 'g', label='Rolling mean trend')

    if plot_intervals:
        # Skip the warm-up stretch where the rolling mean is still undefined.
        actual_tail = series[window:]
        smoothed_tail = smoothed[window:]
        mae = mean_absolute_error(actual_tail, smoothed_tail)
        deviation = np.std(actual_tail - smoothed_tail)
        margin = mae + scale * deviation
        plt.plot(smoothed + margin, 'r--', label='Upper bound / Lower bound')
        plt.plot(smoothed - margin, 'r--')

    plt.plot(series[window:], label='Actual values')
    plt.legend(loc='best')
    plt.grid(True)

In [None]:
# Smooth over the previous 5 days (a trading week), 30 days (a month),
# 90 days and 100 days; the two longest windows also show the interval band.
for window, show_band in [(5, False), (30, False), (90, True), (100, True)]:
    plot_moving_average(df.CLOSE, window, plot_intervals=show_band)

# Exponential smoothing

In [None]:
def exponential_smoothing(series, alpha):
    """Apply simple exponential smoothing and return the smoothed values.

    Parameters
    ----------
    series : sequence or pandas.Series
        Observations in order. Only the values are used; any index is ignored.
    alpha : float
        Smoothing factor applied to the current observation; ``1 - alpha``
        is applied to the previous smoothed value.

    Returns
    -------
    list
        Same length as ``series``. The first element is the first
        observation; an empty input yields an empty list.
    """
    # Work on plain positional values: `series[0]` on a pandas Series is a
    # label lookup when the index is integer-typed and a deprecated positional
    # fallback otherwise, so it breaks for any index that is not 0..n-1.
    values = list(series)
    if not values:
        return []

    results = [values[0]]
    for observation in values[1:]:
        results.append(alpha * observation + (1 - alpha) * results[-1])
    return results

def plot_exponential_smoothing(series, alpha):
    """Plot ``series`` against its exponentially smoothed versions.

    Parameters
    ----------
    series : pandas.Series
        Actual values; drawn in cyan and labelled "Actual".
    alpha : iterable of float
        Smoothing factors; one smoothed curve is drawn per element.
    """
    plt.figure(figsize=(17, 8))
    # A separate loop name keeps the `alpha` argument from being shadowed.
    for factor in alpha:
        smoothed = exponential_smoothing(series, factor)
        plt.plot(smoothed, label="Alpha{}".format(factor))

    plt.plot(series.values, "c", label = "Actual")
    plt.legend(loc = "best")
    plt.axis("tight")
    plt.title("Exponential Smoothing")
    plt.grid(True)
    
    
plot_exponential_smoothing(df.CLOSE, [0.05, 0.3])

In [None]:
def exponential_smoothing(series, alpha):
    """Return the simple exponential smoothing of ``series``.

    Each output value is ``alpha * observation + (1 - alpha) * previous
    output``, seeded with the first observation.

    Parameters
    ----------
    series : sequence or pandas.Series
        Ordered observations; index labels, if any, are ignored.
    alpha : float
        Weight given to the current observation.

    Returns
    -------
    list
        One smoothed value per observation (empty list for empty input).
    """
    # Use positional values instead of `series[i]`: integer keys on a pandas
    # Series are label lookups for integer indexes and a deprecated
    # positional fallback for other indexes.
    values = list(series)
    if not values:
        return []

    results = [values[0]]
    for position in range(1, len(values)):
        results.append(alpha * values[position] + (1 - alpha) * results[position - 1])
    return results

def plot_exponential_smoothing(series, alpha):
    """Draw the actual series and one smoothed curve per smoothing factor.

    Parameters
    ----------
    series : pandas.Series
        Actual values, plotted with the default line colour.
    alpha : iterable of float
        Smoothing factors to compare.
    """
    plt.figure(figsize=(17, 8))
    # Loop name differs from the parameter so the argument is not shadowed.
    for factor in alpha:
        curve = exponential_smoothing(series, factor)
        plt.plot(curve, label="Alpha{}".format(factor))

    plt.plot(series.values, label = "Actual")
    plt.legend(loc = "best")
    plt.title("Exponential Smoothing")
    plt.grid(True)
    
    
plot_exponential_smoothing(df.CLOSE, [0.05, 0.3])

In [None]:
def double_exponential_smoothing(series, alpha, beta):
    """Apply double (level + trend) exponential smoothing.

    Parameters
    ----------
    series : sequence or pandas.Series
        Ordered observations; index labels, if any, are ignored.
    alpha : float
        Smoothing factor for the level.
    beta : float
        Smoothing factor for the trend.

    Returns
    -------
    list
        ``len(series) + 1`` values: the first observation, then
        ``level + trend`` after each update. The final update feeds the
        previous output back in as the observation, giving one forecast step
        beyond the data. An empty input yields an empty list.
    """
    # Positional values: `series[n]` on a pandas Series is label-based for
    # integer indexes and a deprecated positional fallback for other indexes.
    values = list(series)
    if not values:
        return []

    # Seed the level with the first observation and the trend with the first
    # difference; with a single observation there is no difference, so the
    # trend starts at zero instead of raising.
    level = values[0]
    trend = values[1] - values[0] if len(values) > 1 else 0

    result = [values[0]]
    for n in range(1, len(values) + 1):
        # Past the end of the data, reuse the last output as the observation.
        value = values[n] if n < len(values) else result[-1]
        last_level, level = level, alpha * value + (1 - alpha) * (level + trend)
        trend = beta * (level - last_level) + (1 - beta) * trend
        result.append(level + trend)
    return result

def plot_double_exponential_smoothing(series, alphas, betas):
    """Plot the actual series against every (alpha, beta) smoothing combination.

    Parameters
    ----------
    series : pandas.Series
        Actual values, labelled "Actual".
    alphas : iterable of float
        Level smoothing factors to try.
    betas : iterable of float
        Trend smoothing factors to try.
    """
    plt.figure(figsize=(17, 8))
    for level_factor in alphas:
        for trend_factor in betas:
            curve = double_exponential_smoothing(series, level_factor, trend_factor)
            legend_text = "Alpha {}, beta {}".format(level_factor, trend_factor)
            plt.plot(curve, label=legend_text)
    plt.plot(series.values, label = "Actual")
    plt.legend(loc="best")
    plt.axis('tight')
    plt.title("Double Exponential Smoothing")
    plt.grid(True)
    
plot_double_exponential_smoothing(df.CLOSE, alphas=[0.9, 0.02], betas=[0.9, 0.02])

# Modelling

## The Dickey-Fuller test

In [None]:
def tsplot(y, lags=None, figsize=(17, 8), style = "bmh"):
    """Plot a series with its ACF and PACF and report the Dickey-Fuller p-value.

    Parameters
    ----------
    y : pandas.Series or array-like
        Series to analyse; anything that is not a Series is wrapped in one.
    lags : int or None
        Passed through to the ACF and PACF plots.
    figsize : tuple
        Figure size handed to matplotlib.
    style : str, default "bmh"
        Matplotlib style applied while the figure is drawn.
    """
    if not isinstance(y, pd.Series):
        y = pd.Series(y)

    # Honour the caller's `style`; it was previously hard-coded to 'bmh',
    # which silently ignored the argument.
    with plt.style.context(style):
        plt.figure(figsize=figsize)
        layout = (2, 2)
        # Top row: the series itself; bottom row: ACF (left) and PACF (right).
        ts_ax = plt.subplot2grid(layout, (0, 0), colspan=2)
        acf_ax = plt.subplot2grid(layout, (1, 0))
        pacf_ax = plt.subplot2grid(layout, (1, 1))

        y.plot(ax=ts_ax)
        # Element 1 of the adfuller result is used as the p-value in the title.
        p_value = sm.tsa.stattools.adfuller(y)[1]
        ts_ax.set_title("Time Series Analysis Plot\n Dickey Fuller: p={0:.5f}".format(p_value))
        smt.graphics.plot_acf(y, lags=lags, ax=acf_ax)
        smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax)
        plt.tight_layout()
        

# Diagnostics for the raw closing price.
tsplot(df.CLOSE, lags=30)

# First-difference the series to make the process stationary; the first
# element has no predecessor (NaN), so it is skipped before plotting.
df_diff = df.CLOSE.diff()

tsplot(df_diff.iloc[1:], lags=30)

# SARIMA