### Based on https://github.com/borisbanushev/stockpredictionai

In [None]:
from utils import yf_reader

# Instrument analysed by the rest of the notebook. To switch instrument,
# replace the active definition with the alternative kept below.
# Ticker = {'symbol': 'GS', 'name': 'Goldman Sachs', 'csv': 'data/GS.csv', 'currency': 'USD'}
Ticker = {
    'symbol': 'TGYM.MI',
    'name': 'Technogym',
    'csv': 'data/TGYM.csv',
    'currency': 'EUR',
}

# NOTE(review): get_history presumably fetches the price history for the
# symbol and stores it at the CSV path that later cells read back --
# confirm against utils.yf_reader.
df = yf_reader.get_history(Ticker['symbol'], Ticker['csv'])

# Quick sanity check of the columns and dtypes that came back.
df.info()

In [None]:
from utils import ti

import datetime
import numpy as np
import pandas as pd

#import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

#import math

#from sklearn.decomposition import PCA
#from sklearn.preprocessing import MinMaxScaler
#from sklearn.preprocessing import StandardScaler
#from sklearn.metrics import accuracy_score

#import xgboost as xgb

#import warnings
#warnings.filterwarnings("ignore")

#from mxnet import nd, autograd, gluon
#from mxnet.gluon import nn, rnn
#import mxnet as mx
#context = mx.cpu(); model_ctx=mx.cpu()
#mx.random.seed(1719)

In [None]:
# Load the cached price history and derive the train/test split boundaries.

# The CSV stores dates as plain ISO calendar days.
DATE_FORMAT = '%Y-%m-%d'


def dateparse(x):
    """Parse a single 'YYYY-MM-DD' string into a datetime.

    Not used by the loader below; kept so other cells that call it
    continue to work.
    """
    return datetime.datetime.strptime(x, DATE_FORMAT)


# `read_csv(date_parser=...)` is deprecated since pandas 2.0, so the date
# column (column 0, as before) is converted explicitly after loading. The
# strict format keeps the original behaviour of rejecting malformed dates.
df = pd.read_csv(Ticker['csv'], header=0)
date_column = df.columns[0]
df[date_column] = pd.to_datetime(df[date_column], format=DATE_FORMAT)

# Only the closing price (renamed to 'price') and the remaining columns are
# used by the analysis below.
df.drop(columns=['Open', 'High', 'Low', 'Dividends', 'Stock Splits'], inplace=True)
df.rename(columns={'Close': 'price'}, inplace=True)

# Split dataset: the first `tt_cutoff` fraction of rows is the training set.
tt_cutoff = 0.8
num_training_days = int(df.shape[0] * tt_cutoff)
start_date = df['Date'].iloc[0]
# Date of the last training row; everything after it is test data.
cutoff_date = df['Date'].iloc[num_training_days - 1]

print(f'There are {df.shape[0]} number of days in the dataset.')
print(f'Train/Test cut-off set to {tt_cutoff}')
print(f'Number of training days: {num_training_days}. [{start_date.strftime("%Y-%m-%d")} to {cutoff_date.strftime("%Y-%m-%d")}]')
print(f'Number of test days: {df.shape[0]-num_training_days}.')

In [None]:
# Plot the full price history and mark where the training data ends.
plt.figure(figsize=(14, 5), dpi=100)
plt.plot(df['Date'], df['price'], label='Stock price')
# Dashed vertical line at the last training day, spanning 0..max price.
plt.vlines(cutoff_date, 0, df['price'].max(), linestyles='--', colors='gray', label='Train/Test data cut-off')
plt.xlabel('Date')
plt.ylabel(Ticker['currency'])
# The original f-string had no braces, so the title showed the literal text
# 'Ticker["name"]' instead of the company name.
plt.title(f'{Ticker["name"]} stock price')
plt.legend()
plt.show()

In [None]:
# Derive technical indicators from the price frame using the project's `ti`
# helper module; the result is kept in `df_TI`.
df_TI = ti.get_technical_indicators(df)

# NOTE(review): the meaning of the second argument (400) is defined in
# utils.ti -- presumably the number of most recent days to plot; confirm.
ti.plot_technical_indicators(df_TI, 400)

In [None]:
# Fourier-transform the price series and rebuild low-pass approximations of it
# from a handful of the lowest-frequency components.
close_fft = np.fft.fft(np.asarray(df['price'].tolist()))
fft_df = pd.DataFrame({'fft': close_fft})

# Magnitude and phase of every frequency component (vectorised rather than a
# per-element `apply`).
df['absolute'] = np.abs(close_fft)
df['angle'] = np.angle(close_fft)

fft_list = np.asarray(close_fft)
fft_component_counts = [3, 6, 9, 100]
for num_ in fft_component_counts:
    # Keep the first and last `num_` coefficients (lowest positive and
    # negative frequencies), zero the rest, and transform back.
    fft_ = np.copy(fft_list)
    fft_[num_:-num_] = 0
    df[f'fft_{num_}'] = np.real(np.fft.ifft(fft_))

plt.figure(figsize=(14, 7), dpi=100)
for num_ in fft_component_counts:
    plt.plot(df[f'fft_{num_}'], label=f'Fourier transform with {num_} components')
plt.plot(df['price'], label='Real')
plt.xlabel('Days')
# Use the ticker's currency instead of a hard-coded 'USD', consistent with
# the price plot.
plt.ylabel(Ticker['currency'])
plt.title('Stock prices & Fourier transforms')
plt.legend()
plt.show()

In [None]:
from pandas.plotting import autocorrelation_plot

# Create the figure first: autocorrelation_plot draws on the current axes, so
# calling plt.figure() afterwards (as before) left the plot at the default
# size and opened a second, empty figure.
plt.figure(figsize=(10, 7), dpi=80)
autocorrelation_plot(df['price'].to_list())
plt.show()

In [None]:
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
from tqdm.notebook import tqdm


def ARIMAForecasting(data, start_params, best_pdq=(5,1,2)):
    """Fit an ARIMA model on ``data`` and forecast the next value.

    Args:
        data: Observed series handed to ``ARIMA``.
        start_params: Initial parameters passed to ``fit``; the caller
            passes ``None`` on the first call and the previously fitted
            parameters afterwards.
        best_pdq: The ``order`` argument for ``ARIMA``.

    Returns:
        A ``(forecast, params)`` pair: the last entry of element ``[0]`` of
        the fitted model's ``forecast()`` result, and the fitted parameters.
    """
    fitted = ARIMA(data, order=best_pdq).fit(start_params=start_params)
    # NOTE(review): indexing [0] assumes forecast() returns a tuple whose
    # first item is the array of point forecasts -- confirm against the
    # installed statsmodels version.
    point_forecasts = fitted.forecast()[0]
    # Only the last forecast step is returned.
    return point_forecasts[-1], fitted.params


# Walk-forward ARIMA evaluation: forecast each test day from all data seen so
# far, then reveal the real value before forecasting the next day.
all_data = df['price'].to_list()
train_data = all_data[:num_training_days]
test_data = all_data[num_training_days:]

# Work on a copy: the original `data = train_data` aliased the list, so every
# append below silently grew `train_data` as well.
data = list(train_data)
predictions = []
# Parameters of the previous fit seed the next one; None on the first step.
fit_params = None
for real_value in tqdm(test_data):
    yhat, fit_params = ARIMAForecasting(data, fit_params)
    predictions.append(yhat)
    data.append(real_value)

error = mean_squared_error(test_data, predictions)
print('Test MSE: %.3f' % error)

plt.figure(figsize=(12, 6), dpi=100)
plt.plot(test_data, label='Real')
plt.plot(predictions, color='red', label='Predicted')
plt.xlabel('Days')
# Use the ticker's currency instead of a hard-coded 'USD', consistent with
# the price plot.
plt.ylabel(Ticker['currency'])
plt.title('Figure 5: ARIMA model')
plt.legend()
plt.show()