In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
%config InlineBackend.figure_format='retina'

In [2]:
import numpy as np
import pandas as pd
import time
from datetime import datetime
from pymongo import MongoClient

from src.config_tickets import ticket_lst
from src.scraping import WebScraping
from src.settings import HOST
from src.query_data import QueryData
from src.utilities import run_model_with_parameters, run_model_without_parameters

import sys

if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

In [3]:
# Display and Plotting
import matplotlib.pylab as plt
import seaborn as sns

from ipywidgets import interactive, widgets, RadioButtons, ToggleButton, Select, FloatSlider, FloatRangeSlider, IntSlider, fixed

# Numeric display: render floats with five decimal places in both numpy and pandas
np.set_printoptions(precision=5, suppress=True) # numpy
pd.set_option('display.float_format', '{:.5f}'.format) # pandas

# Allow up to 100 rows / columns in DataFrame displays
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)

# seaborn plotting style
sns.set(style='ticks', context='poster')

In [4]:
def connect_2_dbServer():
    """Create and return a ``MongoClient`` for the address configured in ``HOST``."""
    return MongoClient(HOST)

In [5]:
# Open the MongoDB connection and wrap it in the project's QueryData helper,
# which the following cells use to fetch tickets and price history.
client = connect_2_dbServer()
query = QueryData(client)

In [6]:
# Ask the query helper for the tickets listed under the 'VN 30 (VNI30)' index
# and show them (the captured output below is a list of company names).
lst_ticket = query.get_list_ticket(index='VN 30 (VNI30)')
print(lst_ticket)

['Ho Chi Minh City Infrastructure Investment JSC', 'Ho Chi Minh City Development Joint Stock Commercial Bank', 'Hoa Phat Group JSC', 'Masan Group Corp', 'Military Commercial Joint Stock Bank', 'Mobile World Investment Corp', 'No Va Land Investment Group Corp', 'PetroVietnam Fertilizer and Chemicals Corp', 'Phu Nhuan Jewelry JSC', 'Refrigeration Electrical Engineering Corp', 'Sai Gon Thuong Tin Commercial Joint Stock Bank', 'Saigon Beer Alcohol Beverage Corp', 'Saigon Securities Incorporation', 'Vietnam Technological And Commercial Joint Stock Bank', 'Thanh Thanh Cong Tay Ninh JSC', 'Joint Stock Commercial Bank for Foreign Trade of Vietnam', 'Vietjet Aviation JSC', 'Vietnam Export Import Commercial Joint Stock Bank', 'Vietnam JSCmmercial Bank for Industry and Trade', 'Vietnam Prosperity Joint Stock Commercial Bank', 'Vietnam Dairy Products JSC', 'Vincom Retail JSC', 'Vingroup JSC', 'Vinhomes JSC', 'Petrovietnam Gas JSC']


In [7]:
# Date window passed to the historical-data query
start, end = datetime(2010, 1, 1), datetime(2019, 1, 1)

# Only the first five tickets are used throughout this notebook
print(lst_ticket[:5])

df = query.get_historical_data(lst_ticket[:5], start=start, end=end)

# Map each ticket name to the 'close' column of its rows in `df`
closed_price = {}
for ticket in lst_ticket[:5]:
    closed_price[ticket] = df.loc[df.name == ticket, 'close']

['Ho Chi Minh City Infrastructure Investment JSC', 'Ho Chi Minh City Development Joint Stock Commercial Bank', 'Hoa Phat Group JSC', 'Masan Group Corp', 'Military Commercial Joint Stock Bank']


## Compute ACF and PACF (values and confidence offsets for plotting)

In [8]:
from statsmodels.tsa.stattools import pacf, acf

def calculate_acf(time_series, lag=20, alpha=0.01):
    """Return autocorrelation estimates with confidence bounds expressed as offsets.

    Parameters
    ----------
    time_series : object exposing ``.values`` (e.g. a pandas Series)
        The observations handed to statsmodels' ``acf``.
    lag : int
        Forwarded to ``acf`` as ``nlags``.
    alpha : float
        Forwarded to ``acf``; with it set, ``acf`` returns the estimates
        together with a two-column confidence-interval array.

    Returns
    -------
    tuple
        ``(acf_value, confint_upper, confint_lower)`` where each bound has had
        the estimate subtracted from it, i.e. it is relative to the estimate.
    """
    estimates, bands = acf(time_series.values, nlags=lag, alpha=alpha)
    # Column 0 is the lower bound, column 1 the upper bound; re-centre both on the estimate.
    lower_offset, upper_offset = (bands[:, column] - estimates for column in (0, 1))
    return estimates, upper_offset, lower_offset


def calculate_pacf(time_series, lag=20, alpha=0.01):
    """Return partial-autocorrelation estimates with confidence bounds as offsets.

    Parameters
    ----------
    time_series : object exposing ``.values`` (e.g. a pandas Series)
        The observations handed to statsmodels' ``pacf``.
    lag : int
        Forwarded to ``pacf`` as ``nlags``.
    alpha : float
        Forwarded to ``pacf``; with it set, ``pacf`` returns the estimates
        together with a two-column confidence-interval array.

    Returns
    -------
    tuple
        ``(pacf_value, confint_upper, confint_lower)`` where each bound has had
        the estimate subtracted from it, i.e. it is relative to the estimate.
    """
    estimates, bands = pacf(time_series.values, nlags=lag, alpha=alpha)
    # Column 0 is the lower bound, column 1 the upper bound; re-centre both on the estimate.
    lower_offset, upper_offset = (bands[:, column] - estimates for column in (0, 1))
    return estimates, upper_offset, lower_offset

## Run model

### ARIMA model

In [9]:
# Run the project's `run_model_with_parameters` helper in 'ARIMA' mode on the
# closing-price series of each of the first five tickets and print the
# train/test evaluation entries of the returned result.
for ticket in lst_ticket[:5]:
    time_series = df.loc[df.name == ticket, 'close']
    result = run_model_with_parameters(time_series, model_selection='ARIMA')
    # NOTE(review): the reason for this one-second pause is not visible here -- confirm it is still needed.
    time.sleep(1)
    print(ticket)
    for label, key in (('Training result:', 'train_evaluation'),
                       ('Testing result:', 'test_evaluation')):
        print(label, result[key])

Ho Chi Minh City Infrastructure Investment JSC
Training result: {'mse': 2292548.081561389, 'rmse': 1514.1162708198433, 'mae': 1110.5542485873066, 'mape': 3.616332626362824}
Testing result: {'mse': 1121170.3347406513, 'rmse': 1058.8533112479042, 'mae': 799.4712728717434, 'mape': 3.21578642213701}
Ho Chi Minh City Development Joint Stock Commercial Bank
Training result: {'mse': 715864.913323427, 'rmse': 846.088005661011, 'mae': 661.9453234499405, 'mape': 2.2007933456231252}
Testing result: {'mse': 125634.02197809114, 'rmse': 354.44889896583277, 'mae': 289.0280462710216, 'mape': 0.9970510882658592}
Hoa Phat Group JSC
Training result: {'mse': 1555976.883578297, 'rmse': 1247.3880244648403, 'mae': 931.4530404557088, 'mape': 2.4271721844256766}
Testing result: {'mse': 863472.3308899885, 'rmse': 929.2321189509048, 'mae': 713.451046205636, 'mape': 2.1788873936193545}
Masan Group Corp
Training result: {'mse': 17009743.410514235, 'rmse': 4124.287018445035, 'mae': 2894.9199857529634, 'mape': 3.408

### ANN model

In [None]:
# Run the project's `run_model_with_parameters` helper in 'ANN' mode on the
# closing-price series of each of the first five tickets and print the
# train/test evaluation entries of the returned result.
for ticket in lst_ticket[:5]:
    time_series = df.loc[df.name == ticket, 'close']
    result = run_model_with_parameters(time_series, model_selection='ANN')
    # NOTE(review): the reason for this one-second pause is not visible here -- confirm it is still needed.
    time.sleep(1)
    print(ticket)
    for label, key in (('Training result:', 'train_evaluation'),
                       ('Testing result:', 'test_evaluation')):
        print(label, result[key])

### Hybrid model

In [None]:
# Run the project's `run_model_with_parameters` helper in 'Hybrid' mode on the
# closing-price series of each of the first five tickets and print the
# train/test evaluation entries of the returned result.
for ticket in lst_ticket[:5]:
    time_series = df.loc[df.name == ticket, 'close']
    result = run_model_with_parameters(time_series, model_selection='Hybrid')
    # NOTE(review): the reason for this one-second pause is not visible here -- confirm it is still needed.
    time.sleep(1)
    print(ticket)
    for label, key in (('Training result:', 'train_evaluation'),
                       ('Testing result:', 'test_evaluation')):
        print(label, result[key])

### Run model without parameters

In [None]:
# Call `run_model_without_parameters` in 'Hybrid' mode for each of the first
# five tickets and print the 'lag', 'hidden_layers' and 'order' entries of the
# first returned value (presumably the selected configuration -- confirm
# against src.utilities).
for ticket in lst_ticket[:5]:
    time_series = df.loc[df.name == ticket, 'close']
    result, lst_result = run_model_without_parameters(
        time_series,
        model_selection='Hybrid',
        q=range(4),
    )
    print(ticket, ':', *(result[key] for key in ('lag', 'hidden_layers', 'order')))

In [None]:
# Call `run_model_without_parameters` in 'ARIMA' mode for each of the first
# five tickets and print the 'order' entry of the first returned value
# (presumably the selected configuration -- confirm against src.utilities).
for ticket in lst_ticket[:5]:
    time_series = df.loc[df.name == ticket, 'close']
    result, lst_result = run_model_without_parameters(
        time_series,
        model_selection='ARIMA',
        q=range(4),
    )
    print(ticket, ':', *(result[key] for key in ('order',)))

In [None]:
# Call `run_model_without_parameters` in 'ANN' mode for each of the first five
# tickets and print the 'lag' and 'hidden_layers' entries of the first returned
# value (presumably the selected configuration -- confirm against src.utilities).
for ticket in lst_ticket[:5]:
    time_series = df.loc[df.name == ticket, 'close']
    result, lst_result = run_model_without_parameters(
        time_series,
        model_selection='ANN',
        q=range(4),
    )
    print(ticket, ':', *(result[key] for key in ('lag', 'hidden_layers')))

In [None]:
# print(result['test_evaluation'])
# lst_dct_result = list()
# for result_ in lst_result:
#     lst_dct_result.append({
#         'order': result_['order'],
#         'mse': result_['test_evaluation']['mse'],
#         'rmse': result_['test_evaluation']['rmse'],
#         'mae': result_['test_evaluation']['mae'],
#         'mape': result_['test_evaluation']['mape'],
#     })
# df_result = pd.DataFrame(data=lst_dct_result)
# df_result.head(20)