# AAPL price forecasting
- This notebook was created in the Google Colab environment.
- Runtime : GPU
- LightGBM

In [1]:
# Mount Google Drive at /content/drive so the project files stored there
# can be read and written from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Put the project folder on the import path so that the local helper module
# (make_dataset.py) can be imported further down.
import sys
sys.path.append('/content/drive/MyDrive/Github/Time-series_price_analysis')

In [3]:
# List the Python helper modules that live in the project folder.
!ls /content/drive/MyDrive/Github/Time-series_price_analysis/*.py

/content/drive/MyDrive/Github/Time-series_price_analysis/make_dataset.py


## 0. Import Packages

In [41]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import joblib
import random as rn
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from lightgbm import LGBMRegressor
import os

from make_dataset import time_transform, make_input

In [51]:
# Reproducibility: drive every random source used here from a single seed.
seed_num = 42

# NOTE(review): PYTHONHASHSEED is read when the interpreter starts, so setting
# it here presumably only affects child processes -- confirm.
os.environ['PYTHONHASHSEED'] = str(seed_num)

rn.seed(seed_num)          # Python's built-in `random` module
np.random.seed(seed_num)   # NumPy's legacy global generator

## 1. Load and visualize dataset

In [115]:
# Load the AAPL price table from the project's dataset folder on Drive.
df = pd.read_csv("/content/drive/MyDrive/Github/Time-series_price_analysis/dataset/AAPL.csv")

In [116]:
# Convert the Date column with the project helper `time_transform`, then build
# a chronologically ordered copy with a fresh 0..n-1 index.
# NOTE(review): `df` is modified in place, so re-running this cell feeds
# already-converted values back into `time_transform` -- confirm that is safe.
df['Date'] = df['Date'].apply(time_transform)
dataset = df.sort_values(by='Date').reset_index(drop=True)

In [117]:
# Quick sanity check: overall size of the sorted table and its first rows.
print("Shape of dataset :", dataset.shape)
dataset.head()

Shape of dataset : (2539, 7)


Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,2012-02-27,18.78,18.62,18.87,18.44,547.58M,0.64%
1,2012-02-28,19.12,18.86,19.12,18.78,600.39M,1.81%
2,2012-02-29,19.37,19.34,19.56,19.13,952.00M,1.31%
3,2012-03-01,19.45,19.58,19.58,19.24,683.25M,0.41%
4,2012-03-02,19.47,19.44,19.53,19.38,431.71M,0.10%


In [118]:
# Pull the two columns used downstream out of the frame as plain arrays:
# the dates and the 'Price' column (the series that is modelled).
date, close = dataset['Date'].values, dataset['Price'].values

## 2. Preprocessing

In [119]:
# Min-max scaler that maps the price series onto the [0, 1] interval.
# NOTE(review): the scaler is fitted on the complete series (training and
# validation part together), and `scaled_close` is not used by the split
# that follows, which slices the unscaled `close` array.
scaler = MinMaxScaler(feature_range=(0, 1))
price_as_column = np.asarray(close).reshape(-1, 1)  # one sample per row
scaled_close = scaler.fit_transform(price_as_column)
del price_as_column

In [120]:
# Chronological hold-out split: the first 79.5 % of the observations are used
# for fitting and the remaining tail for validation (no shuffling).
train_size = int(len(close) * 0.795)
val_size = len(close) - train_size

# The unscaled prices are split here (the MinMax-scaled series is not used);
# each part is turned into a single-column 2-D array.
train_data = close[:train_size].reshape(-1, 1)
val_data = close[train_size:].reshape(-1, 1)

In [121]:
# Window length handed to the project helper `make_input`, which turns each
# split into a feature matrix and a target vector.
time_step = 1
(X_train, y_train), (X_val, y_val) = (
    make_input(split, time_step) for split in (train_data, val_data)
)

In [122]:
# Report the sizes of the feature matrices and target vectors for both splits.
print("X train shape :", X_train.shape)
print("y train shape :", y_train.shape)
print("X val shape :", X_val.shape)
print("y val shape :", y_val.shape)

X train shape : (2016, 1)
y train shape : (2016,)
X val shape : (519, 1)
y val shape : (519,)


## 3. Modeling - LightGBM

In [123]:
# Hyper-parameters forwarded to LGBMRegressor as keyword arguments.
params = dict(
    n_estimators=10000,       # number of boosting rounds
    objective='regression',
    metric='rmse',
    boosting_type='gbdt',
    max_depth=-1,             # -1: no explicit depth limit (LightGBM docs)
    learning_rate=0.01,
    subsample=0.72,           # row sub-sampling ratio ...
    subsample_freq=4,         # ... re-drawn every 4 rounds
    # NOTE(review): X_train has a single column, so a feature fraction
    # below 1 presumably has no practical effect -- confirm.
    feature_fraction=0.4,
    lambda_l1=1,              # L1 regularisation
    lambda_l2=1,              # L2 regularisation
    seed=seed_num,            # same seed as the global RNGs
)

In [124]:
# Build the regressor from `params` and fit it on the training windows only.
# NOTE(review): no validation set is passed to fit(), so all `n_estimators`
# rounds are presumably trained without any early stopping -- confirm this is
# intended.
model = LGBMRegressor(**params)
model.fit(X_train, y_train)

LGBMRegressor(feature_fraction=0.4, lambda_l1=1, lambda_l2=1,
              learning_rate=0.01, metric='rmse', n_estimators=10000,
              objective='regression', seed=42, subsample=0.72,
              subsample_freq=4)

In [125]:
# Persist the fitted model to the project's model_weights folder on Drive.
joblib.dump(model, '/content/drive/MyDrive/Github/Time-series_price_analysis/model_weights/lgbm.pkl')

['/content/drive/MyDrive/Github/Time-series_price_analysis/model_weights/lgbm.pkl']

## 4. Evaluation

In [126]:
# Restore the model from the file written in the modeling section.
# NOTE(review): joblib.load unpickles the file, so only load files you trust.
loaded_model = joblib.load('/content/drive/MyDrive/Github/Time-series_price_analysis/model_weights/lgbm.pkl')

In [130]:
# Score both splits with the model restored from disk.
# NOTE(review): `train_predicton` is misspelled; the name is kept unchanged
# in case other cells refer to it.
train_predicton, val_prediction = (
    loaded_model.predict(features) for features in (X_train, X_val)
)

print("Train pred shape :", train_predicton.shape)
print("Val pred shape :", val_prediction.shape)

Train pred shape : (2016,)
Val pred shape : (519,)


In [131]:
# train_pred = scaler.inverse_transform(train_predicton.reshape(-1,1))
# val_pred = scaler.inverse_transform(val_prediction.reshape(-1,1))
# print(val_pred[:5])

In [132]:
# Peek at the first five validation predictions.
print(val_prediction[:5])

[75.36302961 73.0791612  73.0791612  66.53690579 71.02891821]


In [133]:
# Rebuild the evaluation targets directly from the unscaled closing prices,
# using the same split point (`train_size`) as the preprocessing step.
#
# The window length is taken from `time_step` -- the value that was used to
# build X_train / X_val -- rather than a hard-coded 1. With the literal, a
# change to `time_step` would presumably leave `yval` misaligned with
# `val_prediction` (different lengths / offsets) in the RMSE cell.
close = np.array(close)
cc = close.reshape((len(close), 1))   # single-column 2-D view of the prices
tt1, tt2 = cc[0:train_size, :], cc[train_size:len(close), :]
xtrain, ytrain = make_input(tt1, time_step)
Xval, yval = make_input(tt2, time_step)

In [134]:
# Root-mean-squared error of the validation predictions against the
# unscaled targets.
val_rmse = math.sqrt(
    mean_squared_error(y_true=yval, y_pred=val_prediction)
)
print('Val RMSE: %.3f' % (val_rmse,))

Val RMSE: 57.708
