## How to use Transformer Networks to build a Forecasting model: data load
- Reference article: https://towardsdatascience.com/how-to-use-transformer-networks-to-build-a-forecasting-model-297f9270e630

<div style="text-align: right"> <b>Author : Kwang Myung Yu</b></div>
<div style="text-align: right"> Initial upload: 2023.11.06</div>
<div style="text-align: right"> Last update: 2023.11.06</div>

In [1]:
import datetime
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings; warnings.filterwarnings('ignore')
# Plot styling: apply the 'seaborn-v0_8-whitegrid' matplotlib style sheet.
plt.style.use('seaborn-v0_8-whitegrid')
# Render figures inline in the notebook output.
%matplotlib inline
# To list the style names that can be passed to plt.style.use: print(plt.style.available)

# Options for pandas: show up to 30 columns when a DataFrame is displayed.
pd.options.display.max_columns = 30

In [2]:
from tqdm import tqdm
from data_utils import process_df

In [3]:
# Load the raw CSV into a DataFrame; the cells below inspect and then process this frame.
data = pd.read_csv("data/data.csv")

In [4]:
# Peek at the first rows to check that the columns were read as expected.
data.head()

Unnamed: 0,timestamp,index,article,amplitude,offset,views
0,2015-01-01,0,e288d86c0c8641a7b25ba1cc435e28d7,3.203435,0.774873,2.014996
1,2015-01-02,1,e288d86c0c8641a7b25ba1cc435e28d7,3.197775,0.743587,3.224294
2,2015-01-03,2,e288d86c0c8641a7b25ba1cc435e28d7,3.192114,0.716603,2.987859
3,2015-01-04,3,e288d86c0c8641a7b25ba1cc435e28d7,3.186454,0.694619,3.996244
4,2015-01-05,4,e288d86c0c8641a7b25ba1cc435e28d7,3.180794,0.678206,3.76322


In [5]:
# Size of the raw frame as (rows, columns).
data.shape

(36540000, 6)

In [6]:
# process_df is imported from the local data_utils module; it returns a frame and a
# list of column names. Judging by the displayed result, the returned frame carries
# extra calendar columns and a views_lag_1 column — confirm against data_utils.
# NOTE(review): `data` is rebound to the processed frame here, so re-running this
# cell without re-running the CSV load passes already-processed data back in.
data, cols = process_df(data)

In [7]:
# Display the processed frame (pandas elides the middle rows of a frame this large).
data

Unnamed: 0,timestamp,index,article,amplitude,offset,views,day_of_month,day_of_year,month,week_of_year,year,views_lag_1
0,2015-01-01,0,e288d86c0c8641a7b25ba1cc435e28d7,3.203435,0.774873,2.014996,0.032258,0.002740,0.083333,0.018868,0.0,0.000000
1,2015-01-02,1,e288d86c0c8641a7b25ba1cc435e28d7,3.197775,0.743587,3.224294,0.064516,0.005479,0.083333,0.018868,0.0,2.014996
2,2015-01-03,2,e288d86c0c8641a7b25ba1cc435e28d7,3.192114,0.716603,2.987859,0.096774,0.008219,0.083333,0.018868,0.0,3.224294
3,2015-01-04,3,e288d86c0c8641a7b25ba1cc435e28d7,3.186454,0.694619,3.996244,0.129032,0.010959,0.083333,0.018868,0.0,2.987859
4,2015-01-05,4,e288d86c0c8641a7b25ba1cc435e28d7,3.180794,0.678206,3.763220,0.161290,0.013699,0.083333,0.037736,0.0,3.996244
...,...,...,...,...,...,...,...,...,...,...,...,...
36539995,2019-12-28,1822,28939ba7e81d47a8944d2029a7966c9e,-1.324961,-1.346280,-0.415053,0.903226,0.991781,1.000000,0.981132,0.8,-0.409966
36539996,2019-12-29,1823,28939ba7e81d47a8944d2029a7966c9e,-1.326153,-1.193783,-0.266651,0.935484,0.994521,1.000000,0.981132,0.8,-0.415053
36539997,2019-12-30,1824,28939ba7e81d47a8944d2029a7966c9e,-1.327345,-1.035194,-0.247692,0.967742,0.997260,1.000000,0.018868,0.8,-0.266651
36539998,2019-12-31,1825,28939ba7e81d47a8944d2029a7966c9e,-1.328537,-0.875333,-0.433264,1.000000,1.000000,1.000000,0.018868,0.8,-0.247692


In [8]:
# Column names returned by process_df; they become the "features" entry of the config below.
cols

['day_of_month', 'day_of_year', 'month', 'week_of_year', 'year']

In [9]:
# Write the processed frame to disk.
# index=False keeps the row index (a plain running integer in the display above)
# out of the file; without it pandas writes the index as an extra unnamed column,
# which comes back as "Unnamed: 0" when the CSV is read again.
data.to_csv("data/processed_data.csv", index=False)

In [10]:
# Gather the column roles in a single dict (feature columns from process_df,
# target column, grouping key, lag columns); a later cell writes it to
# data/config.json.
config = dict(
    features=cols,
    target="views",
    group_by_key="article",
    lag_features=["views_lag_1"],
)
# Echo the dict as the cell output.
config

{'features': ['day_of_month', 'day_of_year', 'month', 'week_of_year', 'year'],
 'target': 'views',
 'group_by_key': 'article',
 'lag_features': ['views_lag_1']}

In [12]:
import json

# Save the config as JSON with 4-space indentation.
with open("data/config.json", "w") as config_file:
    config_file.write(json.dumps(config, indent=4))