Date: 8-Dec-23

Time series forecasting (using Facebook Prophet): Air quality (Bangkok)
- Learning from: Prasert Kanawattanachai (CBS)
- Youtube: https://www.youtube.com/prasertcbs
- Github: https://github.com/prasertcbs/
- Dataset: bangkok-air-quality.csv (source: https://aqicn.org/data-platform/register/)
- Facebook Prophet: https://github.com/facebook/prophet

In [1]:
# import libraries

import sys
import pandas as pd
import numpy as np
# import math
# NOTE(review): `matplotlib.pylab` is a discouraged legacy namespace; `matplotlib.pyplot`
# is the conventional target for the `plt` alias. Confirm nothing relies on pylab-only
# names before switching.
import matplotlib.pylab as plt
import seaborn as sns

# Register pandas' formatters/converters with matplotlib (relevant when plotting
# datetime-like data such as the `date` column used in this notebook).
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters() 

# Draw figures inline in the notebook and request the high-resolution 'retina' format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [2]:
# Record the interpreter and library versions used for this run, so the results
# can be reproduced later. Labels are padded so the colons line up.
print(f'Python version  : {sys.version}')
print(f'pandas version  : {pd.__version__}')
print(f'numpy version   : {np.__version__}')
print(f'seaborn version : {sns.__version__}')

# Last expression of the cell: displays the wall-clock time of this execution.
pd.Timestamp.now()

Pythoon version : 3.11.1 (main, Dec 24 2022, 22:46:17) [Clang 14.0.0 (clang-1400.0.29.202)]
pandas version  : 1.5.2
numpy version   : 1.24.1
seaborn version : 0.12.2


Timestamp('2023-12-08 21:12:31.354842')

In [3]:
import warnings

warnings.filterwarnings('ignore')

In [4]:
# First pass: load the Bangkok air-quality CSV from GitHub with pandas' default
# parsing options, then preview the first five rows.
url = 'https://github.com/prasertcbs/basic-dataset/raw/master/bangkok-air-quality.csv'
data = pd.read_csv(filepath_or_buffer=url)
data.head(n=5)

Unnamed: 0,date,pm25,pm10,o3,no2,so2,co
0,2021/3/1,93,30,8,6,1.0,
1,2021/3/2,68,27,7,5,1.0,
2,2021/3/3,58,37,13,9,1.0,
3,2021/3/4,74,35,13,6,,
4,2021/3/5,72,38,15,6,1.0,


In [5]:
# Inspect column names, non-null counts and dtypes of the frame loaded with default options.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2624 entries, 0 to 2623
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    2624 non-null   object
 1    pm25   2624 non-null   object
 2    pm10   2624 non-null   object
 3    o3     2624 non-null   object
 4    no2    2624 non-null   object
 5    so2    2624 non-null   object
 6    co     2624 non-null   object
dtypes: object(7)
memory usage: 143.6+ KB


In [6]:
# Second pass: re-read the same CSV, this time parsing `date` as datetimes,
# treating a lone space as a missing value, and skipping the blanks that
# follow each delimiter.
data = pd.read_csv(
    url,
    parse_dates=['date'],
    na_values=' ',
    skipinitialspace=True,
)
data.head()

Unnamed: 0,date,pm25,pm10,o3,no2,so2,co
0,2021-03-01,93.0,30.0,8.0,6.0,1.0,
1,2021-03-02,68.0,27.0,7.0,5.0,1.0,
2,2021-03-03,58.0,37.0,13.0,9.0,1.0,
3,2021-03-04,74.0,35.0,13.0,6.0,,
4,2021-03-05,72.0,38.0,15.0,6.0,1.0,


In [7]:
# Re-check dtypes and non-null counts after re-reading the CSV with explicit parsing options.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2624 entries, 0 to 2623
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    2624 non-null   datetime64[ns]
 1   pm25    1680 non-null   float64       
 2   pm10    2609 non-null   float64       
 3   o3      2610 non-null   float64       
 4   no2     2611 non-null   float64       
 5   so2     1877 non-null   float64       
 6   co      963 non-null    float64       
dtypes: datetime64[ns](1), float64(6)
memory usage: 143.6 KB


In [8]:
# Put the rows in chronological order (oldest first) and renumber the index from 0.
# sort_values already returns a new frame, so no extra copy is needed.
data = data.sort_values(by='date', ascending=True, ignore_index=True)
data

Unnamed: 0,date,pm25,pm10,o3,no2,so2,co
0,2013-12-31,,,68.0,33.0,3.0,7.0
1,2014-01-01,,84.0,90.0,48.0,5.0,16.0
2,2014-01-02,,84.0,119.0,63.0,7.0,14.0
3,2014-01-03,,103.0,95.0,57.0,7.0,13.0
4,2014-01-04,,101.0,52.0,34.0,1.0,
...,...,...,...,...,...,...,...
2619,2021-03-10,98.0,53.0,16.0,12.0,1.0,
2620,2021-03-11,111.0,52.0,19.0,10.0,,
2621,2021-03-12,101.0,48.0,15.0,7.0,1.0,
2622,2021-03-13,96.0,32.0,9.0,4.0,1.0,


In [9]:
# Name of the column that will be forecast; later cells refer to it via `y_col`.
y_col = "pm25"
y_col

'pm25'

In [10]:
# Keep only the rows that have a value in the forecast column (`y_col`).
# The result is rebound to `data` rather than mutated with inplace=True:
# dropna returns a new frame, and the original index labels are preserved.
data = data.dropna(subset=[y_col])

In [11]:
# Display the frame after rows lacking a value in the forecast column were removed.
data

Unnamed: 0,date,pm25,pm10,o3,no2,so2,co
938,2016-07-30,61.0,19.0,17.0,15.0,8.0,6.0
939,2016-07-31,68.0,24.0,25.0,13.0,7.0,6.0
940,2016-08-01,60.0,20.0,26.0,14.0,2.0,6.0
941,2016-08-02,68.0,25.0,21.0,14.0,7.0,6.0
942,2016-08-03,70.0,26.0,23.0,16.0,2.0,6.0
...,...,...,...,...,...,...,...
2619,2021-03-10,98.0,53.0,16.0,12.0,1.0,
2620,2021-03-11,111.0,52.0,19.0,10.0,,
2621,2021-03-12,101.0,48.0,15.0,7.0,1.0,
2622,2021-03-13,96.0,32.0,9.0,4.0,1.0,


In [12]:
# Narrow the frame to the two columns used for forecasting: the date and the target.
df = data.loc[:, ['date', y_col]]
df.head(4)

Unnamed: 0,date,pm25
938,2016-07-30,61.0
939,2016-07-31,68.0
940,2016-08-01,60.0
941,2016-08-02,68.0


In [13]:
# Show the current column labels of the two-column frame.
df.columns

Index(['date', 'pm25'], dtype='object')

In [14]:
# Relabel the date column as `ds` and the target column as `y`, the column names
# used by Prophet's input format, then preview the result.
df = df.rename(columns={'date': 'ds', y_col: 'y'})
df.head(4)

Unnamed: 0,ds,y
938,2016-07-30,61.0
939,2016-07-31,68.0
940,2016-08-01,60.0
941,2016-08-02,68.0


### Time Series Forecasting with Prophet

In [1]:
pip install pystan

Collecting pystan
  Downloading pystan-3.7.0-py3-none-any.whl (13 kB)
Collecting aiohttp<4.0,>=3.6 (from pystan)
  Downloading aiohttp-3.9.1-cp311-cp311-macosx_11_0_arm64.whl.metadata (7.4 kB)
Collecting clikit<0.7,>=0.6 (from pystan)
  Downloading clikit-0.6.2-py2.py3-none-any.whl (91 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.8/91.8 kB[0m [31m927.8 kB/s[0m eta [36m0:00:00[0m kB/s[0m eta [36m0:00:01[0m
[?25hINFO: pip is looking at multiple versions of pystan to determine which version is compatible with other requirements. This could take a while.
Collecting pystan
  Downloading pystan-3.6.0-py3-none-any.whl (13 kB)
  Downloading pystan-3.5.0-py3-none-any.whl (13 kB)
  Downloading pystan-3.4.0-py3-none-any.whl (13 kB)
  Downloading pystan-3.3.0-py3-none-any.whl (13 kB)
  Downloading pystan-3.2.0-py3-none-any.whl (13 kB)
  Downloading pystan-3.1.1-py3-none-any.whl (13 kB)
  Downloading pystan-3.1.0-py3-none-any.whl (13 kB)
INFO: pip is s

In [2]:
pip install prophet==1.1.3

Collecting prophet==1.1.3
  Downloading prophet-1.1.3.tar.gz (68 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.5/68.5 kB[0m [31m440.1 kB/s[0m eta [36m0:00:00[0m kB/s[0m eta [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting cmdstanpy>=1.0.4 (from prophet==1.1.3)
  Using cached cmdstanpy-1.2.0-py3-none-any.whl.metadata (3.9 kB)
Collecting LunarCalendar>=0.0.9 (from prophet==1.1.3)
  Downloading LunarCalendar-0.0.9-py2.py3-none-any.whl (18 kB)
Collecting convertdate>=2.1.2 (from prophet==1.1.3)
  Downloading convertdate-2.4.0-py3-none-any.whl (47 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.9/47.9 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting holidays>=0.14.2 (from prophet==1.1.3)
  Downloading holidays-0.38-py3-none-any.whl.metadata (21 k

In [None]:
### Package installation failed on this machine (macOS, arm64). TODO: retry the installation on Windows.