# Time Series Forecast using ARIMA model
- Let's predict the stock of 10 ft empty containers at Busan Harbor.
- The model is trained on 2018–2019 data and tested on data from January 2020 to May 2020.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the monthly container-holding table; the CSV's first column is used
# as the DataFrame index.
csv_path = '../input/monthly-container-holding-of-ports-in-south-korea/container.csv'
data = pd.read_csv(csv_path, index_col=0)
data.head()

# Data Extraction
- Extract the data we need.
- Harbor == Busan
- Train: Date < 2020.01.01
- Test: Date >= 2020.01.01

In [None]:
# Keep only the rows for Busan that are flagged isKorean.
# Both conditions are combined into a single mask, and the selection is
# copied so that the column assignment below writes to an independent frame
# instead of a slice of the loaded table (this avoids pandas'
# SettingWithCopyWarning and any ambiguity about what gets modified).
# The explicit `== True` is kept because the dtype of isKorean is not
# visible here.
busan_korean_rows = (data['Harbor'] == 'Busan') & (data['isKorean'] == True)  # noqa: E712
data = data.loc[busan_korean_rows].copy()

# Make sure the target column is floating point before modelling.
data['Empty_10'] = data['Empty_10'].astype('float64')
data.tail()

In [None]:
# Build a standalone series of the Empty_10 values, labelled by the values
# of the Date column.
empty_10_values = data['Empty_10'].tolist()
series = pd.Series(empty_10_values, index=data['Date'])
series.head()

In [None]:
# Training portion: every observation labelled up to and including 2019-12-31.
x = series.loc[:'2019-12-31']
x.tail()

In [None]:
# Hold-out portion: every observation labelled 2020-01-01 or later.
y = series.loc['2020-01-01':]
y.head()

# Plot Data

In [None]:
# Line chart of the whole series (training and hold-out periods together).
series.plot.line()
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Create each wide figure *before* drawing and hand its axes to statsmodels.
# Calling plt.figure(figsize=...) after the plots only opens an extra empty
# figure and leaves the ACF/PACF plots at the default size.
_, acf_ax = plt.subplots(figsize=(20, 4))
plot_acf(series, ax=acf_ax)

_, pacf_ax = plt.subplots(figsize=(20, 4))
plot_pacf(series, ax=pacf_ax)

plt.show()

## Set p, q
위 두 그래프를 기반으로 자동회귀라 두고, p=1, q=0으로 모수 설정

In [None]:
# First-order difference of the series.
# diff() leaves NaN in the first position; with the default missing-value
# handling of plot_acf/plot_pacf that NaN propagates through the whole
# estimate and the plots come out empty, so it is dropped here.
diff_1 = series.diff(periods=1).dropna()
diff_1.plot()
plot_acf(diff_1)
plot_pacf(diff_1)
plt.show()

## Set d
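차분 1회로 그래프가 모두 소실되므로 d는 그냥 0으로 두자. (단, `diff()` 결과의 첫 값은 NaN이므로 결측치를 제거하지 않으면 ACF/PACF 그래프가 그려지지 않을 수 있다. `dropna()`로 결측치를 제거한 뒤 그래프를 다시 확인하는 것이 좋다.)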

In [None]:
from statsmodels.tsa.arima_model import ARIMA, ARIMAResults

# Fit an ARIMA model with order (p, d, q) = (1, 0, 0) on the training series.
# NOTE(review): statsmodels.tsa.arima_model is the legacy ARIMA API and is
# deprecated/removed in newer statsmodels releases — confirm the installed
# version before upgrading.
arima_order = (1, 0, 0)
model = ARIMA(x, order=arima_order)

# trend='nc' fits without a constant term; disp=1 prints optimizer progress.
fit_options = dict(trend='nc', full_output=True, disp=1)
model_fit = model.fit(**fit_options)

fit_summary = model_fit.summary()
print(fit_summary)

In [None]:
# Plot the fitted model's predictions using the results object's own helper.
# Left as the cell's final expression so the notebook displays its result.
model_fit.plot_predict()

In [None]:
# Forecast as many steps ahead as there are hold-out observations in `y`, so
# the forecast can be compared against the whole test period rather than
# only its first point. At least one step is always requested.
forecast_horizon = max(len(y), 1)
fore = model_fit.forecast(steps=forecast_horizon)
print(fore)

In [None]:
# Print the hold-out observations for comparison with the forecast output.
print(y)