# Time Series Forecasting


In [None]:
# Predict car sales

In [None]:
## Importing libraries
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt

In [None]:
## Load the raw sales file as-is (no date parsing yet) and preview the first rows
data=pd.read_csv('sales-cars.csv')
data.head()

In [None]:
# Inspect a single Month entry to see how the dates are stored
data.Month[1]
# Month is read in as a plain string here, not as a date object

In [None]:
# Summarise the columns and their dtypes for the raw load
data.info()

In [None]:
## Re-read the file, this time parsing the first column (position 0) from string to date
# and using the 'Month' column as the index
data=pd.read_csv('sales-cars.csv',parse_dates=[0],index_col='Month')

In [None]:
# Preview the re-loaded frame, now indexed by Month
data.head()

In [None]:
# Check the index and column dtypes after the date-parsing load
data.info()

In [None]:
# Preview the first rows once more before plotting
data.head()

In [None]:
# Data Visualization

In [None]:
# Line plot of the whole series against its Month index
plt.plot(data)
plt.show()
## From the plot we can see that the given series is not stationary

## Stationarity

* Stationarity means that the statistical properties of a time series (or rather the process generating it) do not change over time.
* Stationarity is important because many useful analytical tools and statistical tests and models rely on it.

A stationary series has:

* Constant mean
* Constant variance
* Constant covariance between periods of identical distance

* The last condition states that the covariance between time periods of identical length (say 10 days/hours/minutes) should be identical to the covariance of any other period of the same length:

![image-2.png](attachment:image-2.png)



* Why do we need stationarity? The two most important reasons:
  1. Stationary processes are easier to analyze.
  2. Stationarity is assumed by most of the algorithms.

* How do we check whether a given series is stationary?
  We check its autocorrelation.
  Autocorrelation is the similarity between observations as a function of the time lag between them.

* When plotting the value of the ACF for increasing lags (a plot called a correlogram), the values tend to decay to zero quickly for a stationary time series (see figure 1, right), while for non-stationary data the decay is much slower.

![image.png](attachment:image.png)

In [None]:
## Plotting the autocorrelation plot
from statsmodels.graphics.tsaplots import plot_acf

In [None]:
## Autocorrelation plot of the raw series.
# plot_acf returns the Figure it draws; if that is left as the cell's last expression the
# notebook echoes it and the plot is rendered twice. Ending with plt.show() (as the
# line-plot cell does) renders it once.
plot_acf(data)
plt.show()
## From the autocorrelation plot it is clear that the given series is not stationary:
# a slowly decaying correlation means the data is not stationary.

In [None]:
## Make the data stationary by taking the difference with the previous period (lag 1)
data1=data.diff(periods=1)

# One round of differencing = integrated of order 1; this is the d in the (p, d, q) order (d --> difference)

In [None]:
data1.head() # the 1st row is NaN because there is no previous value to take the difference with

In [None]:
# Remove the 1st row, which is NaN because it has no previous value to difference against.
# The result is rebuilt from `data` rather than from `data1` itself so the cell is
# idempotent: re-running it no longer strips one more (valid) row each time.
data1 = data.diff(periods=1).iloc[1:]

In [None]:
# Preview the differenced series after removing the leading NaN row
data1.head()

In [None]:
# Autocorrelation plot of the differenced series.
# plt.show() renders the figure once; leaving the Figure returned by plot_acf as the
# cell's last expression makes the notebook draw the same plot twice.
plot_acf(data1)
plt.show()

In [None]:
# Line plot of the differenced series
data1.plot()
# No trend is visible here, so the differenced data is treated as stationary

In [None]:
# (rows, columns) of the differenced series; the row count drives the split below
data1.shape

In [None]:
## Chronological train/test split of the differenced series
# data1 has 35 rows: positions 0-26 (27 rows) are used for training,
# positions 27-34 (the remaining rows) are held out for testing.
train, test = data1.iloc[:27], data1.iloc[27:]

In [None]:
# Confirm the size of the training slice
train.shape

In [None]:
# training data from 0 to 26 (27 records)

In [None]:
## Imports for the autoregressive (AR) model
# NOTE(review): AR is a legacy statsmodels class; later releases replace it with
# statsmodels.tsa.ar_model.AutoReg. Confirm this import works with the installed version.
from statsmodels.tsa.ar_model import AR
# NOTE(review): mean_squared_error is imported but never used in the cells visible here.
from sklearn.metrics import mean_squared_error

In [None]:
## Model creation
ar_model = AR(train) # build the AR model on the training slice of the differenced series

# Fit the model; no arguments are passed, so the library defaults are used
ar_model_fit=ar_model.fit()

In [None]:
## Predict the held-out period: one value for every record in `test`.
# The bounds are derived from the split instead of being hard-coded, so they stay correct
# if the split point changes. With 27 training rows and 8 test rows this is start=27, end=34
# (`end` is inclusive).
prediction = ar_model_fit.predict(start=len(train), end=len(train) + len(test) - 1)

In [None]:
# Held-out values the predictions are compared against
test

In [None]:
# Overlay the held-out (differenced) values and the AR predictions.
# Labels and a legend make it possible to tell the two lines apart.
plt.plot(test, label='actual (test)')
plt.plot(prediction, color='green', label='AR prediction')
plt.legend()
plt.show()

## ARIMA Model

In [None]:
## Importing the ARIMA class
# NOTE(review): statsmodels.tsa.arima_model is the legacy ARIMA implementation; newer
# statsmodels releases provide statsmodels.tsa.arima.model.ARIMA instead, and its
# forecast() return value differs from the tuple indexed with [0] in the cells below.
# Confirm against the installed version before upgrading.
from statsmodels.tsa.arima_model import ARIMA

In [None]:
# Size of the original (undifferenced) series, which is what the ARIMA section splits
data.shape

In [None]:
# Chronological split of the original series (36 rows in total):
# positions 0-26 (27 rows) are used for training, everything from position 27 on for testing.
train, test = data.iloc[:27], data.iloc[27:]

In [None]:
# Confirm the size of the training slice
train.shape

In [None]:
# Confirm the size of the held-out slice; the forecast horizon should match it
test.shape

In [None]:
## Model object creation and fitting the model
model_arima = ARIMA(train, order=(1,1,0)) # order is (p, d, q)

# p - number of periods (lags) used by the autoregressive part
# d - how many times differencing is done (integrated order)
# q - number of periods used by the moving-average part

model_arima_fit = model_arima.fit()

In [None]:
## Evaluate the model: print its AIC, the score later used to compare candidate orders
print(model_arima_fit.aic)

In [None]:
## Predict the out-of-sample points: one step for every record held out in `test`
# (9 records here), so the horizon follows the split instead of being hard-coded.
predictions = model_arima_fit.forecast(steps=len(test))[0]

# forecast() returns the forecast values, the standard error of the forecast,
# and the confidence interval information.
# Index 0 selects the first of those three results, i.e. the whole array of forecast
# values (not just the first forecast point).

In [None]:
# Forecast values produced for the test period
predictions

In [None]:
## Plot the held-out (test) data
plt.plot(test)


In [None]:
## Plot the forecast together with the held-out data.
# `predictions` is a bare array with no dates of its own, so plotting it alone puts it on a
# 0..n-1 axis in a separate figure. Borrowing the dates from `test` puts both lines on the
# same axis so they can actually be compared.
n_points = min(len(test), len(predictions))  # guard against a horizon/test length mismatch
plt.plot(test, label='actual (test)')
plt.plot(test.index[:n_points], predictions[:n_points], color='green', label='forecast')
plt.legend()
plt.show()

In [None]:
## Build every candidate (p, d, q) order for the search
import itertools  # provides the cartesian product of the three ranges

# p, d and q are each tried for the values 0 through 4
p = d = q = range(5)

pdq = [order for order in itertools.product(p, d, q)]
pdq

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
## Fit one ARIMA model per candidate order and print each fitted model's AIC
aic_by_order = {}    # order -> AIC, for every order that could be fitted
failed_orders = {}   # order -> exception, for every order that could not

for params in pdq:
    try:
        model_arima = ARIMA(train, order=params)
        model_arima_fit = model_arima.fit()
    except Exception as exc:
        # Some orders cannot be fitted; skip them but keep the reason instead of discarding
        # it. Catching Exception rather than using a bare `except:` also leaves
        # KeyboardInterrupt free to stop the search.
        failed_orders[params] = exc
        continue
    aic_by_order[params] = model_arima_fit.aic
    print(params, model_arima_fit.aic)

# Summarise what was skipped so a search that silently fitted nothing is easy to spot
print(len(failed_orders), 'of', len(pdq), 'orders could not be fitted',
      sorted({type(exc).__name__ for exc in failed_orders.values()}))

# Report the order with the lowest AIC (ignoring NaN scores) rather than reading it off the log
comparable = {order: aic for order, aic in aic_by_order.items() if pd.notna(aic)}
if comparable:
    best_order = min(comparable, key=comparable.get)
    print('lowest AIC:', best_order, comparable[best_order])

In [None]:
## Create the final model with the order that gave the lowest AIC in the search above
# NOTE(review): the order is hard-coded; re-check it if the data or the search range changes.
model_arima = ARIMA(train, order=(4, 2, 1)) # p,d,q

model_arima_fit = model_arima.fit()

In [None]:
# AIC of the final model
print(model_arima_fit.aic)

In [None]:

# Forecast one value per record in the test data. `test` is data[27:] of the 36-row
# series, i.e. 9 records, so the horizon is taken from len(test) rather than a literal
# (a fixed steps=8 leaves the last held-out month without a forecast).
# Index 0 keeps the array of forecast values from forecast()'s result.
prediction = model_arima_fit.forecast(steps=len(test))[0]
prediction

In [None]:
# Plot the held-out (test) data
plt.plot(test)

In [None]:
# Plot the final model's forecast together with the held-out data.
# `prediction` is a bare array with no dates, so on its own it is drawn on a 0..n-1 axis in
# a separate figure; borrowing the dates from `test` puts both lines on the same axis.
n_points = min(len(test), len(prediction))  # the two lengths are not guaranteed to match
plt.plot(test, label='actual (test)')
plt.plot(test.index[:n_points], prediction[:n_points], color='green', label='forecast')
plt.legend()
plt.show()