# Check the stationarity of the time-series using ADF test
### The Augmented Dickey-Fuller (ADF) test tells us whether a time series is stationary or not. If the time series is not stationary, we can often make it stationary by differencing it.

#### Import the necessary libraries

In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import adfuller
import matplotlib.pyplot as plt
from datetime import datetime

#### Import Data

In [None]:
# Load the bi-daily averages; the 'time' column becomes the index and the
# first row of the file is used as the header.
csv_path = '/home/naveksha/NewData/Data/BiDailyAvgGB.csv'
df = pd.read_csv(csv_path, index_col='time', header=0)

#### Apply ADF test

In [None]:
# Run the ADF test on the raw 'bytes' column.
# The index labels are parsed from 'YYYY-MM-DD HH:MM:SS' strings into datetime
# objects first, so the series (and everything derived from it in later cells)
# carries a real time axis.
series = df.bytes
series.index = series.index.map(
    lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
)
X = series.values
result = adfuller(X)

# Name the pieces of the result tuple that get reported: position 0 is printed
# as the test statistic, position 1 as the p-value, position 4 holds the
# critical values keyed by label.
adf_statistic = result[0]
p_value = result[1]
critical_values = result[4]

print('ADF Statistic: %f' % adf_statistic)
print('p-value: %f' % p_value)
print('Critical Values:')
for level, threshold in critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

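#### The null hypothesis of the ADF test is that the series has a unit root, i.e. it is non-stationary. If the p-value is less than 0.05 (equivalently, the ADF statistic is more negative than the 5% critical value), we reject the null hypothesis and treat the series as stationary. If instead the ADF statistic is greater than the critical value, we cannot reject the null hypothesis and the series is considered non-stationary.
#### We difference the time series and visualize it.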

In [None]:
# First-order differencing of the series (each value minus its predecessor).
diff_ = series.diff()
# Second name bound to the same differenced series, taken before any
# missing values are dropped.
diff__ = diff_

# Draw on the current Axes explicitly instead of going through the pyplot
# shortcut functions.
axes = plt.gca()
axes.plot(diff_, label='First Order Differenced Data')
axes.set_xlabel('Time')
axes.set_ylabel('Average Bytes Exchanged in 12 hours')
axes.legend()
plt.show()

####  Check the stationarity of differenced data

In [None]:
# Repeat the ADF test on the differenced data.
# Missing values are dropped first so that only real observations are passed
# to the test.
diff_ = diff_.dropna()
Y = diff_.values
result = adfuller(Y)

# Position 0 is printed as the test statistic, position 1 as the p-value,
# position 4 holds the critical values keyed by label.
adf_statistic = result[0]
p_value = result[1]
critical_values = result[4]

print('ADF Statistic: %f' % adf_statistic)
print('p-value: %f' % p_value)
print('Critical Values:')
for level, threshold in critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

#### If the series has become stationary, there is no need to difference it further; otherwise, repeat the same steps (difference again and re-run the ADF test).

In [None]:
# Save the differenced series to a CSV file; the path is relative, so the file
# is created in the current working directory.
output_file = 'DifferentBiDailyAvg.csv'
diff_.to_csv(output_file)