In [2]:
from pandas import read_csv
from pandas import datetime
from matplotlib import pyplot
from pandas.plotting import autocorrelation_plot
import pandas as pd

# Load the CSV with the first row as header and the first column parsed as dates.
# `squeeze` is intentionally not passed: it was deprecated in pandas 1.4 and
# removed in 2.0, and it only ever affected single-column files, whereas this
# frame is used with several columns below.
data = read_csv(
    "../input/cs01-daily/cs01_daily.csv",
    header=0,
    parse_dates=[0],
)

print(data.head())

In [3]:
# Keep only the date column and the RHAVG column (presumably average relative humidity — confirm).
humidity_data = data.loc[:, ["DATE", "RHAVG"]]

In [4]:
# Move DATE into the index so the remaining RHAVG column is addressed by date.
humidity_series = humidity_data.set_index(keys="DATE")

In [5]:
# Collapse the single-column DataFrame left after set_index("DATE") into a Series.
humidity_series = humidity_series.squeeze()

In [6]:
# Autocorrelation of the humidity series across lags.
autocorrelation_plot(series=humidity_series)

In [7]:
from statsmodels.tsa.arima.model import ARIMA

# humidity_data.index = humidity_data.index.to_period('M')

# ARIMA(5,1,0): five autoregressive lags, one order of differencing, no moving-average terms.
model = ARIMA(endog=humidity_series, order=(5, 1, 0))
model_fit = model.fit()

In [8]:
# Print the summary report of the fitted model currently bound to `model_fit`.
print(model_fit.summary())

In [9]:
# Wrap the fitted model's residuals in a DataFrame for plotting and summarising.
residuals = pd.DataFrame(data=model_fit.resid)
# Residuals as a line plot.
residuals.plot.line()
pyplot.show()
# Kernel density estimate of the residuals.
residuals.plot.kde()
pyplot.show()
# Descriptive statistics of the residuals.
print(residuals.describe())

In [10]:
from sklearn.metrics import mean_squared_error
from math import sqrt

In [11]:
# Hold out the last 0.1% of observations for testing; everything before is training data.
X = humidity_series.values
size = int(len(X) * 0.999)
train, test = X[:size], X[size:]
history = list(train)
predictions = []
# Walk-forward validation: refit on everything seen so far, predict one step
# ahead, then append the true observation before moving on.
for t, obs in enumerate(test):
    model = ARIMA(history, order=(5, 1, 0))
    model_fit = model.fit()
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat)
    history.append(obs)
    print('predicted=%f, expected=%f' % (yhat, obs))
# Score the one-step-ahead forecasts.
rmse = sqrt(mean_squared_error(test, predictions))
print('Test RMSE: %.3f' % rmse)
# Held-out observations (default colour) against the predictions (red).
pyplot.plot(test)
pyplot.plot(predictions, color='red')
pyplot.show()

In [16]:
import matplotlib.pyplot as plt

# Forecast horizon, in steps.
n_periods = 5

# Fit on the complete series so the forecast begins right after the last
# observation, independent of whichever model an earlier cell left in `model_fit`.
full_fit = ARIMA(humidity_series.values, order=(5, 1, 0)).fit()

# Date index for the forecast so it lines up with the date-indexed history.
# Assumes one observation per day (the source file is "cs01_daily.csv") — confirm.
forecast_index = pd.date_range(
    start=humidity_series.index[-1] + pd.Timedelta(days=1),
    periods=n_periods,
    freq="D",
)

# `forecast(steps=...)` yields out-of-sample values; `predict()` with no
# start/end would only return in-sample predictions.
forecast = pd.DataFrame(
    full_fit.forecast(steps=n_periods),
    index=forecast_index,
    columns=['Prediction'],
)
plt.plot(humidity_series, label='Train')
plt.plot(forecast, label='forecast')
plt.legend(loc='upper left', fontsize=8)
plt.show()

In [20]:
import statsmodels
# Display the installed statsmodels version (handy when checking which ARIMA API applies).
statsmodels.__version__

In [21]:
# multi-step out-of-sample forecast
# The results object from statsmodels.tsa.arima.model returns the forecast
# values directly (not a tuple), so the whole 7-step result is kept as-is.
forecast1 = model_fit.forecast(steps=7)

In [49]:
from pandas import read_csv
from statsmodels.tsa.arima.model import ARIMA
import numpy

# create a differenced series
def difference(dataset, interval=1):
    """Return the lag-``interval`` differences of ``dataset`` as a numpy array.

    Each output element is ``dataset[i] - dataset[i - interval]`` for ``i``
    running from ``interval`` up to the end of ``dataset``; the result is empty
    when ``interval`` is not smaller than ``len(dataset)``.
    """
    lagged_deltas = [
        dataset[pos] - dataset[pos - interval]
        for pos in range(interval, len(dataset))
    ]
    return numpy.array(lagged_deltas)

# invert differenced value
def inverse_difference(history, yhat, interval=1):
    """Undo a lag-``interval`` difference for one predicted value.

    Adds ``yhat`` to the observation ``interval`` positions from the end of
    ``history`` and returns the sum.
    """
    reference_value = history[-interval]
    return yhat + reference_value

# The series was loaded and indexed by date in earlier cells.
# Seasonal difference: subtract the value observed `days_in_year` steps earlier.
X = humidity_series.values
days_in_year = 365
differenced = difference(X, days_in_year)
# fit model
model = ARIMA(differenced, order=(7,0,1))
model_fit = model.fit()
# multi-step out-of-sample forecast
forecast = model_fit.forecast(steps=5)
# invert the differenced forecast to something usable
history = list(X)
last_date = humidity_series.index[-1]
for day, yhat in enumerate(forecast, start=1):
    # Each inverted value is appended to history so the next step's lookback
    # window stays aligned one seasonal period behind it.
    inverted = inverse_difference(history, yhat, days_in_year)
    # pd.Timedelta is used because `pd` is imported at the top of the notebook;
    # datetime.timedelta is only imported in a later cell.
    print(f"{last_date + pd.Timedelta(days=day)}: {inverted}")
    history.append(inverted)

In [42]:
from datetime import datetime
from datetime import timedelta
# Timestamp one day after the last entry of the series index.
humidity_series.index[-1]+timedelta(days=1)

In [31]:
# Inspect the first element of X.
X[0]

In [28]:
# Show the first rows of the humidity series.
humidity_series.head()

In [29]:
# Check which container type humidity_series currently is.
type(humidity_series)