# Female Births - Time Series Forecast Model

In [None]:
from pandas import read_csv
from matplotlib import pyplot
from statsmodels.tsa.ar_model import AutoReg
from sklearn.metrics import mean_squared_error
import numpy
from math import sqrt

# Difference transforms

In [None]:
# create a difference transform of the dataset
def difference(dataset, interval=1):
	"""Return the lagged differences of a sequence of observations.

	Element ``k`` of the result is ``dataset[k + interval] - dataset[k]``,
	so the result has ``interval`` fewer entries than the input and is
	empty when the input has ``interval`` or fewer entries.

	Args:
		dataset: Sequence or array of numeric observations.
		interval: Distance between the two subtracted observations. The
			default of 1 gives the ordinary first difference.

	Returns:
		numpy.ndarray holding the differenced values.

	Raises:
		ValueError: If ``interval`` is smaller than 1.
	"""
	if interval < 1:
		raise ValueError('interval must be a positive integer')
	values = numpy.asarray(dataset)
	# One vectorised subtraction of two shifted slices replaces the
	# element-by-element Python loop.
	return values[interval:] - values[:-interval]

# Load the CSV with its first column as a date-parsed index.
path='../input/daily-total-female-births/daily-total-female-births.csv'
# read_csv's ``squeeze`` keyword was deprecated in pandas 1.4 and removed in
# 2.0; calling .squeeze('columns') on the result is the supported equivalent
# and collapses a single-column frame to a Series.
series = read_csv(path, header=0, index_col=0, parse_dates=True).squeeze('columns')

# Difference the values, then use the first 66% of the differenced data for
# fitting and the remainder for evaluation.
X = difference(series.values)
size = int(len(X) * 0.66)
train, test = X[0:size], X[size:]

# Train autoregression

In [None]:
# Number of lagged observations used by the autoregression.
window = 6
# Pass ``window`` through so the lag order is defined in exactly one place
# instead of being repeated as a literal.
model = AutoReg(train, lags=window)
model_fit = model.fit()
# Fitted parameters, kept for the manual one-step prediction.
coef = model_fit.params

# Predictions

In [None]:
def predict(coef, history):
	"""Compute a one-step forecast from coefficients and past observations.

	Args:
		coef: Indexable coefficients; ``coef[0]`` is the starting value and
			``coef[k]`` multiplies the k-th most recent observation.
		history: Past observations with the most recent one last.

	Returns:
		``coef[0]`` plus the weighted sum of the last ``len(coef) - 1``
		entries of ``history``.
	"""
	forecast = coef[0]
	# Lag k pairs with history[-k], so lag 1 is the newest observation.
	for lag, weight in enumerate(coef[1:], start=1):
		forecast += weight * history[-lag]
	return forecast

In [None]:
# Walk-forward validation: forecast each test point from the observations
# seen so far, then add the true value to the history before the next step.
history = list(train)
predictions = list()
for obs in test:
	yhat = predict(coef, history)
	predictions.append(yhat)
	history.append(obs)
# Root mean squared error of the forecasts against the held-out values.
rmse = sqrt(mean_squared_error(test, predictions))
print('Test RMSE: %.3f' % rmse)

# Results

In [None]:
# Overlay the forecasts (red) on the held-out values; the labels and legend
# identify which line is which.
pyplot.plot(test, label='observed')
pyplot.plot(predictions, color='red', label='predicted')
pyplot.legend()
pyplot.show()