# California Births using XGBoost

In [None]:
from numpy import asarray
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor
from matplotlib import pyplot

import warnings
warnings.filterwarnings('ignore')

# Implementation

In [None]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table.

    Every output row holds the ``n_in`` previous observations (oldest
    first), followed by the current observation and the ``n_out - 1``
    observations after it.

    Args:
        data: Observations in time order. Anything ``DataFrame`` accepts:
            a list, a 1-D array, or a 2-D array with one column per variable.
        n_in: Number of lagged observations placed before the current one.
        n_out: Number of observations, starting at the current one, placed
            at the end of each row.
        dropnan: When true, drop every row that contains a NaN. This removes
            the edge rows that the shifts leave incomplete.

    Returns:
        A 2-D NumPy array with one row per retained time step.
    """
    df = DataFrame(data)
    # Lag columns come first (t-n_in ... t-1), then the lead columns
    # (t ... t+n_out-1), so the most recent target is always the last column.
    lagged = [df.shift(step) for step in range(n_in, 0, -1)]
    leads = [df.shift(-step) for step in range(n_out)]
    agg = concat(lagged + leads, axis=1)
    if dropnan:
        agg.dropna(inplace=True)
    return agg.values

def train_test_split(data, n_test):
    """Split a 2-D array into leading train rows and trailing test rows.

    Args:
        data: 2-D array whose rows are in time order.
        n_test: Number of rows, counted from the end, to hold out for testing.

    Returns:
        A ``(train, test)`` tuple of row slices of ``data``. If ``n_test``
        is at least the number of rows, ``train`` is empty and ``test`` is
        all of ``data``.

    Raises:
        ValueError: If ``n_test`` is negative.
    """
    if n_test < 0:
        raise ValueError('n_test must be non-negative, got %r' % (n_test,))
    # Use an explicit split index rather than negative slicing: data[:-0]
    # is data[:0], which would put every row in the test set when n_test == 0.
    split = max(len(data) - n_test, 0)
    return data[:split, :], data[split:, :]

def xgboost_forecast(train, testX):
    """Fit a new XGBoost regressor on ``train`` and predict a single value.

    Args:
        train: Rows of training data; the last column of each row is the
            target and the preceding columns are the input features.
        testX: One row of input features to predict for.

    Returns:
        The model's prediction for ``testX`` (first element of the
        prediction array).
    """
    history = asarray(train)
    # Everything but the final column is an input; the final column is the target.
    features = history[:, :-1]
    target = history[:, -1]
    regressor = XGBRegressor(objective='reg:squarederror', n_estimators=1000)
    regressor.fit(features, target)
    # Wrap the single feature row so predict() receives a one-row batch.
    prediction = regressor.predict(asarray([testX]))
    return prediction[0]

def walk_forward_validation(data, n_test):
    """Evaluate one-step forecasts over the last ``n_test`` rows of ``data``.

    For each test row a model is fitted on all rows seen so far, the row's
    target is predicted, and the true row is then added to the history
    before moving on to the next one.

    Args:
        data: 2-D array of supervised rows; the last column is the target.
        n_test: Number of trailing rows used for testing.

    Returns:
        A ``(error, expected, predictions)`` tuple: the mean absolute error,
        the test targets, and the list of predicted values.
    """
    train, test = train_test_split(data, n_test)
    # The history starts as the training rows and grows by one true row per step.
    history = list(train)
    predictions = []
    for row in test:
        testX, testy = row[:-1], row[-1]
        yhat = xgboost_forecast(history, testX)
        predictions.append(yhat)
        history.append(row)
        print('expected=%.1f, predicted=%.1f' % (testy, yhat))
    expected = test[:, -1]
    error = mean_absolute_error(expected, predictions)
    return error, expected, predictions

# Training

In [None]:
# Load the CSV, treating the first row as the header and the first column as the index.
path = '../input/dailytotalfemalebirths/daily-total-female-births.csv'
series = read_csv(path, header=0, index_col=0)
values = series.values

# Build supervised rows: 6 lagged observations as inputs, the current one as target.
data = series_to_supervised(values, n_in=6)

# Walk-forward validation over the last 12 rows; y and yhat are reused by the plot cell.
mae, y, yhat = walk_forward_validation(data, 12)
print('MAE: %.3f' % mae)

# Plot

In [None]:
# Plot the test targets against the walk-forward predictions on the same axes.
pyplot.plot(y, label='Expected')
pyplot.plot(yhat, label='Predicted')
pyplot.legend()
pyplot.show()