# Overview - Population Time Series Data
### _Explore Time Series from the U.S. Census Bureau_
https://www.kaggle.com/census/population-time-series-data

This is a dataset from the U.S. Census Bureau, hosted by the Federal Reserve Economic Database (FRED). FRED has a data platform found here, and it updates its information according to the amount of data that is brought in. Explore the U.S. Census Bureau using Kaggle and all of the data sources available through the U.S. Census Bureau organization page!

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow
from tensorflow import keras
import datetime
import os

# Data Preparation

In [None]:
# Load the population CSV and keep only the timestamp and observation columns.
dataframe = pd.read_csv(
    '../input/population-time-series-data/POP.csv'
)[['date', 'value']]

In [None]:
# Replace the 'date' column with its pd.to_datetime conversion so it plots as dates.
dataframe['date'] = pd.to_datetime(dataframe['date'])

In [None]:
# Show the last rows to sanity-check the most recent observations.
dataframe.tail()

In [None]:
# Draw the whole population series against time and save it as a PNG.
plt.plot(dataframe['date'], dataframe['value'])
plt.title('Population 1950-2019')
plt.savefig('Population-Past-Now.png')

# Data Preprocessing

In [None]:
def split_sequence(sequence, n_steps=3):
    """Slice a univariate series into sliding windows for supervised learning.

    Each sample consists of ``n_steps`` consecutive values; its target is the
    value that immediately follows that window.

    Args:
        sequence: Iterable of observations in time order.
        n_steps: Window length (number of past values per sample); must be
            at least 1.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape ``(n_samples, n_steps, 1)``
        and ``y`` has shape ``(n_samples,)``, with
        ``n_samples = max(len(sequence) - n_steps, 0)``.

    Raises:
        ValueError: If ``n_steps`` is smaller than 1.
    """
    if n_steps < 1:
        raise ValueError("n_steps must be a positive integer")

    values = list(sequence)
    # A sample needs n_steps inputs plus one following target value, so a
    # series of length <= n_steps yields no samples at all.
    n_samples = max(len(values) - n_steps, 0)
    windows = [values[start:start + n_steps] for start in range(n_samples)]
    # The target of window i is values[i + n_steps], i.e. the tail of the
    # series after the first n_steps entries.
    targets = values[n_steps:]

    # Reshape with explicit dimensions so the empty case still produces a
    # well-formed (0, n_steps, 1) array instead of failing on a 1-D array.
    X = np.array(windows).reshape(n_samples, n_steps, 1)
    return X, np.array(targets)

In [None]:
# Chronological hold-out: the first 700 rows are used for training and
# every remaining row is kept for testing.
train_data, test_data = dataframe['value'].iloc[:700], dataframe['value'].iloc[700:]

In [None]:
# Convert each split into (window of 3 values -> next value) samples.
x_train, y_train = split_sequence(train_data, n_steps=3)
x_test, y_test = split_sequence(test_data, n_steps=3)

# RNN Model

In [None]:
# Two stacked LSTM layers followed by a single-unit dense output.
# The first LSTM returns the full sequence so the second LSTM can consume it;
# input_shape=(3, 1) means 3 time steps with 1 feature each.
model = keras.Sequential([
    keras.layers.LSTM(
        units=64,
        activation='relu',
        return_sequences=True,
        input_shape=(3, 1),
    ),
    keras.layers.LSTM(units=64, activation='relu'),
    keras.layers.Dense(units=1),
])

In [None]:
# Train with the Adam optimizer against mean squared error.
model.compile(optimizer='adam', loss='mse')

In [None]:
# Print the layer-by-layer summary of the model defined above.
model.summary()

# Callback & EarlyStopping

In [None]:
# IPython magic: loads the TensorBoard notebook extension (not valid in a plain .py script).
%load_ext tensorboard

In [None]:
# Make sure the parent log folder exists, then build a per-run path named
# after the current timestamp.
os.makedirs('logs', exist_ok=True)
logdir = os.path.join(
    'logs',
    datetime.datetime.now().strftime('%Y%m%d-%H%M%S'),
)

In [None]:
# TensorBoard logging into this run's directory.
callback = keras.callbacks.TensorBoard(log_dir=logdir)
# Stop training once the training loss has not improved for 3 consecutive epochs.
earlyStoping = keras.callbacks.EarlyStopping(patience=3, monitor='loss')

# Training Model

In [None]:
# Fit for at most 100 epochs; early stopping may end training sooner.
history = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=100,
    verbose=2,
    callbacks=[callback, earlyStoping],
)

In [None]:
# Start a fresh figure so the loss curve is not drawn on top of whatever
# figure is currently active (which happens when this runs outside a
# notebook, where figures are not reset between cells).
plt.figure()
# Per-epoch training loss recorded by model.fit.
plt.plot(history.history['loss'])
plt.title('RNN Model Training Loss')
plt.savefig('RNNModel-TrainingLoss.png')

# Demonstrate Prediction

In [None]:
# Start a fresh figure so this comparison is not overlaid on a previously
# active figure when the code runs outside a notebook.
plt.figure()
# Model output for each test window next to the true next value.
plt.plot(model.predict(x_test), label='Prediction')
plt.plot(y_test, label='Actual')
plt.legend()
plt.title('Prediction Demonstration (Test)')
plt.savefig('PredictionDemonstration-Test.png')

# Future Prediction

In [None]:
def predict_future(shift_count):
    """Extend the observed series with iteratively predicted values.

    Starting from the last three entries of ``dataframe.value``, the global
    ``model`` predicts the next value; that prediction is appended and the
    3-value window slides forward by one, so later predictions are fed by
    earlier ones.

    Args:
        shift_count: Number of future steps to predict.

    Returns:
        A list holding every original value of ``dataframe.value`` followed
        by ``shift_count`` predicted values.
    """
    series = list(dataframe.value)
    first_start = len(dataframe) - 3
    for start in range(first_start, first_start + shift_count):
        # The model expects input shaped (batch=1, steps=3, features=1).
        window = np.array(series[start:start + 3]).reshape(1, 3, 1)
        next_value = model.predict(window)[0][0]
        series.append(next_value)
    return series

In [None]:
# Roll the model forward 1000 steps past the end of the observed data.
future_prediction = predict_future(1000)

In [None]:
# Overlay the extended series (observed + predicted) with the observed data.
# NOTE(review): the title says "hari" (days), but each predicted step is one
# row of the dataset - confirm the sampling interval of POP.csv.
plt.figure(figsize=(10, 5))
plt.plot(future_prediction, '--', label='Prediction')
plt.plot(dataframe['value'], alpha=0.7, label='Actual Data')
plt.title('Prediksi populasi dalam 1000 hari ke depan')
plt.legend()
plt.savefig('Prediction-Now-1000daysFuture.png')

# Save Model

In [None]:
# Persist the trained model to 'population-rnn.h5' in the working directory.
model.save('population-rnn.h5')