In [1]:
import numpy as np
import pandas as pd
from pandas.tseries.offsets import DateOffset
from sklearn.preprocessing import MinMaxScaler

import os
import logging
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
logging.getLogger("tensorflow").setLevel(logging.ERROR)

import tensorflow as tf
from tensorflow import keras
#tf.autograph.set_verbosity(3)
#tf.get_logger().setLevel('ERROR')

from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dropout

import chart_studio.plotly as py
import plotly.offline as pyoff
import plotly.graph_objs as go
pyoff.init_notebook_mode(connected=True)

In [2]:
obs = pd.read_csv('obesity_sums.csv', index_col = 1)
pop = pd.read_csv('pop_sums.csv', index_col = 1)

In [3]:
obs.drop(columns = obs.columns[0], inplace = True)
pop.drop(columns = pop.columns[0], inplace = True)

In [4]:
obs.head()

Unnamed: 0_level_0,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Austria,...,Uruguay,Uzbekistan,Vanuatu,Venezuela (Bolivarian Republic of),Viet Nam,Yemen,Yemen Arab Republic (until 1990),Zambia,Zimbabwe,sum_yr
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1975,27738.248,83895.393,521474.391,4114.47,26463.808,1873.638,2026515.012,122889.228,1057115.85,447584.137,...,241543.05,307490.391,2582.603,620170.937,,,87875.5,33133.776,95983.8,101690300.0
1976,28151.132,87965.642,554596.75,4448.78,31160.8,1968.42,2126952.83,130830.64,1092576.679,455693.472,...,248831.781,326432.24,2781.142,666747.164,,,93129.504,36806.154,105372.064,107017700.0
1977,35650.355,93913.2,580828.214,4751.795,32280.848,2027.484,2208714.442,139175.96,1143350.12,470797.44,...,257016.412,355657.848,2990.079,707066.008,29521.042,,99022.149,40746.735,111814.956,112218400.0
1978,35978.345,98460.161,618651.584,5060.55,33523.4,2122.68,2291126.046,147715.56,1197284.833,485625.648,...,265796.776,378105.493,3207.719,765534.484,30358.912,,105526.792,42152.235,118645.074,117352200.0
1979,36086.65,104711.922,668897.984,5431.608,39113.271,2214.266,2395318.25,158243.475,1240174.03,500297.364,...,274676.22,401084.772,3430.185,818630.39,62448.396,,116458.11,46541.456,129657.888,122956000.0


In [5]:
pop.head()

Unnamed: 0_level_0,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Austria,...,Uruguay,Uzbekistan,Vanuatu,Venezuela (Bolivarian Republic of),Viet Nam,Yemen,Yemen Arab Republic (until 1990),Zambia,Zimbabwe,sum_yr
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1975,6934562.0,1471849.0,8838549.0,30705.0,3780544.0,36738.0,18256892.0,1861958.0,10067770.0,5812781.0,...,2046975.0,7884369.0,54949.0,7471939.0,,,3515020.0,2548752.0,3199460.0,2445823000.0
1976,7037783.0,1516649.0,9091750.0,31777.0,3895100.0,37140.0,18495242.0,1923980.0,10210997.0,5842224.0,...,2056461.0,8160806.0,56758.0,7752874.0,,,3581904.0,2629011.0,3292877.0,2498616000.0
1977,7130071.0,1565220.0,9368197.0,32771.0,4035106.0,37546.0,18717919.0,1988228.0,10394092.0,5884968.0,...,2072713.0,8468044.0,58629.0,8034841.0,29521042.0,,3667487.0,2716449.0,3388332.0,2583692000.0
1978,7195669.0,1614101.0,9666431.0,33737.0,4190425.0,37905.0,18934926.0,2051605.0,10595441.0,5922264.0,...,2092888.0,8793151.0,60523.0,8321027.0,30358912.0,,3768814.0,2810149.0,3489561.0,2643168000.0
1979,7217330.0,1662094.0,9983552.0,34818.0,4345919.0,38177.0,19162546.0,2109913.0,10784122.0,5955921.0,...,2112894.0,9115563.0,62367.0,8617162.0,31224198.0,,3881937.0,2908841.0,3601608.0,2705041000.0


In [6]:
obs.index = pd.to_datetime(obs.index, format = '%Y')
pop.index = pd.to_datetime(pop.index, format = '%Y')

In [7]:
obs_sums = obs[['sum_yr']]
pop_sums = pop[['sum_yr']]

In [8]:
obs_sums = obs_sums.rename(columns = {'sum_yr' : 'sum_obs'})
pop_sums = pop_sums.rename(columns = {'sum_yr' : 'sum_pop'})

In [9]:
# this code was derived from another source and modified into a function
# https://towardsdatascience.com/time-series-prediction-beyond-test-data-3f4625019fd9

def TimeSeriesPredictor(df, output_length, features):
    tf.autograph.set_verbosity(1)
    
    train = df

    scaler = MinMaxScaler()

    scaler.fit(train)

    train = scaler.transform(train)
    generator = TimeseriesGenerator(train, train, length = output_length, batch_size = 6)

    model = Sequential()
    model.add(LSTM(200, activation = 'relu', input_shape = (output_length, features)))
    model.add(Dropout(0.15))
    model.add(Dense(1))

    optimizer = keras.optimizers.Adam(learning_rate = 0.001)

    model.compile(optimizer = optimizer, loss = 'mse')

    history = model.fit(generator, epochs = 100, verbose = 1)
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch

    pred_list = []

    batch = train[-output_length:].reshape((1, output_length, features))

    for i in range(output_length):   
        pred_list.append(model.predict(batch)[0]) 
        batch = np.append(batch[:,1:,:], [[pred_list[i]]], axis = 1)

    add_dates = [df.index[-1] + DateOffset(years = x) for x in range(0, output_length + 1)]

    future_dates = pd.DataFrame(index = add_dates[1:], columns = df.columns)

    predict = pd.DataFrame(scaler.inverse_transform(pred_list), 
                           index = future_dates[-output_length:].index, 
                           columns = ['pred'])

    project = pd.concat([df, predict], axis=1)

    return [hist, project]

In [10]:
# How will the obese population look in 2030?
obs_ts = TimeSeriesPredictor(obs_sums, 14, 1)

  ...
    to  
  ['...']
INFO:tensorflow:Converted call: <function DatasetV2.from_generator.<locals>.flat_map_fn at 0x0000026EED7547B8>
    args: (<tf.Tensor 'args_0:0' shape=() dtype=int32>,)
    kwargs: {}

INFO:tensorflow:Converted call: <function _process_inputs.<locals>.standardize_function.<locals>.map_fn at 0x0000026EED7549D8>
    args: (<tf.Tensor 'args_0:0' shape=(None, None, None) dtype=float64>, <tf.Tensor 'args_1:0' shape=(None, None) dtype=float64>)
    kwargs: {}

Train for 5 steps
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
E

Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.__init__.<locals>.permutation at 0x0000026EEDAC9F28>
    args: (<tf.Tensor 'args_0:0' shape=() dtype=int64>,)
    kwargs: {}

INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.__init__.<locals>.slice_batch_indices at 0x0000026EEDAC9D08>
    args: (<tf.Tensor 'args_0:0' shape=(1,) dtype=int64>,)
    kwargs: {}

INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.slice_inputs.<locals>.grab_batch at 0x0000026EEDAF12F0>
    args: (<tf.Tensor 'args_0:0' shape=(None,) dtype=int64>, (<tf.Tensor 'args_1:0' shape=(1, 14, 1) dtype=float64>,))
    kwargs: {}

INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.__init__.<locals>.permutation at 0x0000026EEDB1F620>
    args: (<tf.Tensor 'args_0:0' shape=() dtype=int64>,)
    kwargs: {}

INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.__init__.<locals>.slice_batch_indic

INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.__init__.<locals>.permutation at 0x0000026EFAB6DB70>
    args: (<tf.Tensor 'args_0:0' shape=() dtype=int64>,)
    kwargs: {}

INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.__init__.<locals>.slice_batch_indices at 0x0000026EFAB6D048>
    args: (<tf.Tensor 'args_0:0' shape=(1,) dtype=int64>,)
    kwargs: {}

INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.slice_inputs.<locals>.grab_batch at 0x0000026EEDB75C80>
    args: (<tf.Tensor 'args_0:0' shape=(None,) dtype=int64>, (<tf.Tensor 'args_1:0' shape=(1, 14, 1) dtype=float64>,))
    kwargs: {}

INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.__init__.<locals>.permutation at 0x0000026EEDB759D8>
    args: (<tf.Tensor 'args_0:0' shape=() dtype=int64>,)
    kwargs: {}

INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.__init__.<locals>.slice_batch_indices at 0x0000026EEDAC9D90>
    args: (<tf.Tensor 'args_0:0' shape=(

In [11]:
# Checking the world population in 2030
pop_ts = TimeSeriesPredictor(pop_sums, 14 , 1)

  ...
    to  
  ['...']
INFO:tensorflow:Converted call: <function DatasetV2.from_generator.<locals>.flat_map_fn at 0x0000026EFAB32F28>
    args: (<tf.Tensor 'args_0:0' shape=() dtype=int32>,)
    kwargs: {}

INFO:tensorflow:Converted call: <function _process_inputs.<locals>.standardize_function.<locals>.map_fn at 0x0000026EFAAE40D0>
    args: (<tf.Tensor 'args_0:0' shape=(None, None, None) dtype=float64>, <tf.Tensor 'args_1:0' shape=(None, None) dtype=float64>)
    kwargs: {}

Train for 5 steps
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
E

Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.__init__.<locals>.permutation at 0x0000026EFAAE46A8>
    args: (<tf.Tensor 'args_0:0' shape=() dtype=int64>,)
    kwargs: {}

INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.__init__.<locals>.slice_batch_indices at 0x0000026EED5AA0D0>
    args: (<tf.Tensor 'args_0:0' shape=(1,) dtype=int64>,)
    kwargs: {}

INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.slice_inputs.<locals>.grab_batch at 0x0000026EEDAF1158>
    args: (<tf.Tensor 'args_0:0' shape=(None,) dtype=int64>, (<tf.Tensor 'args_1:0' shape=(1, 14, 1) dtype=float64>,))
    kwargs: {}

INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.__init__.<locals>.permutation at 0x0000026EEDB75378>
    args: (<tf.Tensor 'args_0:0' shape=() dtype=int64>,)
    kwargs: {}

INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.__init__.<locals>.slice_batch_indices at 0x00000

INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.__init__.<locals>.slice_batch_indices at 0x0000026EFBDB36A8>
    args: (<tf.Tensor 'args_0:0' shape=(1,) dtype=int64>,)
    kwargs: {}

INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.slice_inputs.<locals>.grab_batch at 0x0000026EFBDB3840>
    args: (<tf.Tensor 'args_0:0' shape=(None,) dtype=int64>, (<tf.Tensor 'args_1:0' shape=(1, 14, 1) dtype=float64>,))
    kwargs: {}

INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.__init__.<locals>.permutation at 0x0000026EFBDB3BF8>
    args: (<tf.Tensor 'args_0:0' shape=() dtype=int64>,)
    kwargs: {}

INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.__init__.<locals>.slice_batch_indices at 0x0000026EFBE620D0>
    args: (<tf.Tensor 'args_0:0' shape=(1,) dtype=int64>,)
    kwargs: {}

INFO:tensorflow:Converted call: <function TensorLikeDataAdapter.slice_inputs.<locals>.grab_batch at 0x0000026EFBE62510>
    args: (<tf.Tensor 'args_0:0' sh

In [12]:
# Extract out projections from the training functions
obs_hist = obs_ts[0]
obs_project = obs_ts[1]

pop_hist = pop_ts[0]
pop_project = pop_ts[1]

In [13]:
# How much loss did we see when building our model?
plot_data = [go.Scatter(x = obs_hist['epoch'], 
                        y = obs_hist['loss'], 
                        name = 'Obesity'),
             go.Scatter(x = pop_hist['epoch'], 
                        y = pop_hist['loss'], 
                        name = 'Population')]

plot_layout = go.Layout(title = 'Training Losses')

fig = go.Figure(data = plot_data, layout = plot_layout)

pyoff.iplot(fig)

In [14]:
# Let's plot our predictions along with our historical data
plot_data = [go.Scatter(x = obs_project.index, 
                        y = obs_project['sum_obs'], 
                        name = 'Obesity Actual'), 
             go.Scatter(x = obs_project.index, 
                        y = obs_project['pred'], 
                        name = 'Obesity Prediction'),
             go.Scatter(x = pop_project.index, 
                        y = pop_project['sum_pop'], 
                        name = 'Population Actual'), 
             go.Scatter(x = pop_project.index, 
                        y = pop_project['pred'], 
                        name = 'Population Prediction')]

plot_layout = go.Layout(title = 'Population vs. Obesity Growth')

fig = go.Figure(data = plot_data, layout = plot_layout)

pyoff.iplot(fig)

In [15]:
obs_project

Unnamed: 0,sum_obs,pred
1975-01-01,101690300.0,
1976-01-01,107017700.0,
1977-01-01,112218400.0,
1978-01-01,117352200.0,
1979-01-01,122956000.0,
1980-01-01,129019600.0,
1981-01-01,135411200.0,
1982-01-01,141375600.0,
1983-01-01,148483700.0,
1984-01-01,155433400.0,
