# Lab - CarPark Vacancy
Reference: Dataset was extracted from one of the KlangValley shopping mall (updated to Nov 2018)

Exercise: 
1. Predict the carpark vacant status in Zone A.




In [3]:
import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import shutil

  return f(*args, **kwds)


## Reading dataset

In [6]:
df_sep = pd.read_csv("./data/2018-09/vacantBays_2018-9.csv")
print('Sep dataset: {}'.format(df_sep.shape))

df_oct = pd.read_csv("./data/2018-10/vacantBays_2018-10.csv")
#df_oct.dtypes
print('Oct dataset: {}'.format(df_oct.shape))

df_nov = pd.read_csv("./data/2018-11/vacantBays_2018-11.csv")
print('Nov dataset: {}'.format(df_nov.shape))

df = df_sep.append(df_oct.append(df_nov))
print('Sep - Oct dataset: {}'.format(df.shape))

Sep dataset: (1051, 9)
Oct dataset: (2969, 9)
Nov dataset: (2871, 9)
Sep - Oct dataset: (6891, 9)


## Cleanup & prepare data

In [7]:
#Cleanup numbers

cleanup_nums = {"A": {"FULL": 0},
                "B": {"FULL": 0},
                "C": {"FULL": 0},
                "G": {"FULL": 0},
                "H": {"FULL": 0},
                "P": {"FULL": 0},
                "U": {"FULL": 0},
               }

df.replace(cleanup_nums, inplace=True)
df.dropna(subset=['A','B','C','G','H','P','U'])
print(df.shape)

(6891, 9)


In [10]:
# Convert to datetime

df['A'] = df['A'].astype(np.float64)
df['B'] = df['B'].astype(np.float64)
df['C'] = df['C'].astype(np.float64)
df['G'] = df['G'].astype(np.float64)
df['H'] = df['H'].astype(np.float64)
df['P'] = df['P'].astype(np.float64)
df['U'] = df['U'].astype(np.float64)
df['DateTime'] =  pd.to_datetime(df['DateTime'])
df.dtypes

DateTime      datetime64[ns]
IsoWeekday             int64
A                    float64
B                    float64
C                    float64
G                    float64
H                    float64
P                    float64
U                    float64
dtype: object

In [14]:
# Extract Hour and Minute
df['Hour'] = df.DateTime.apply(lambda x: x.hour)
df['Minute'] = df.DateTime.apply(lambda x: x.minute)
df.head()

Unnamed: 0,DateTime,IsoWeekday,A,B,C,G,H,P,U,A_State,Hour,Minute
0,2018-09-20 01:32:30.850890,4,770.0,2592.0,875.0,1410.0,1455.0,310.0,811.0,3,1,32
1,2018-09-20 01:39:01.932373,4,773.0,2590.0,876.0,1415.0,1459.0,310.0,810.0,3,1,39
2,2018-09-20 01:39:40.024931,4,773.0,2591.0,876.0,1414.0,1459.0,310.0,810.0,3,1,39
3,2018-09-20 01:39:52.258908,4,773.0,2591.0,876.0,1415.0,1459.0,310.0,810.0,3,1,39
4,2018-09-20 01:40:49.225890,4,773.0,2590.0,876.0,1412.0,1458.0,310.0,810.0,3,1,40


## Classify the carpark vacant status 

In [11]:
df.describe()

Unnamed: 0,IsoWeekday,A,B,C,G,H,P,U
count,6891.0,6857.0,6856.0,6857.0,6853.0,6851.0,6855.0,6855.0
mean,4.010013,697.385883,1440.016044,723.30261,772.057931,779.246679,209.576222,494.911451
std,1.974669,604.831289,1074.659781,667.102289,569.929676,588.127668,106.128075,287.413354
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,63.0,292.0,54.0,183.0,166.0,121.0,233.0
50%,4.0,739.0,1592.5,726.0,826.0,823.0,254.0,553.0
75%,6.0,1057.0,2575.0,1071.0,1372.0,1399.0,305.0,779.0
max,7.0,1786.0,2646.0,1921.0,1495.0,1519.0,317.0,831.0


In [13]:
# Define status for Carpark Zone A
def A_state(x):
    if x > 1057:
        return 4 # you are early!
    elif x > 739:
        return 3 # plenty of vacancy
    elif x > 63:
        return 2 # limited bay
    elif x > 0:
        return 1 # good luck
    else:
        return 0
    
df['A_State'] = df['A'].apply(A_state)
df.head()

Unnamed: 0,DateTime,IsoWeekday,A,B,C,G,H,P,U,A_State
0,2018-09-20 01:32:30.850890,4,770.0,2592.0,875.0,1410.0,1455.0,310.0,811.0,3
1,2018-09-20 01:39:01.932373,4,773.0,2590.0,876.0,1415.0,1459.0,310.0,810.0,3
2,2018-09-20 01:39:40.024931,4,773.0,2591.0,876.0,1414.0,1459.0,310.0,810.0,3
3,2018-09-20 01:39:52.258908,4,773.0,2591.0,876.0,1415.0,1459.0,310.0,810.0,3
4,2018-09-20 01:40:49.225890,4,773.0,2590.0,876.0,1412.0,1458.0,310.0,810.0,3


## Split into dataset of: Train, Validation, Test (70%-20%-10%)

In [17]:

df_train0, df_valid0, df_test0 = np.split(df.sample(frac=1), [int(.7*len(df)), int(.9*len(df))])

# select only Featured columns
df_train = df_train0[['A_State','Hour','Minute','IsoWeekday']]
df_valid = df_valid0[['A_State','Hour','Minute','IsoWeekday']]
df_test = df_test0[['A_State','Hour','Minute','IsoWeekday']]

print('Train set: {}'.format(df_train.shape))
print('Validate set: {}'.format(df_valid.shape))
print('Test set: {}'.format(df_test.shape))


Train set: (4823, 4)
Validate set: (1378, 4)
Test set: (690, 4)


## Feature & Label 

In [19]:
FEATURES_NUM = ['Hour','Minute','IsoWeekday']
LABEL = 'A_State'

## Determine metrics for validation - Accuracy

In [24]:
def print_accuracy(model, df):
  metrics = model.evaluate(input_fn = tf.estimator.inputs.pandas_input_fn(
    x = df,
    y = df[LABEL],
    batch_size = 128,
    shuffle = False
  ))
  print('Accuracy on dataset = {}'.format(metrics['accuracy']))
    
#print_accuracy(model, df_valid)

## Model: DNN Classifier

In [25]:
# DNNClassifier
OUTDIR = 'carparkClass_trained'
tf.logging.set_verbosity(tf.logging.INFO)
shutil.rmtree(OUTDIR, ignore_errors = True) # start fresh each time

model = tf.estimator.DNNClassifier(
    hidden_units = [512, 256, 128], 
    feature_columns = [tf.feature_column.numeric_column('Hour'),
                       tf.feature_column.numeric_column('Minute'),
                       tf.feature_column.numeric_column('IsoWeekday')
                      ], 
    activation_fn = tf.nn.relu,
    n_classes=5,
    optimizer=tf.train.AdamOptimizer(
      learning_rate=0.01,
      #l1_regularization_strength=0.001
    ),
    #loss_reduction='weighted_sum',
    model_dir = OUTDIR,
#    config=tf.estimator.RunConfig(save_summary_steps=2,
#                                  save_checkpoints_steps=2)
)

model.train(input_fn = tf.estimator.inputs.pandas_input_fn(
    x = df_train,
    y = df_train[LABEL],
    batch_size = 128,
    num_epochs = 50,
    shuffle = True
  ));

print_accuracy(model, df_valid)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_device_fn': None, '_master': '', '_task_type': 'worker', '_evaluation_master': '', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1a2fb78860>, '_num_ps_replicas': 0, '_save_checkpoints_steps': None, '_model_dir': 'carparkClass_trained', '_task_id': 0, '_global_id_in_cluster': 0, '_tf_random_seed': None, '_service': None, '_session_config': None, '_num_worker_replicas': 1, '_log_step_count_steps': 100, '_train_distribute': None, '_is_chief': True, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_save_summary_steps': 100}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into carparkClass_trained/model.ckpt.
IN

## Prediction 

In [26]:
predictions = model.predict(input_fn = tf.estimator.inputs.pandas_input_fn(
#    x = pd.DataFrame({
#                      'Hour':[9,10,12],
#                      'Minute':[0,0,0],
#                      'IsoWeekday':[4,4,5]}),
    x = df_test,
    y = None,
    batch_size = 64,
    shuffle = False
  ))

for items in predictions:
  print(items)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from carparkClass_trained/model.ckpt-1884
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
{'logits': array([-16.245052 , -14.926212 ,   2.6889298,   4.157054 ,   9.885626 ],
      dtype=float32), 'probabilities': array([4.4653612e-12, 1.6696349e-11, 7.4607151e-04, 3.2387599e-03,
       9.9601513e-01], dtype=float32), 'class_ids': array([4]), 'classes': array([b'4'], dtype=object)}
{'logits': array([-3.5690308,  2.561318 ,  4.062757 , -0.5455971, -4.222418 ],
      dtype=float32), 'probabilities': array([3.9301801e-04, 1.8062966e-01, 8.1069177e-01, 8.0811474e-03,
       2.0447954e-04], dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': array([-4.938882  , -2.2040906 ,  1.9009812 , -0.88653386, -1.0794947 ],
      dtype=float32), 'probabilities': array([0.00094721, 0.0

       9.4194972e-04], dtype=float32), 'class_ids': array([0]), 'classes': array([b'0'], dtype=object)}
{'logits': array([-12.651388 , -12.456923 ,   1.2083621,   2.9794788,  12.570064 ],
      dtype=float32), 'probabilities': array([1.1128288e-11, 1.3517105e-11, 1.1631636e-05, 6.8363959e-05,
       9.9992001e-01], dtype=float32), 'class_ids': array([4]), 'classes': array([b'4'], dtype=object)}
{'logits': array([ 3.6622148 ,  3.78854   , -0.04019305, -4.3608274 , -0.81140995],
      dtype=float32), 'probabilities': array([4.60606873e-01, 5.22628069e-01, 1.13604395e-02, 1.50996697e-04,
       5.25363488e-03], dtype=float32), 'class_ids': array([1]), 'classes': array([b'1'], dtype=object)}
{'logits': array([-7.726447 , -6.2331862,  2.4095507, -0.5278959, -1.1163568],
      dtype=float32), 'probabilities': array([3.6602203e-05, 1.6293800e-04, 9.2366630e-01, 4.8955135e-02,
       2.7178966e-02], dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': arr

      dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': array([-6.4800014 , -6.3879476 ,  2.5403306 ,  5.357483  , -0.07903314],
      dtype=float32), 'probabilities': array([6.7927367e-06, 7.4477221e-06, 5.6172688e-02, 9.3972093e-01,
       4.0921364e-03], dtype=float32), 'class_ids': array([3]), 'classes': array([b'3'], dtype=object)}
{'logits': array([ 3.9096458,  3.8164437,  2.0422122, -2.7081532, -6.322685 ],
      dtype=float32), 'probabilities': array([4.8381600e-01, 4.4076091e-01, 7.4759111e-02, 6.4655684e-04,
       1.7411478e-05], dtype=float32), 'class_ids': array([0]), 'classes': array([b'0'], dtype=object)}
{'logits': array([-13.716036 ,  -6.477796 ,   3.3796358,   1.4454943,   9.529494 ],
      dtype=float32), 'probabilities': array([8.0082121e-11, 1.1144560e-07, 2.1285850e-03, 3.0768348e-04,
       9.9756360e-01], dtype=float32), 'class_ids': array([4]), 'classes': array([b'4'], dtype=object)}
{'logits': array([-1.4245231 , -5.33

       1.1869341e-03], dtype=float32), 'class_ids': array([3]), 'classes': array([b'3'], dtype=object)}
{'logits': array([ 0.47007406,  2.1430426 ,  1.7745612 , -2.2502139 , -1.6309627 ],
      dtype=float32), 'probabilities': array([0.09802053, 0.5222495 , 0.3612839 , 0.00645522, 0.0119908 ],
      dtype=float32), 'class_ids': array([1]), 'classes': array([b'1'], dtype=object)}
{'logits': array([ 3.6508474 ,  3.8249903 , -0.07276917, -4.305213  , -0.8239334 ],
      dtype=float32), 'probabilities': array([4.4921306e-01, 5.3466487e-01, 1.0846916e-02, 1.5746326e-04,
       5.1177568e-03], dtype=float32), 'class_ids': array([1]), 'classes': array([b'1'], dtype=object)}
{'logits': array([ 4.2629795 ,  4.029665  , -0.34825724, -4.7440047 , -0.6838244 ],
      dtype=float32), 'probabilities': array([5.5276942e-01, 4.3774056e-01, 5.4942626e-03, 6.7742352e-05,
       3.9280257e-03], dtype=float32), 'class_ids': array([0]), 'classes': array([b'0'], dtype=object)}
{'logits': array([-2.8750613 ,

       1.08935026e-04], dtype=float32), 'class_ids': array([3]), 'classes': array([b'3'], dtype=object)}
{'logits': array([-23.629725 , -16.27094  ,   1.4168026,   3.1330278,  15.774785 ],
      dtype=float32), 'probabilities': array([7.7061395e-18, 1.2098069e-14, 5.8130723e-07, 3.2340970e-06,
       9.9999619e-01], dtype=float32), 'class_ids': array([4]), 'classes': array([b'4'], dtype=object)}
{'logits': array([ 3.6508474 ,  3.8249903 , -0.07276917, -4.305213  , -0.8239334 ],
      dtype=float32), 'probabilities': array([4.4921306e-01, 5.3466487e-01, 1.0846916e-02, 1.5746326e-04,
       5.1177568e-03], dtype=float32), 'class_ids': array([1]), 'classes': array([b'1'], dtype=object)}
{'logits': array([-6.2282915, -8.740787 ,  0.5050209,  1.7768645,  0.5547044],
      dtype=float32), 'probabilities': array([2.1186080e-04, 1.7174649e-05, 1.7794704e-01, 6.3481253e-01,
       1.8701141e-01], dtype=float32), 'class_ids': array([3]), 'classes': array([b'3'], dtype=object)}
{'logits': array([

      dtype=float32), 'class_ids': array([1]), 'classes': array([b'1'], dtype=object)}
{'logits': array([-3.2770977,  0.9270004,  2.8733811,  0.4973793, -2.0757933],
      dtype=float32), 'probabilities': array([0.00171291, 0.11469688, 0.8032561 , 0.07463964, 0.00569449],
      dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': array([ 0.5818712 ,  0.39435762,  1.0332372 , -4.0059648 , -1.4572499 ],
      dtype=float32), 'probabilities': array([0.28250247, 0.23419958, 0.4436577 , 0.00287442, 0.03676573],
      dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': array([ -0.45885327, -12.747325  ,  -1.8338416 ,   4.6926107 ,
         2.0950425 ], dtype=float32), 'probabilities': array([5.3534894e-03, 2.4650259e-08, 1.3535901e-03, 9.2446250e-01,
       6.8830311e-02], dtype=float32), 'class_ids': array([3]), 'classes': array([b'3'], dtype=object)}
{'logits': array([ 3.977066  ,  3.9043968 , -0.29278877, -4.46

       8.2697846e-02], dtype=float32), 'class_ids': array([3]), 'classes': array([b'3'], dtype=object)}
{'logits': array([-19.23017  , -16.034794 ,   2.0623155,   3.9771135,  11.817412 ],
      dtype=float32), 'probabilities': array([3.2810357e-14, 8.0120573e-13, 5.7972145e-05, 3.9337383e-04,
       9.9954861e-01], dtype=float32), 'class_ids': array([4]), 'classes': array([b'4'], dtype=object)}
{'logits': array([ -0.40930536, -15.548348  ,  -0.03368914,   3.8345985 ,
         7.51398   ], dtype=float32), 'probabilities': array([3.5298741e-04, 9.3962907e-11, 5.1391043e-04, 2.4595937e-02,
       9.7453713e-01], dtype=float32), 'class_ids': array([4]), 'classes': array([b'4'], dtype=object)}
{'logits': array([-2.0571728,  3.3499444,  3.8851156, -1.2437067, -4.8881607],
      dtype=float32), 'probabilities': array([1.6471536e-03, 3.6729521e-01, 6.2724495e-01, 3.7155058e-03,
       9.7107259e-05], dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': ar

{'logits': array([-1.3900981 ,  0.60682327,  3.021731  , -1.6656644 , -2.1085277 ],
      dtype=float32), 'probabilities': array([0.01086566, 0.08004016, 0.8955484 , 0.00824858, 0.0052972 ],
      dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': array([-0.18446687,  0.1702084 ,  0.6380566 , -4.170462  , -0.83209705],
      dtype=float32), 'probabilities': array([0.19070095, 0.27188572, 0.43408048, 0.00354207, 0.09979084],
      dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': array([-14.338839 , -11.223833 ,   3.3315492,   6.309123 ,   1.0283587],
      dtype=float32), 'probabilities': array([1.0210306e-09, 2.3007448e-08, 4.8215892e-02, 9.4696540e-01,
       4.8186705e-03], dtype=float32), 'class_ids': array([3]), 'classes': array([b'3'], dtype=object)}
{'logits': array([-15.9703665,  -9.642902 ,   1.3027015,   4.631646 ,   5.5968757],
      dtype=float32), 'probabilities': array([3.0834280e-10, 1.725

{'logits': array([-1.6051984 ,  0.35150704,  2.998924  , -1.4250908 , -1.9756804 ],
      dtype=float32), 'probabilities': array([0.00910258, 0.06440961, 0.90930444, 0.01089893, 0.00628443],
      dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': array([ 0.07326743,  0.34420103,  0.8850996 , -3.869537  , -1.4795213 ],
      dtype=float32), 'probabilities': array([0.20858239, 0.27349073, 0.46973386, 0.00404519, 0.0441479 ],
      dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': array([-8.574551 , -6.6402297,  2.488461 , -0.1217017, -1.0751619],
      dtype=float32), 'probabilities': array([1.42305125e-05, 9.84657527e-05, 9.07455266e-01, 6.67184517e-02,
       2.57136337e-02], dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': array([-3.2292914 ,  2.201541  ,  3.6178367 , -0.74448776, -3.3740654 ],
      dtype=float32), 'probabilities': array([8.4503909e-04, 1.929

{'logits': array([-3.3922012 ,  1.966708  ,  3.3857298 , -0.35388446, -3.1250901 ],
      dtype=float32), 'probabilities': array([0.00089773, 0.19076164, 0.7884323 , 0.01873573, 0.0011726 ],
      dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': array([-2.1146436 , -8.871682  ,  0.56153303,  4.772409  , -0.4988762 ],
      dtype=float32), 'probabilities': array([9.9992764e-04, 1.1625835e-06, 1.4528383e-02, 9.7943920e-01,
       5.0313841e-03], dtype=float32), 'class_ids': array([3]), 'classes': array([b'3'], dtype=object)}
{'logits': array([ 3.0877068, -4.9172444, -0.2889558, -4.4191823,  5.718777 ],
      dtype=float32), 'probabilities': array([6.7007631e-02, 2.2367534e-05, 2.2890666e-03, 3.6806436e-05,
       9.3064409e-01], dtype=float32), 'class_ids': array([4]), 'classes': array([b'4'], dtype=object)}
{'logits': array([-3.306356  , -4.0347705 ,  2.9257777 ,  2.0897691 , -0.67181885],
      dtype=float32), 'probabilities': array([1.3426245

       2.6209163e-04], dtype=float32), 'class_ids': array([1]), 'classes': array([b'1'], dtype=object)}
{'logits': array([ 2.0758388 ,  2.0393872 , -0.01261032, -4.4759192 , -0.70797527],
      dtype=float32), 'probabilities': array([0.4648322 , 0.44819343, 0.057583  , 0.0006636 , 0.02872772],
      dtype=float32), 'class_ids': array([0]), 'classes': array([b'0'], dtype=object)}
{'logits': array([ -1.061825  , -15.857146  ,  -0.46255577,   7.323079  ,
        -0.07219827], dtype=float32), 'probabilities': array([2.2800085e-04, 8.5587426e-11, 4.1514100e-04, 9.9874347e-01,
       6.1337452e-04], dtype=float32), 'class_ids': array([3]), 'classes': array([b'3'], dtype=object)}
{'logits': array([-7.743758 , -5.043818 ,  2.863119 , -0.540138 , -1.6474158],
      dtype=float32), 'probabilities': array([2.3687588e-05, 3.5244401e-04, 9.5725822e-01, 3.1842958e-02,
       1.0522738e-02], dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': array([-8.691025 ,

       9.6996248e-01], dtype=float32), 'class_ids': array([4]), 'classes': array([b'4'], dtype=object)}
{'logits': array([-15.898552 , -13.197794 ,   2.7231698,   1.3831527,  12.621968 ],
      dtype=float32), 'probabilities': array([4.1083583e-13, 6.1177508e-12, 5.0231843e-05, 1.3152763e-05,
       9.9993658e-01], dtype=float32), 'class_ids': array([4]), 'classes': array([b'4'], dtype=object)}
{'logits': array([-0.70631963, -9.664939  , -2.1269863 ,  4.000756  ,  1.9281926 ],
      dtype=float32), 'probabilities': array([7.9424288e-03, 1.0215845e-06, 1.9185179e-03, 8.7944829e-01,
       1.1068979e-01], dtype=float32), 'class_ids': array([3]), 'classes': array([b'3'], dtype=object)}
{'logits': array([-3.2292914 ,  2.201541  ,  3.6178367 , -0.74448776, -3.3740654 ],
      dtype=float32), 'probabilities': array([8.4503909e-04, 1.9295554e-01, 7.9532880e-01, 1.0139422e-02,
       7.3114265e-04], dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': arr

In [27]:
df_test.head()

Unnamed: 0,A_State,Hour,Minute,IsoWeekday
212,4,4,15,6
1320,2,19,15,7
1857,4,9,0,2
129,3,7,30,5
1030,2,18,45,7
