# Spatial Inference

In [98]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
import tensorflow as tf
import tensorflow.keras as K

In [64]:
n = 5000

# Generate data
df = pd.DataFrame()
df['latitude'] = np.random.uniform(145, 148, n)
df['longitude'] = np.random.uniform(155, 157, n)
df['cts_occupancy'] = np.abs(np.sin(df.latitude*df.latitude) + np.sin(df.longitude) + np.cos(df.latitude*df.longitude)) + np.random.normal(0,0.2,n)
df['cts_occupancy'] = (df['cts_occupancy'] - df['cts_occupancy'].min())/(df['cts_occupancy'].max() - df['cts_occupancy'].min())

X = df[['latitude', 'longitude']].values
y = df['cts_occupancy'].values

### Decision Tree

In [85]:
regressor = DecisionTreeRegressor(random_state=0)
cv_scores = cross_val_score(regressor, X, y, cv=20, scoring = 'neg_mean_squared_error')
print("Decision tree RMSE: {}".format(np.sqrt(-np.mean(cv_scores))))

Decision tree RMSE: 0.2714980743241343


### Random Forest

In [84]:
regr = RandomForestRegressor(max_depth=400, random_state=0)
regr.fit(X, y)
cv_scores = cross_val_score(regr, X, y, cv=20, scoring = 'neg_mean_squared_error')
print("Random Forest RMSE: {}".format(np.sqrt(-np.mean(cv_scores))))



Random Forest RMSE: 0.21382655817054746


### Inverse-Distance Weighting (IDW)

In [97]:
idw = KNeighborsRegressor(n_neighbors=5, weights = 'distance')
idw.fit(X, y)

cv_scores = cross_val_score(idw, X, y, cv=20, scoring = 'neg_mean_squared_error')
print("IDW RMSE: {}".format(np.sqrt(-np.mean(cv_scores))))

IDW RMSE: 0.21836279588572083


### k-NN

In [89]:
knn = KNeighborsRegressor(n_neighbors=3)
knn.fit(X, y)

cv_scores = cross_val_score(knn, X, y, cv=20, scoring = 'neg_mean_squared_error')
print("k-NN RMSE: {}".format(np.sqrt(-np.mean(cv_scores))))

k-NN RMSE: 0.22896591952824916


### MLP

In [99]:
def get_dropout(input_tensor, p=0.5, mc=False):
    if mc:
        return K.layers.Dropout(p)(input_tensor, training=True)
    else:
        return K.layers.Dropout(p)(input_tensor)

Train/val/test split as cross-validation is too intensive for a neural net.

In [127]:
n_train = 4500

X_train = X[0:n_train]
y_train = y[0:n_train]

X_val = X[n_train:n_train+500]
y_val = y[n_train:n_train+500]

X_test = X[n_train+500:]
y_test = y[n_train+500:] 

In [121]:
drop_rate = 0.2
mc = True
units = 1028

inputs = K.layers.Input(shape=(2,))

output_1 = K.layers.Dense(units, activation='relu')(inputs)
drop_1 = get_dropout(output_1, p=drop_rate, mc=mc)

output_2 = K.layers.Dense(units//2, activation='relu')(drop_1)
drop_2 = get_dropout(output_2, p=drop_rate, mc=mc)

predictions = K.layers.Dense(1)(drop_2)

In [128]:
sgd = K.optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

model = K.Model(inputs=inputs, outputs=predictions)
model.compile(optimizer='adam',
              loss='mse',
             batch_size = 32)
model.fit(X_train, y_train, validation_data = (X_val, y_val), epochs = 50)

Train on 4500 samples, validate on 500 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1c66c341a08>

In [None]:
preds = model.predict(X_test)
mse = np.mean((y_test - preds)**2)
rmse = np.sqrt(mse)

print("MLP test RMSE: {}".format(rmse))