# Neural Network Prediction
Predict the number of trips at the different time resolutions with a trained neural network.

In [21]:
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import os

from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

In [22]:
# Output directory, resolved relative to the current working directory.
path_output = os.path.join(os.getcwd(), "..", "data", "output")

In [23]:
# Read the feature table and discard the "Unnamed: 0" column in one step.
features = pd.read_csv(
    os.path.join(path_output, "Features.csv")
).drop(columns="Unnamed: 0")

### Load parameters from CSV

In [24]:
# Settings live in ../data/input/params.csv; the column named "0" is dropped on load.
params = pd.read_csv(
    os.path.join(os.getcwd(), "..", "data", "input", "params.csv")
).drop(columns="0")
# Each setting is looked up by its name in "param" and read from "value".
_test_size = params.loc[params["param"] == "test_size", "value"].values[0]
_random_state = int(
    params.loc[params["param"] == "random_state", "value"].values[0]
)

# Prediction

In [30]:
def predict(on="24_sum", hex_size="hexa_small"):
    """
        Predict the number of trips on the test split with a stored model.

        The module-level ``features`` frame is split with ``_random_state`` and
        ``_test_size``. The test features are passed through the pickled
        standard scaler and PCA belonging to ``hex_size``, and the saved Keras
        model for ``on``/``hex_size`` predicts on the result.
        NOTE(review): presumably the split parameters mirror the ones used
        during training, so the test rows are unseen by the model -- confirm.

        Args:
            on (str): target column / time resolution to predict; one of
                "24_sum", "6_sum", "2_sum", "1_sum"
            hex_size (str): spatial resolution column kept as a feature;
                "hexa_small" or "hexa_big"

        Returns:
            tuple: ``(y_test_predicted, y_test)`` -- the output of
            ``nn_model.predict`` on the transformed test features, and the
            matching test slice of ``features[on]``

        Raises:
            ValueError: if ``on`` or ``hex_size`` is not a supported value

    """
    time_resolutions = ("24_sum", "6_sum", "2_sum", "1_sum")
    hex_sizes = ("hexa_small", "hexa_big")

    # Fail fast with a clear message. Without this check every unknown
    # hex_size fell into the "hexa_big" branch and only failed later when the
    # scaler file could not be opened.
    if on not in time_resolutions:
        raise ValueError(
            "on must be one of {}, got {!r}".format(time_resolutions, on)
        )
    if hex_size not in hex_sizes:
        raise ValueError(
            "hex_size must be one of {}, got {!r}".format(hex_sizes, hex_size)
        )

    # Temporal Resolution: all four target columns are removed from the inputs.
    print("Temporal Resolution is", on)
    features_X = features.drop(list(time_resolutions), axis=1)
    features_y = features[on]

    # Spatial Resolution: keep only the hexagon column that was asked for.
    print("Spatial Resolution is", hex_size)
    unused_hex = "hexa_big" if hex_size == "hexa_small" else "hexa_small"
    features_X = features_X.drop(unused_hex, axis=1)

    print("Split Data with random state", _random_state, "and test size", str(_test_size)+"...")
    # Only the test part is needed here; the training part is discarded.
    _, X_test, _, y_test = train_test_split(
        features_X, features_y, random_state=_random_state, test_size=_test_size
    )

    models_dir = os.path.join(path_output, "models")

    # NOTE: pickle.load can execute arbitrary code -- only load artefacts that
    # were written by this project.
    print("Scale Data with Standard Scaler...")
    with open(os.path.join(models_dir, "Standard_Scaler_"+hex_size+".pkl"), "rb") as f:
        standard_scaler = pickle.load(f)
    X_test_scaled = standard_scaler.transform(X_test)

    print("Do PCA on Data...")
    with open(os.path.join(models_dir, "PCA_"+hex_size+".pkl"), "rb") as f:
        pca = pickle.load(f)
    X_test_transformed = pca.transform(X_test_scaled)

    print("Load Model", on+"...")
    nn_model = keras.models.load_model(os.path.join(models_dir, "NN_Regression_Model_"+on+"_"+hex_size))
    print("Predict...")
    y_test_predicted = nn_model.predict(X_test_transformed)

    return y_test_predicted, y_test

In [31]:
# Resolutions to evaluate:
#   time_resolution    -> one of "24_sum", "6_sum", "2_sum", "1_sum"
#   spatial_resolution -> "hexa_small" or "hexa_big"
time_resolution = "1_sum"
spatial_resolution = "hexa_small"

y_test_predicted, y_test = predict(
    on=time_resolution,
    hex_size=spatial_resolution,
)

print("Plot difference between Real and Predicted...")
fig, ax = plt.subplots(figsize=(16, 8), dpi=300)
# Blue markers only ("bo"): predictions on the x axis, real values on the y axis.
ax.plot(y_test_predicted, y_test, "bo")
ax.set_title("Real Y vs Predicted Y {}".format(time_resolution), fontsize=20)
ax.set_xlabel("Predicted Y", fontsize=18)
ax.set_ylabel("Real Y", fontsize=18)
fig.savefig(
    os.path.join(
        path_output,
        "Real_vs_Predicted_{}_{}.png".format(time_resolution, spatial_resolution),
    )
)
# Close the figure once it has been written to disk.
plt.close(fig)
print("Done")

Temporal Resolution is 1_sum
Spatial Resolution is hexa_small
Split Data with random state 42 and test size 0.3...
Scale Data with Standard Scaler...
Do PCA on Data...
Load Model 1_sum...
Predict...
Plot difference between Real and Predicted...
Done
