# Load Dataset

In [3]:
import pandas as pd
# Read the raw records from CSV; `df` is the frame later passed to predict().
df = pd.read_csv('dataset/secondhalf.csv')

# Function to Preprocess Raw Data

In [15]:
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler

def preprocessing(input_df: pd.DataFrame, *, n_past: int = 24, n_future: int = 1) -> np.ndarray:
    """Turn raw records into sliding windows ready for the recurrent model.

    The frame is one-hot encoded and min-max scaled with the pre-fitted
    artifacts stored under ``outputs/``, then cut into overlapping windows of
    ``n_past`` consecutive rows.

    Args:
        input_df: Raw data containing at least the columns ``is_holiday``,
            ``weather_type``, ``weather_description``, ``date_time`` and
            ``traffic_volume``. The frame itself is not modified.
        n_past: Number of consecutive rows in each window.
        n_future: Forecast horizon; here it only limits how many trailing
            windows are produced.

    Returns:
        Array of shape ``(n_windows, n_past, n_features)`` where
        ``n_windows = max(len(input_df) - n_past - n_future + 1, 0)``.

    Raises:
        ValueError: If ``n_past`` or ``n_future`` is smaller than 1.
    """
    if n_past < 1 or n_future < 1:
        raise ValueError("n_past and n_future must both be >= 1")

    # Load the encoder and scaler that were fitted beforehand.
    encoder = joblib.load('outputs/OHE_encoder_second.joblib')
    scaler = joblib.load('outputs/MM_scaler_second.joblib')

    # One-hot encode the categorical features.
    categorical_columns = ['is_holiday', 'weather_type', 'weather_description']
    encoded = encoder.transform(input_df[categorical_columns])
    if hasattr(encoded, 'toarray'):
        # An encoder configured for sparse output returns a sparse matrix.
        encoded = encoded.toarray()

    # Reuse the caller's index: with a default RangeIndex here, concat(axis=1)
    # would align on labels and pad mismatched rows with NaN whenever
    # input_df is a slice or otherwise lacks a 0..n-1 index.
    encoded_frame = pd.DataFrame(
        encoded,
        columns=encoder.get_feature_names_out(),
        index=input_df.index,
    )
    features = pd.concat(
        [input_df.drop(columns=categorical_columns), encoded_frame],
        axis=1,
    )

    # Derive calendar features from the timestamp.
    features['date_time'] = pd.to_datetime(features['date_time'])
    features['day'] = features['date_time'].dt.day
    features['month'] = features['date_time'].dt.month
    features['hour'] = features['date_time'].dt.hour

    # Use the 'date_time' column as the index of the DataFrame.
    features = features.set_index('date_time')

    # Move the target to the last position (pop + re-insert appends it).
    features['traffic_volume'] = features.pop('traffic_volume')

    # Scale every column, target included, with the pre-fitted scaler.
    scaled = np.asarray(scaler.transform(features))

    # Build overlapping windows: each one holds the n_past rows that precede
    # position `end`.
    n_rows, n_features = scaled.shape
    window_ends = range(n_past, n_rows - n_future + 1)
    if not window_ends:
        # Too few rows for a single window: keep the 3-D layout so callers
        # still receive a well-formed (empty) batch.
        return np.empty((0, n_past, n_features), dtype=scaled.dtype)

    return np.stack([scaled[end - n_past:end] for end in window_ends])

# Function to Predict from Raw Data

In [16]:
from tensorflow.keras.models import load_model

def predict(input_df: pd.DataFrame):
    """Preprocess raw data and return the saved model's predictions.

    Args:
        input_df: Raw data in the format expected by ``preprocessing``.

    Returns:
        Whatever ``model.predict`` yields for the windows produced by
        ``preprocessing``.
    """
    # Preprocess raw data into model-ready windows.
    X_test = preprocessing(input_df)

    # Load the model. Forward slashes are used because the previous
    # backslash-separated literal contained the invalid escape sequences
    # "\T" and "\B" and only resolved on Windows; this form works on every
    # platform.
    model = load_model('model/Tuned/BiLSTM_second.h5')

    # Predict on the preprocessed data using the loaded model.
    return model.predict(X_test)

In [17]:
# Run the full pipeline on the raw frame. Despite the name, `y_test` holds the
# value returned by predict() (model outputs), not ground-truth labels.
y_test = predict(df)



In [18]:
# One row per 24-step input window built by preprocessing(), one value per row.
y_test.shape

(16924, 1)

In [19]:
# Display the raw model outputs.
# NOTE(review): the values appear to be on the MinMax-scaled scale rather than
# in original traffic_volume units; confirm whether an inverse transform is needed.
y_test

array([[0.46986553],
       [0.44493145],
       [0.47829697],
       ...,
       [0.3122663 ],
       [0.2533378 ],
       [0.18210873]], dtype=float32)