# Prediction Function

This notebook aims to build the functions that preprocess raw data and predict traffic volume. Here are some details:

**Input:**
- dataset/secondhalf_v4.csv
- outputs/OHE_encoder_second_v3.joblib
- outputs/MM_scaler_second_v2.joblib
- model\Tuned\BiLSTM_second_v3.h5

**Output:**
- for notebook: preprocessing(), predict()
- for website: preprocess_data(), predict()


# Load Dataset

In [3]:
import pandas as pd
# Load the raw dataset used by the rest of this notebook.
# NOTE(review): the file carries an 'Unnamed: 0' column (see the preview below),
# presumably an index that was written out when the CSV was saved — confirm.
df = pd.read_csv('dataset/secondhalf_v4.csv')

In [4]:
# Preview the first rows to check the columns and their values
df.head()

Unnamed: 0,is_holiday,air_pollution_index,humidity,wind_speed,wind_direction,visibility_in_miles,dew_point,temperature,rain_p_h,snow_p_h,clouds_all,weather_type,weather_description,traffic_volume,date_time
0,Not a Holiday,282.0,65.0,3.0,327.0,5.0,5.0,287.59,0.0,0.0,92.0,Rain,light rain,2886.0,2015-06-11 20:00:00
1,Not a Holiday,273.0,65.0,3.0,326.91,5.05,5.05,288.04,0.0,0.0,87.82,Rain,light rain,2953.91,2015-06-11 21:00:00
2,Not a Holiday,264.0,65.0,3.0,326.82,5.09,5.09,288.49,0.0,0.0,83.64,Rain,light rain,3021.82,2015-06-11 22:00:00
3,Not a Holiday,255.0,65.0,3.0,326.73,5.14,5.14,288.94,0.0,0.0,79.45,Rain,light rain,3089.73,2015-06-11 23:00:00
4,Not a Holiday,246.0,65.0,3.0,326.64,5.18,5.18,289.4,0.0,0.0,75.27,Clear,sky is clear,3157.64,2015-06-12 00:00:00


In [5]:
# Keep the first 50 rows as a small sample dataset for the website
df_sample = df.head(50)

In [6]:
# df_sample.to_csv('dataset/sample_v2.csv', index=False)

# Function to Preprocess and Predict Raw Data

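Define the functions that preprocess raw data and run predictions. They rely on `np`, `encoder`, `scaler` and `model`, which are created in the imports cell (In [1]) further down in this notebook, so run that cell first.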

In [2]:
def preprocessing(input_df, n_past=24, n_future=1):
    """Turn raw rows into scaled sliding windows for the RNN model.

    Uses the module-level fitted ``encoder`` and ``scaler`` objects.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Raw rows containing ``date_time``, ``traffic_volume``, the categorical
        columns ``is_holiday``, ``weather_type`` and ``weather_description``,
        plus whatever other columns the scaler was fitted on. The frame is not
        modified and may carry any index (e.g. a slice such as ``df[50:100]``).
    n_past : int, default 24
        Number of consecutive rows in each window.
    n_future : int, default 1
        Number of trailing rows that are kept out of every window.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_windows, n_past, n_features)`` where
        ``n_windows = len(input_df) - n_past - n_future + 1``. When the input
        is too short for a single window, an empty array of shape
        ``(0, n_past, n_features)`` is returned.
    """
    # Categorical Features
    ohe_column = ['is_holiday', 'weather_type', 'weather_description']

    # Reuse the input's index: pd.concat(axis=1) aligns on index labels, so a
    # default 0..n-1 index here would misalign (and NaN-pad) any input whose
    # index does not start at 0.
    encoded = pd.DataFrame(
        encoder.transform(input_df[ohe_column]),
        columns=encoder.get_feature_names_out(),
        index=input_df.index,
    )

    # Replace the raw categorical columns with their encoded counterparts
    features = pd.concat([input_df.drop(columns=ohe_column), encoded], axis=1)

    # Feature Engineering: calendar parts derived from the timestamp
    features['date_time'] = pd.to_datetime(features['date_time'])
    features['day'] = features['date_time'].dt.day
    features['month'] = features['date_time'].dt.month
    features['hour'] = features['date_time'].dt.hour

    # Use 'date_time' as the index so it is not passed to the scaler as a feature
    features = features.set_index('date_time')

    # Put the target column last
    features['traffic_volume'] = features.pop('traffic_volume')

    # Scale the data
    values = np.asarray(scaler.transform(features))

    # Slice into overlapping windows of n_past rows, as required by the RNN
    window_ends = range(n_past, len(values) - n_future + 1)
    if len(window_ends) == 0:
        return np.empty((0, n_past, values.shape[1]), dtype=values.dtype)

    return np.stack([values[end - n_past:end] for end in window_ends])

def predict(input_df):
    """Run the model on every window that ``preprocessing`` builds from ``input_df``.

    The value returned is exactly what ``model.predict`` yields; no inverse
    scaling is applied here.
    """
    return model.predict(preprocessing(input_df))

In [10]:
# 50 sample rows give 26 sliding windows of 24 rows each, hence 26 predictions
pred = predict(df_sample)



In [7]:
# One value per window; the predict() defined above applies no inverse
# transform, so these are still on the scaler's scaled range
pred

array([[0.46962887],
       [0.40817383],
       [0.4639085 ],
       [0.43962893],
       [0.40824783],
       [0.4467039 ],
       [0.4511142 ],
       [0.44394234],
       [0.44184676],
       [0.43967226],
       [0.44469383],
       [0.44680092],
       [0.44819382],
       [0.44579047],
       [0.44378352],
       [0.44081643],
       [0.4379945 ],
       [0.43667823],
       [0.43548775],
       [0.43359905],
       [0.4292503 ],
       [0.42365956],
       [0.41720188],
       [0.41246632],
       [0.4132428 ],
       [0.41192174]], dtype=float32)

# Try to fetch from Database

Here I fetch data from my local MySQL database to simulate how the website will fetch its data.

In [50]:
import mysql.connector

In [51]:
import os

# Connection settings for the MySQL database.
# Each value can be overridden with an environment variable so that real
# credentials never have to be written into the notebook; the fallbacks
# reproduce the local development setup.
mysql_config = {
    'host': os.environ.get('MYSQL_HOST', 'localhost'),
    'user': os.environ.get('MYSQL_USER', 'root'),
    'password': os.environ.get('MYSQL_PASSWORD', ''),
    'database': os.environ.get('MYSQL_DATABASE', 'tugas_akhir'),
}

In [52]:
# Open a connection using the settings in mysql_config.
# NOTE(review): no conn.close() appears in the cells shown — close the
# connection once the data has been fetched.
conn = mysql.connector.connect(**mysql_config)

In [59]:
# Take the 24 rows with the highest ids (inner query), then re-sort them in
# ascending id order (outer query) so they arrive oldest-first.
sql = "SELECT * FROM ( SELECT * FROM sample_v2 ORDER BY id DESC LIMIT 24 ) as SUBQUERY ORDER BY id"

# dictionary=True returns each row as a dict keyed by column name
mycursor = conn.cursor(dictionary=True)
try:
    mycursor.execute(sql)
    myresult = mycursor.fetchall()
finally:
    # Release the cursor even when the query fails
    mycursor.close()

In [60]:
# Build a DataFrame from the fetched rows and drop the 'id' column
df = pd.DataFrame(myresult).drop(columns=['id'])

# Revise Functions to Fit the Dataset from the Database

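I need to make some revisions to the functions so they suit the website's needs: the input is a single window of 24 rows fetched from the database, the numeric columns are cast to float first, and the prediction is converted back to the original traffic-volume scale.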

In [1]:
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from tensorflow.keras.models import load_model

# Load the fitted encoder, the fitted scaler and the trained model from disk.
# NOTE: joblib.load unpickles arbitrary Python objects — only load artifacts
# that come from a trusted source.
encoder = joblib.load('outputs/OHE_encoder_second_v3.joblib')
scaler = joblib.load('outputs/MM_scaler_second_v2.joblib')
# Forward slashes on purpose: '\T' and '\B' are invalid escape sequences in a
# normal string literal, and a backslash-separated path only resolves on Windows.
model = load_model('model/Tuned/BiLSTM_second_v3.h5')

In [66]:
def preprocess_data(input_df, n_past=24):
    """Turn rows fetched from the database into one scaled model input window.

    Uses the module-level fitted ``encoder`` and ``scaler`` objects.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Raw rows, oldest first, containing ``date_time``, the categorical
        columns ``is_holiday``, ``weather_type`` and ``weather_description``,
        and the numeric columns listed in ``columns_to_convert`` below. The
        frame is not modified and may carry any index.
    n_past : int, default 24
        Number of rows in the window. When more rows are supplied, only the
        last ``n_past`` rows are used.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, n_past, n_features)``.

    Raises
    ------
    ValueError
        If ``input_df`` has fewer than ``n_past`` rows.
    """
    # Columns to convert to float
    columns_to_convert = [
        'air_pollution_index', 'humidity', 'wind_speed', 'wind_direction',
        'visibility_in_miles', 'dew_point', 'temperature', 'rain_p_h',
        'snow_p_h', 'clouds_all', 'traffic_volume'
    ]
    # Categorical Features
    ohe_column = ['is_holiday', 'weather_type', 'weather_description']

    if len(input_df) < n_past:
        raise ValueError(
            f"preprocess_data needs at least {n_past} rows, got {len(input_df)}"
        )

    # astype returns a new frame, so the caller's DataFrame keeps its dtypes.
    # Restricting to the last n_past rows keeps the final reshape from folding
    # surplus rows into the feature axis.
    recent = input_df.tail(n_past).astype(dict.fromkeys(columns_to_convert, float))

    # Reuse the input's index: pd.concat(axis=1) aligns on index labels, so a
    # default 0..n-1 index here would misalign any input indexed differently.
    encoded = pd.DataFrame(
        encoder.transform(recent[ohe_column]),
        columns=encoder.get_feature_names_out(),
        index=recent.index,
    )

    # Replace the raw categorical columns with their encoded counterparts
    features = pd.concat([recent.drop(columns=ohe_column), encoded], axis=1)

    # Feature Engineering: calendar parts derived from the timestamp
    features['date_time'] = pd.to_datetime(features['date_time'])
    features['day'] = features['date_time'].dt.day
    features['month'] = features['date_time'].dt.month
    features['hour'] = features['date_time'].dt.hour

    # Use 'date_time' as the index so it is not passed to the scaler as a feature
    features = features.set_index('date_time')

    # Move the target column to the last position
    features['traffic_volume'] = features.pop('traffic_volume')

    # Scale the data
    scaled = np.asarray(scaler.transform(features))

    # Add the leading batch axis: (1, n_past, n_features)
    return scaled.reshape(1, n_past, -1)

In [75]:
def predict(input_df):
    """Predict traffic volume for the window in ``input_df``, in original units.

    The rows are prepared with ``preprocess_data``, fed to ``model`` and the
    scaled output is mapped back through ``scaler.inverse_transform``.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Raw rows as accepted by ``preprocess_data``.

    Returns
    -------
    numpy.ndarray
        1-D array with one unscaled prediction per model output row.
    """
    scaled_pred = model.predict(preprocess_data(input_df))

    # inverse_transform needs as many columns as the scaler was fitted on, so
    # the prediction is repeated across all of them. The count is read from
    # the scaler instead of being hard-coded.
    padded = np.repeat(scaled_pred, scaler.n_features_in_, axis=-1)

    # preprocess_data places traffic_volume in the last column, so the last
    # column of the inverse transform is the prediction in original units.
    return scaler.inverse_transform(padded)[:, -1]

In [76]:
# df now holds the 24 rows fetched from the database (one input window)
pred = predict(df)



In [77]:
# Single prediction, already inverse-transformed by predict()
pred

array([2985.3638], dtype=float32)