<a href="https://colab.research.google.com/github/tejasnavalkhe/Optimising-Car-Sharing-Profitability-with-a-Regional-Pricing-Strategy/blob/master/code/Rates%20Predictions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Load libraries and data

## 1.1. Load libraries

In [1]:
pip install category_encoders



In [2]:
# Tick-label formatter for the y-axis
def format_y_axis(value, tick_number):
    """Return ``value`` as text with thousands separators and no decimals.

    ``tick_number`` is accepted but not used; the two-argument signature
    presumably exists so the function can be passed to matplotlib's
    ``FuncFormatter`` (imported in this notebook) — confirm at the call site.
    """
    return '{:,.0f}'.format(value)

In [3]:
# Import required libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px
import warnings
from scipy import stats
from matplotlib.ticker import FuncFormatter
from category_encoders import BinaryEncoder, OneHotEncoder
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
import joblib

# Display all the columns of the dataframe
# (use the public `pd.set_option`; `pd.pandas` is only an incidental self-reference)
pd.set_option('display.max_columns', None)

# Ignore all warnings
# NOTE: this silences every warning category for the rest of the session,
# including pandas/sklearn deprecation and chained-assignment warnings.
warnings.filterwarnings('ignore')

# Mount Google Drive (Colab only) so the data, encoder and model folders below are reachable
from google.colab import drive
drive.mount('/content/drive')

# Data location (trailing slash is required: file names are appended with `+`)
DATA_PATH = "/content/drive/MyDrive/MSc Dissertation/data/"
# Encoders location
ENCODERS_PATH = "/content/drive/MyDrive/MSc Dissertation/encoders/"
# Model location
MODEL_PATH = "/content/drive/MyDrive/MSc Dissertation/models/"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## 1.2. Load data

In [4]:
# Read the 2024 bookings CSV from DATA_PATH and preview the first rows.
# `raw_df` is the input later passed to preprocess_data.
raw_df = pd.read_csv(DATA_PATH + '2024 Bookings.csv')
raw_df.head()

Unnamed: 0,account_id,Contract,user_id,vehicle_description,vehicle_registration,vehicle_communication_id,vehicle_operator_name,vehicle_office_use,location_description,location_office_use,booking_id,booking_reservation,booking_maintenance,booking_start,booking_end,booking_duration,booking_actual_start,booking_actual_end,booking_actual_duration,booking_billed_start,booking_billed_end,booking_billed_duration,booking_mileage,booking_tariff,booking_rates_hours,booking_rates_24hours,booking_rates_overnight,booking_estimated_cost,booking_actual_cost_distance,booking_actual_cost_time,booking_actual_cost_total,booking_credits_used,booking_transactions_value,booking_total_paid,booking_status,booking_ended_early,booking_created_at,booking_cancelled_at,booking_cancellation_reason
0,d4470f76-e885-11ee-85ef-e36c0de5945c,PAYG,4facab36-e885-11ee-a3bd-07de9431abb8,White Toyota Corolla Petrol Hybrid Estate (Aut...,NL70 BXV,1502,Co-wheels,PAYG,Reading - University Whiteknights Campus,REA005,8a874d26-10a6-11ef-ad23-5f794c6a9a23,96748829,no,05/12/2024 23:45,5/13/2024 10:30,645,5/13/2024 3:07,5/13/2024 8:42,335,05/12/2024 23:45,5/13/2024 10:30,645,33.0,Co-wheels Standard Large,0.0,1,0,59.2,7.92,59.2,67.12,0,-67.12,67.12,ended,no,05/12/2024 22:28,,
1,ad4720a8-371c-11e8-9c54-45ef183a979b,Contract,c7b038e0-7498-11ec-8cc3-71fcb7a46099,Toyota RAV4 Hybrid (Automatic) - NL71 GJK,NL71 GJK,1680,Co-wheels,Pool Car - Telematics,North Shields - North Tyneside General Hospital,NTH006,0e355148-10ae-11ef-a785-871fd5ad059c,96748838,no,05/12/2024 23:15,5/13/2024 8:15,540,5/13/2024 0:59,5/13/2024 7:41,402,05/12/2024 23:15,5/13/2024 8:15,540,6.0,"Billing Exempt (24/7) (Default, Co-wheels)",9.0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,ended,no,05/12/2024 23:22,,
2,bf50a572-ad2c-11e8-8b49-31a8e401ec81,Contract,eeadfff2-f036-11e8-a070-b5d4b9914a5e,Toyota RAV4 Hybrid (Automatic) - NL71 GFO,NL71 GFO,1678,Co-wheels,Pool Car - Telematics,Bedlington - Foundry House,NTH003,11b2208c-10a2-11ef-8301-3f52737f5ce6,96748825,no,05/12/2024 22:45,5/13/2024 8:00,555,05/12/2024 23:06,5/13/2024 7:35,509,05/12/2024 22:45,5/13/2024 8:00,555,48.0,"Billing Exempt (24/7) (Default, Co-wheels)",9.25,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,ended,no,05/12/2024 21:56,,
3,6ac5731a-845f-11e9-8302-0b50d458dea3,PAYG,ec665b10-845e-11e9-97c2-d5ae114ceffd,White Toyota Yaris Petrol Hybrid (Automatic) -...,NC19 KYV,1291,Co-wheels,PAYG - Funded,Glasgow - Coustonholm Road,GLA024,a5e831b6-107e-11ef-9cc1-d71c27abf5d8,96748719,no,05/12/2024 22:30,05/12/2024 23:00,30,05/12/2024 22:32,05/12/2024 23:01,29,05/12/2024 22:30,05/12/2024 23:00,30,7.0,Glasgow Everyday,0.5,0,0,2.88,1.54,2.88,4.42,0,-4.42,4.42,ended,no,05/12/2024 17:42,,
4,bf50a572-ad2c-11e8-8b49-31a8e401ec81,Contract,ebad311e-ae10-11ed-8108-51e469acfaa6,Toyota RAV4 Hybrid (Automatic) - NL71 GFK,NL71 GFK,1677,Co-wheels,Pool Car - Telematics,Bedlington - Foundry House,NTH003,5ff3faba-07d6-11ef-b3bf-e375787a181c,96741857,no,05/12/2024 22:15,5/13/2024 8:00,585,05/12/2024 22:13,5/13/2024 7:37,564,05/12/2024 22:15,5/13/2024 8:00,585,52.0,"Billing Exempt (24/7) (Default, Co-wheels)",9.75,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,ended,no,05/01/2024 17:17,,


## 1.3. Load encoders

In [5]:
# Load the encoders
# These module-level objects are used by encode_features (and predict_rates) below.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code —
# only load encoder files that come from a trusted source.
binary_encoder = joblib.load(ENCODERS_PATH + 'binary_encoder.pkl')
one_hot_encoder = joblib.load(ENCODERS_PATH + 'one_hot_encoder.pkl')

# 2. Data Preprocessing

## 2.1. Preprocess Data

In [6]:
# define a function for data preprocessing
def preprocess_data(dataframe, tariff_df, location_mapping):
    """Clean raw booking records and derive the per-booking feature table.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Raw bookings, one row per booking, with the columns of the bookings
        export (``Contract``, ``booking_tariff``, ``booking_*`` dates/costs,
        ``location_office_use``, ``location_description``, ...). Not modified.
    tariff_df : pandas.DataFrame
        Tariff lookup with the columns ``Tariff``, ``PAYG or Contract``,
        ``Petrol Or EV``, ``Size Category``, ``Vehicle Type`` and ``Notes``.
        Not modified.
    location_mapping : dict
        Maps the first three characters of ``location_office_use`` to the
        location name stored in the ``location`` column.

    Returns
    -------
    pandas.DataFrame
        PAYG bookings only, de-duplicated on ``booking_id``, with date-part,
        weekend/peak-hour, season, holiday and ``per_mile`` features added,
        outliers capped to the 1.5*IQR fences, rows with zero/missing mileage
        removed, and columns in a fixed order.

    Raises
    ------
    KeyError
        If an expected column is missing.
    ValueError
        If one of the integer columns still contains missing/non-numeric
        values after coercion (``astype(int)`` cannot represent NaN).
    """
    # Step 1: Only select PAYG fleet data
    # 'McCarthy & Stone EV' bookings are treated as Contract business, so they
    # have to be excluded together with the rows already labelled 'Contract'.
    # (Relabelling them *after* the PAYG filter had no effect on the result.)
    contract_only_tariff = 'McCarthy & Stone EV'
    is_payg = (dataframe['Contract'] == 'PAYG') & (dataframe['booking_tariff'] != contract_only_tariff)
    # .copy() so the column assignments below never write into the caller's frame
    dataframe = dataframe.loc[is_payg].copy()
    tariff_df = tariff_df.loc[tariff_df['PAYG or Contract'] == 'PAYG']

    # Step 2: tariff_df essential cleaning
    tariff_df = tariff_df.rename(columns={'Petrol Or EV': 'Fuel Type'})

    # Step 3: Convert datatypes to pandas datetime object (unparseable values become NaT)
    date_columns = ['booking_start', 'booking_end', 'booking_actual_start', 'booking_actual_end',
                    'booking_billed_start', 'booking_billed_end', 'booking_created_at', 'booking_cancelled_at']
    for col in date_columns:
        dataframe[col] = pd.to_datetime(dataframe[col], errors='coerce')

    # Step 4: Convert to Integer type
    # No fillna on purpose: a missing/non-numeric value makes astype(int) raise
    # instead of being silently turned into 0.
    int_columns = ['booking_duration', 'booking_actual_duration', 'booking_billed_duration',
                   'booking_rates_24hours', 'booking_rates_overnight']
    for col in int_columns:
        dataframe[col] = pd.to_numeric(dataframe[col], errors='coerce').astype(int)

    # Step 5: Convert to float type (non-numeric values become NaN)
    float_columns = ['booking_rates_hours', 'booking_actual_cost_distance', 'booking_actual_cost_time',
                     'booking_actual_cost_total']
    for col in float_columns:
        dataframe[col] = pd.to_numeric(dataframe[col], errors='coerce').astype(float)

    # Step 6: Missing Values - drop rows where both location fields are missing
    dataframe = dataframe.dropna(subset=['location_office_use', 'location_description'], how='all')

    # Step 7: Create Location column from location_office_use
    dataframe['location_office_use'] = dataframe['location_office_use'].str[:3]
    # Where 'location_office_use' is null, derive the short code from
    # 'location_description'. Patterns are tried in order; a row filled by an
    # earlier pattern is no longer null and is not overwritten by a later one.
    # NOTE(review): "S'land" is mapped to 'SWI' (Swindon in location_mapping);
    # confirm this is not meant to be 'SUN' (Sunderland).
    description_fallbacks = [
        ('Lower Maudlin|Bristol', 'BRI'),
        ('glasgow|Glsgow', 'GLA'),
        ('Nwcastle', 'NCL'),
        ('Birmingham', 'BIR'),
        ('Tunbridge Wells', 'TUN'),
        ('Frome', 'FRO'),
        ('Exeter', 'EXE'),
        ('Durham', 'DUR'),
        ('Salford|Slaford', 'SAL'),
        ("S'land", 'SWI'),
        ('Reading', 'REA'),
    ]
    for pattern, code in description_fallbacks:
        code_missing = dataframe['location_office_use'].isnull()
        description_matches = dataframe['location_description'].str.contains(pattern, case=False, na=False)
        dataframe.loc[description_matches & code_missing, 'location_office_use'] = code

    dataframe['location'] = dataframe['location_office_use'].map(location_mapping)

    # Step 8: Merge tariff dataframe with original dataframe
    dataframe = pd.merge(dataframe, tariff_df, left_on='booking_tariff', right_on='Tariff', how='left')

    # Step 9: Only Select Size Category other than Various
    # (rows whose tariff found no match have a NaN Size Category and are kept)
    dataframe = dataframe[dataframe['Size Category'] != 'Various'].copy()

    # Step 10: Extract features from dates
    date_features = ['hour', 'dayofweek', 'month', 'year']
    date_sources = ['booking_billed_start', 'booking_billed_end', 'booking_created_at']
    for feature in date_features:
        for source in date_sources:
            dataframe[f'{source}_{feature}'] = getattr(dataframe[source].dt, feature)

    # Step 11: Weekend and peak hours
    ## Weekend (dayofweek 5 = Saturday, 6 = Sunday)
    dataframe['is_weekend'] = dataframe['booking_billed_start_dayofweek'].isin([5, 6]).astype('int64')

    ## peak hours
    # Aggregate bookings by the hour in which they were created
    hourly_bookings = dataframe.groupby('booking_created_at_hour').size()

    # Peak hours are those whose booking count reaches the 75th percentile of the hourly counts
    threshold = np.int32(np.round(np.percentile(hourly_bookings, 75)))
    peak_hours = hourly_bookings[hourly_bookings >= threshold].index.tolist()

    # add column for is_peak_hour
    dataframe['is_peak_hour'] = dataframe['booking_created_at_hour'].isin(peak_hours).astype('int64')

    # Step 12: Remove columns
    columns_to_remove = ['account_id', 'Contract', 'user_id', 'location_description', 'location_office_use', 'vehicle_description', 'vehicle_registration',
                         'vehicle_communication_id', 'vehicle_operator_name', 'vehicle_office_use', 'booking_reservation', 'booking_maintenance',
                         'booking_start', 'booking_end', 'booking_credits_used', 'booking_transactions_value', 'booking_estimated_cost', 'booking_total_paid',
                         'booking_status', 'booking_ended_early', 'booking_cancelled_at', 'booking_cancellation_reason', 'PAYG or Contract', 'Notes',
                         'booking_duration', 'Tariff', 'Size Category']
    dataframe = dataframe.drop(columns=columns_to_remove)

    # Step 13: Drop duplicates based on 'booking_id', keep='first'
    dataframe = dataframe.drop_duplicates(subset=['booking_id'], keep='first')

    # Step 14: Add Seasons features (apply it to booking_billed_start)
    def get_season(date):
        # NOTE(review): these month ranges are exactly what the earlier
        # day-based conditions evaluated to (June counted as Spring, September
        # as Summer, December as Autumn). They are kept unchanged so the labels
        # stay consistent with anything already fitted on them; confirm whether
        # Mar-May / Jun-Aug / Sep-Nov / Dec-Feb was intended.
        month = date.month
        if 3 <= month <= 6:
            return 'Spring'
        if 7 <= month <= 9:
            return 'Summer'
        if 10 <= month <= 12:
            return 'Autumn'
        # January, February and missing dates (NaT) end up here
        return 'Winter'

    dataframe['season'] = dataframe['booking_billed_start'].apply(get_season)

    # Step 15: Add Holiday features
    # Single-day holidays as (month, day). They are matched regardless of year;
    # presumably these are the 2024 bank-holiday dates - update for other years.
    single_day_holidays = {(3, 29), (4, 1), (5, 6), (5, 27), (8, 26)}

    def is_holiday(date):
        # 24-31 December and 1 January are treated as one holiday period
        if (date.month == 12 and date.day >= 24) or (date.month == 1 and date.day <= 1):
            return 1
        return 1 if (date.month, date.day) in single_day_holidays else 0

    dataframe['is_holiday'] = dataframe['booking_billed_start'].apply(is_holiday)

    # Step 16: Outliers
    # Check for outliers using IQR
    outlier_columns = ['booking_actual_duration', 'booking_billed_duration', 'booking_mileage',
                       'booking_actual_cost_distance', 'booking_actual_cost_time', 'booking_actual_cost_total']
    Q1 = dataframe[outlier_columns].quantile(0.25)
    Q3 = dataframe[outlier_columns].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    # Handling outliers by capping and flooring (NaN values are left untouched)
    for col in outlier_columns:
        dataframe[col] = np.where(dataframe[col] > upper_bound[col], upper_bound[col], dataframe[col])
        dataframe[col] = np.where(dataframe[col] < lower_bound[col], lower_bound[col], dataframe[col])

    # Step 17: per_mile
    # Rows with zero or missing mileage are removed so the division is defined
    dataframe['booking_mileage'] = dataframe['booking_mileage'].replace(0, np.nan)
    dataframe['booking_mileage'] = dataframe['booking_mileage'].replace('', np.nan)
    dataframe = dataframe.dropna(subset=['booking_mileage'])
    dataframe['per_mile'] = dataframe['booking_actual_cost_distance'] / dataframe['booking_mileage']

    # Fixed output column order; any column not listed here is dropped
    new_order = ['location', 'booking_id', 'booking_actual_start', 'booking_actual_end', 'booking_actual_duration', 'booking_billed_start', 'booking_billed_start_hour',
                 'booking_billed_start_dayofweek', 'booking_billed_start_month', 'booking_billed_start_year', 'booking_billed_end', 'booking_billed_end_hour',
                 'booking_billed_end_dayofweek', 'booking_billed_end_month', 'booking_billed_end_year', 'booking_billed_duration', 'booking_mileage',
                 'booking_rates_hours', 'booking_rates_24hours', 'booking_rates_overnight', 'booking_actual_cost_distance', 'booking_actual_cost_time',
                 'booking_actual_cost_total', 'booking_created_at', 'booking_created_at_hour', 'booking_created_at_dayofweek', 'booking_created_at_month',
                 'booking_created_at_year', 'season', 'is_holiday', 'Vehicle Type', 'Fuel Type', 'is_peak_hour', 'is_weekend', 'per_mile']

    dataframe = dataframe[new_order]

    return dataframe

## 2.2. Data Transformation

In [7]:
# log1p transformation of the skewed numeric columns
def transform_data(dataframe):
    """Replace the duration, mileage and cost columns with ``log1p`` of their values.

    The columns are overwritten on the frame that is passed in, and that same
    frame object is returned. ``inverse_transform_data`` applies ``expm1`` to
    the same six columns.
    """
    skewed_columns = [
        'booking_actual_duration',
        'booking_billed_duration',
        'booking_mileage',
        'booking_actual_cost_distance',
        'booking_actual_cost_time',
        'booking_actual_cost_total',
    ]
    # Assigning a frame to a list of columns writes each column in turn
    dataframe[skewed_columns] = np.log1p(dataframe[skewed_columns])
    return dataframe

## 2.3. Inverse Data Transformation

In [8]:
# inverse of the log1p transformation
def inverse_transform_data(dataframe):
    """Undo ``transform_data`` by applying ``expm1`` to the same six columns.

    The duration, mileage and cost columns are overwritten on the frame that
    is passed in, and that same frame object is returned.
    """
    logged_columns = [
        'booking_actual_duration',
        'booking_billed_duration',
        'booking_mileage',
        'booking_actual_cost_distance',
        'booking_actual_cost_time',
        'booking_actual_cost_total',
    ]
    # Assigning a frame to a list of columns writes each column in turn
    dataframe[logged_columns] = np.expm1(dataframe[logged_columns])
    return dataframe

## 2.4. Feature Encoding

In [9]:
def encode_features(dataframe):
    """Replace the categorical columns with their encoded representation.

    Uses the module-level ``binary_encoder`` (for ``location``) and
    ``one_hot_encoder`` (for the remaining categorical columns); both are
    only applied with ``transform`` here, never fitted. The input frame is
    not modified: the encoded columns are concatenated onto it into a new
    frame, the original categorical columns are dropped from that new frame,
    and the result is returned in a fixed column order.

    The names in the fixed order (``location_0`` ... ``location_6``,
    ``season_Winter``, ``is_holiday_1.0``, ...) presumably match what the
    loaded encoders emit — a differently fitted encoder would make the final
    selection raise ``KeyError``.
    """
    location_column = 'location'
    one_hot_columns = ['season', 'is_holiday', 'is_peak_hour', 'is_weekend', 'Vehicle Type', 'Fuel Type']

    # Binary encoding for 'location' due to high cardinality
    location_bits = binary_encoder.transform(dataframe[location_column])

    # One-hot encoding for the other categorical features
    one_hot_bits = one_hot_encoder.transform(dataframe[one_hot_columns])

    encoded = pd.concat([dataframe, location_bits, one_hot_bits], axis=1)
    encoded = encoded.drop([location_column] + one_hot_columns, axis=1)

    column_order = [
        'location_0', 'location_1', 'location_2', 'location_3', 'location_4', 'location_5', 'location_6',
        'booking_id', 'booking_actual_start', 'booking_actual_end', 'booking_actual_duration',
        'booking_billed_start', 'booking_billed_start_hour', 'booking_billed_start_dayofweek',
        'booking_billed_start_month', 'booking_billed_start_year',
        'booking_billed_end', 'booking_billed_end_hour', 'booking_billed_end_dayofweek',
        'booking_billed_end_month', 'booking_billed_end_year',
        'booking_billed_duration', 'booking_mileage', 'booking_rates_hours',
        'booking_rates_24hours', 'booking_rates_overnight',
        'booking_actual_cost_distance', 'booking_actual_cost_time', 'booking_actual_cost_total',
        'booking_created_at', 'booking_created_at_hour', 'booking_created_at_dayofweek',
        'booking_created_at_month', 'booking_created_at_year',
        'season_Winter', 'season_Autumn', 'season_Summer', 'season_Spring',
        'is_holiday_1.0', 'is_holiday_0.0',
        'Vehicle Type_City', 'Vehicle Type_Everyday', 'Vehicle Type_Family', 'Vehicle Type_Van', 'Vehicle Type_7 Seater',
        'Fuel Type_Petrol', 'Fuel Type_EV', 'Fuel Type_Hydrogen',
        'is_peak_hour_1.0', 'is_peak_hour_0.0', 'is_weekend_1.0', 'is_weekend_0.0',
        'per_mile',
    ]
    return encoded[column_order]

## 2.5. Process Data

In [10]:
# Lookup from three-letter location code to location name.
# preprocess_data truncates 'location_office_use' to its first three characters
# and maps it through this dict to build the 'location' column; codes that are
# not listed here map to NaN.
location_mapping = {
    'ABI': 'Abingdon', 'ABN': 'Aberdeen', 'BIC': 'Bicester', 'BIL': 'Billingshurst', 'BIR': 'Birmingham', 'BNB': 'Banbury', 'BOU': 'Bournemouth',
    'BRE': 'Brentwood', 'BRI': 'Bristol', 'CAN': 'Canterbury', 'CHI': 'Chichester', 'CHM': 'Chelmsford', 'CLY': 'Crawley', 'COA': 'Coatbridge',
    'DAL': 'Dalkeith', 'DDE': 'Dundee', 'DER': 'Derby', 'DUN': 'Dunbar', 'DUR': 'Durham', 'EAS': 'Eastbourne', 'EDI': 'Edinburgh', 'ELI': 'Elgin',
    'EST': 'Eastleigh', 'EXE': 'Exeter', 'EYN': 'Eynsham', 'FAL': 'Falkirk', 'FRO': 'Frome', 'GHD': 'Gateshead', 'GLA': 'Glasgow', 'HAD': 'Haddington',
    'HAI': 'Hainault', 'HAR': 'Harrogate', 'HAS': 'Hastings', 'HOR': 'Horsham', 'HOT': 'Henley-on-Thames', 'HRE': 'Houghton-Regis', 'HUN': 'Huntly',
    'HWY': 'High Wycombe', 'INR': 'Inverurie', 'IOW': 'Isle-of-Wight', 'IPS': 'Ipswich', 'KID': 'Kidlington', 'KNA': 'Knaresborough', 'KNT': 'Maidstone',
    'LAN': 'Lancaster', 'LEM': 'Leamington-Spa', 'LEW': 'Lewes', 'LON': 'Harrow', 'MUS': 'Musselburgh', 'NAN': 'Nantwich', 'NBE': 'North Berwick',
    'NCL': 'Newcastle', 'NEW': 'Newbury', 'NTH': 'North Shields', 'OHL': 'Oxenholme', 'ONF': 'On-fleet Bay', 'ORK': 'Orkney', 'OXF': 'Oxford',
    'PEN': 'Penrith', 'PER': 'Perth', 'PLY': 'Plymouth', 'PUT': 'Putney', 'REA': 'Reading', 'RIP': 'Ripon', 'SAF': 'Saffron Walden', 'SAL': 'Salford',
    'SBY': 'Salisbury', 'SHR': 'Shrewsbury', 'SOL': 'Solihull', 'SSH': 'South Shields', 'SUN': 'Sunderland', 'SWI': 'Swindon', 'TUN': 'Tunbridge Wells',
    'UPP': 'Upper Tooting', 'WAL': 'Walton-on-Thames', 'WAN': 'Wandsworth', 'WAR': 'Warwick', 'WIN': 'Winchester', 'WLG': 'Wallingford', 'WND': 'Windermere',
    'WNT': 'Wantage', 'WOK': 'Wokingham', 'WOR': 'Worthing', 'WRR': 'Warrington', 'WSM': 'Weston-super-Mare'
}

In [11]:
# Tariff lookup table; preprocess_data left-joins it onto the bookings
# by matching 'booking_tariff' against its 'Tariff' column.
tariff_df = pd.read_csv(DATA_PATH + 'Diff Tariffs.csv')

# data preprocessing
df = preprocess_data(raw_df, tariff_df, location_mapping)

In [12]:
df.head()

Unnamed: 0,location,booking_id,booking_actual_start,booking_actual_end,booking_actual_duration,booking_billed_start,booking_billed_start_hour,booking_billed_start_dayofweek,booking_billed_start_month,booking_billed_start_year,booking_billed_end,booking_billed_end_hour,booking_billed_end_dayofweek,booking_billed_end_month,booking_billed_end_year,booking_billed_duration,booking_mileage,booking_rates_hours,booking_rates_24hours,booking_rates_overnight,booking_actual_cost_distance,booking_actual_cost_time,booking_actual_cost_total,booking_created_at,booking_created_at_hour,booking_created_at_dayofweek,booking_created_at_month,booking_created_at_year,season,is_holiday,Vehicle Type,Fuel Type,is_peak_hour,is_weekend,per_mile
0,Reading,8a874d26-10a6-11ef-ad23-5f794c6a9a23,2024-05-13 03:07:00,2024-05-13 08:42:00,335.0,2024-05-12 23:45:00,23,6,5,2024,2024-05-13 10:30:00,10,0,5,2024,645.0,33.0,0.0,1,0,7.92,59.2,67.12,2024-05-12 22:28:00,22,6,5,2024,Spring,0,Family,Petrol,0,1,0.24
1,Glasgow,a5e831b6-107e-11ef-9cc1-d71c27abf5d8,2024-05-12 22:32:00,2024-05-12 23:01:00,29.0,2024-05-12 22:30:00,22,6,5,2024,2024-05-12 23:00:00,23,6,5,2024,30.0,7.0,0.5,0,0,1.54,2.88,4.42,2024-05-12 17:42:00,17,6,5,2024,Spring,0,Everyday,Petrol,0,1,0.22
2,Tunbridge Wells,770484a8-10a1-11ef-ae9f-4d6cef9a844e,2024-05-12 21:52:00,2024-05-12 23:10:00,78.0,2024-05-12 21:45:00,21,6,5,2024,2024-05-13 00:15:00,0,0,5,2024,150.0,36.0,2.5,0,0,8.28,16.25,24.53,2024-05-12 21:51:00,21,6,5,2024,Spring,0,Everyday,Petrol,0,1,0.23
3,Tunbridge Wells,e30b46ec-109b-11ef-8f56-3d407de5dd99,2024-05-12 21:30:00,2024-05-12 21:56:00,26.0,2024-05-12 21:30:00,21,6,5,2024,2024-05-12 22:30:00,22,6,5,2024,60.0,3.0,1.0,0,0,0.69,7.4,8.09,2024-05-12 21:11:00,21,6,5,2024,Spring,0,Family,Petrol,0,1,0.23
4,Aberdeen,3851e08e-1097-11ef-978c-99dfd9acc5d3,2024-05-12 20:40:00,2024-05-12 21:47:00,67.0,2024-05-12 20:45:00,20,6,5,2024,2024-05-12 22:15:00,22,6,5,2024,90.0,10.0,1.5,0,0,1.4,10.88,12.28,2024-05-12 20:38:00,20,6,5,2024,Spring,0,Everyday,EV,0,1,0.14


## 2.6. Transform Data

In [13]:
# data transformation
# NOTE: not idempotent - transform_data overwrites the columns of `df` with
# log1p of their values, so running this cell a second time applies log1p
# again. Re-run the preprocessing cell first if this cell must be re-executed.
df = transform_data(df)

In [14]:
df.head()

Unnamed: 0,location,booking_id,booking_actual_start,booking_actual_end,booking_actual_duration,booking_billed_start,booking_billed_start_hour,booking_billed_start_dayofweek,booking_billed_start_month,booking_billed_start_year,booking_billed_end,booking_billed_end_hour,booking_billed_end_dayofweek,booking_billed_end_month,booking_billed_end_year,booking_billed_duration,booking_mileage,booking_rates_hours,booking_rates_24hours,booking_rates_overnight,booking_actual_cost_distance,booking_actual_cost_time,booking_actual_cost_total,booking_created_at,booking_created_at_hour,booking_created_at_dayofweek,booking_created_at_month,booking_created_at_year,season,is_holiday,Vehicle Type,Fuel Type,is_peak_hour,is_weekend,per_mile
0,Reading,8a874d26-10a6-11ef-ad23-5f794c6a9a23,2024-05-13 03:07:00,2024-05-13 08:42:00,5.817111,2024-05-12 23:45:00,23,6,5,2024,2024-05-13 10:30:00,10,0,5,2024,6.4708,3.526361,0.0,1,0,2.188296,4.097672,4.221271,2024-05-12 22:28:00,22,6,5,2024,Spring,0,Family,Petrol,0,1,0.24
1,Glasgow,a5e831b6-107e-11ef-9cc1-d71c27abf5d8,2024-05-12 22:32:00,2024-05-12 23:01:00,3.401197,2024-05-12 22:30:00,22,6,5,2024,2024-05-12 23:00:00,23,6,5,2024,3.433987,2.079442,0.5,0,0,0.932164,1.355835,1.690096,2024-05-12 17:42:00,17,6,5,2024,Spring,0,Everyday,Petrol,0,1,0.22
2,Tunbridge Wells,770484a8-10a1-11ef-ae9f-4d6cef9a844e,2024-05-12 21:52:00,2024-05-12 23:10:00,4.369448,2024-05-12 21:45:00,21,6,5,2024,2024-05-13 00:15:00,0,0,5,2024,5.01728,3.610918,2.5,0,0,2.227862,2.847812,3.239854,2024-05-12 21:51:00,21,6,5,2024,Spring,0,Everyday,Petrol,0,1,0.23
3,Tunbridge Wells,e30b46ec-109b-11ef-8f56-3d407de5dd99,2024-05-12 21:30:00,2024-05-12 21:56:00,3.295837,2024-05-12 21:30:00,21,6,5,2024,2024-05-12 22:30:00,22,6,5,2024,4.110874,1.386294,1.0,0,0,0.524729,2.128232,2.207175,2024-05-12 21:11:00,21,6,5,2024,Spring,0,Family,Petrol,0,1,0.23
4,Aberdeen,3851e08e-1097-11ef-978c-99dfd9acc5d3,2024-05-12 20:40:00,2024-05-12 21:47:00,4.219508,2024-05-12 20:45:00,20,6,5,2024,2024-05-12 22:15:00,22,6,5,2024,4.51086,2.397895,1.5,0,0,0.875469,2.474856,2.586259,2024-05-12 20:38:00,20,6,5,2024,Spring,0,Everyday,EV,0,1,0.14


## 2.7. Data Encoding

In [15]:
# feature encoding
# Encode a copy so that `df` keeps its original categorical columns.
# (Despite its name, `scaled_df` is only encoded in this cell; no scaling is applied here.)
scaled_df = encode_features(df.copy())

In [16]:
scaled_df.head()

Unnamed: 0,location_0,location_1,location_2,location_3,location_4,location_5,location_6,booking_id,booking_actual_start,booking_actual_end,booking_actual_duration,booking_billed_start,booking_billed_start_hour,booking_billed_start_dayofweek,booking_billed_start_month,booking_billed_start_year,booking_billed_end,booking_billed_end_hour,booking_billed_end_dayofweek,booking_billed_end_month,booking_billed_end_year,booking_billed_duration,booking_mileage,booking_rates_hours,booking_rates_24hours,booking_rates_overnight,booking_actual_cost_distance,booking_actual_cost_time,booking_actual_cost_total,booking_created_at,booking_created_at_hour,booking_created_at_dayofweek,booking_created_at_month,booking_created_at_year,season_Winter,season_Autumn,season_Summer,season_Spring,is_holiday_1.0,is_holiday_0.0,Vehicle Type_City,Vehicle Type_Everyday,Vehicle Type_Family,Vehicle Type_Van,Vehicle Type_7 Seater,Fuel Type_Petrol,Fuel Type_EV,Fuel Type_Hydrogen,is_peak_hour_1.0,is_peak_hour_0.0,is_weekend_1.0,is_weekend_0.0,per_mile
0,0,0,1,0,1,1,0,8a874d26-10a6-11ef-ad23-5f794c6a9a23,2024-05-13 03:07:00,2024-05-13 08:42:00,5.817111,2024-05-12 23:45:00,23,6,5,2024,2024-05-13 10:30:00,10,0,5,2024,6.4708,3.526361,0.0,1,0,2.188296,4.097672,4.221271,2024-05-12 22:28:00,22,6,5,2024,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0.24
1,0,0,0,0,1,1,0,a5e831b6-107e-11ef-9cc1-d71c27abf5d8,2024-05-12 22:32:00,2024-05-12 23:01:00,3.401197,2024-05-12 22:30:00,22,6,5,2024,2024-05-12 23:00:00,23,6,5,2024,3.433987,2.079442,0.5,0,0,0.932164,1.355835,1.690096,2024-05-12 17:42:00,17,6,5,2024,0,0,0,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,0.22
2,0,0,0,1,0,1,0,770484a8-10a1-11ef-ae9f-4d6cef9a844e,2024-05-12 21:52:00,2024-05-12 23:10:00,4.369448,2024-05-12 21:45:00,21,6,5,2024,2024-05-13 00:15:00,0,0,5,2024,5.01728,3.610918,2.5,0,0,2.227862,2.847812,3.239854,2024-05-12 21:51:00,21,6,5,2024,0,0,0,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,0.23
3,0,0,0,1,0,1,0,e30b46ec-109b-11ef-8f56-3d407de5dd99,2024-05-12 21:30:00,2024-05-12 21:56:00,3.295837,2024-05-12 21:30:00,21,6,5,2024,2024-05-12 22:30:00,22,6,5,2024,4.110874,1.386294,1.0,0,0,0.524729,2.128232,2.207175,2024-05-12 21:11:00,21,6,5,2024,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0.23
4,0,0,0,1,0,1,1,3851e08e-1097-11ef-978c-99dfd9acc5d3,2024-05-12 20:40:00,2024-05-12 21:47:00,4.219508,2024-05-12 20:45:00,20,6,5,2024,2024-05-12 22:15:00,22,6,5,2024,4.51086,2.397895,1.5,0,0,0.875469,2.474856,2.586259,2024-05-12 20:38:00,20,6,5,2024,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0.14


In [17]:
# drop useless columns for prediction
# Reassign instead of dropping in place, and ignore columns that are already
# gone, so that re-running this cell does not raise a KeyError.
scaled_df = scaled_df.drop(
    columns=['booking_actual_start', 'booking_actual_end', 'booking_billed_end', 'booking_created_at'],
    errors='ignore',
)

In [18]:
scaled_df.head()

Unnamed: 0,location_0,location_1,location_2,location_3,location_4,location_5,location_6,booking_id,booking_actual_duration,booking_billed_start,booking_billed_start_hour,booking_billed_start_dayofweek,booking_billed_start_month,booking_billed_start_year,booking_billed_end_hour,booking_billed_end_dayofweek,booking_billed_end_month,booking_billed_end_year,booking_billed_duration,booking_mileage,booking_rates_hours,booking_rates_24hours,booking_rates_overnight,booking_actual_cost_distance,booking_actual_cost_time,booking_actual_cost_total,booking_created_at_hour,booking_created_at_dayofweek,booking_created_at_month,booking_created_at_year,season_Winter,season_Autumn,season_Summer,season_Spring,is_holiday_1.0,is_holiday_0.0,Vehicle Type_City,Vehicle Type_Everyday,Vehicle Type_Family,Vehicle Type_Van,Vehicle Type_7 Seater,Fuel Type_Petrol,Fuel Type_EV,Fuel Type_Hydrogen,is_peak_hour_1.0,is_peak_hour_0.0,is_weekend_1.0,is_weekend_0.0,per_mile
0,0,0,1,0,1,1,0,8a874d26-10a6-11ef-ad23-5f794c6a9a23,5.817111,2024-05-12 23:45:00,23,6,5,2024,10,0,5,2024,6.4708,3.526361,0.0,1,0,2.188296,4.097672,4.221271,22,6,5,2024,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0.24
1,0,0,0,0,1,1,0,a5e831b6-107e-11ef-9cc1-d71c27abf5d8,3.401197,2024-05-12 22:30:00,22,6,5,2024,23,6,5,2024,3.433987,2.079442,0.5,0,0,0.932164,1.355835,1.690096,17,6,5,2024,0,0,0,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,0.22
2,0,0,0,1,0,1,0,770484a8-10a1-11ef-ae9f-4d6cef9a844e,4.369448,2024-05-12 21:45:00,21,6,5,2024,0,0,5,2024,5.01728,3.610918,2.5,0,0,2.227862,2.847812,3.239854,21,6,5,2024,0,0,0,1,0,1,0,1,0,0,0,1,0,0,0,1,1,0,0.23
3,0,0,0,1,0,1,0,e30b46ec-109b-11ef-8f56-3d407de5dd99,3.295837,2024-05-12 21:30:00,21,6,5,2024,22,6,5,2024,4.110874,1.386294,1.0,0,0,0.524729,2.128232,2.207175,21,6,5,2024,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0.23
4,0,0,0,1,0,1,1,3851e08e-1097-11ef-978c-99dfd9acc5d3,4.219508,2024-05-12 20:45:00,20,6,5,2024,22,6,5,2024,4.51086,2.397895,1.5,0,0,0.875469,2.474856,2.586259,20,6,5,2024,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,1,1,0,0.14


In [19]:
df.head()

Unnamed: 0,location,booking_id,booking_actual_start,booking_actual_end,booking_actual_duration,booking_billed_start,booking_billed_start_hour,booking_billed_start_dayofweek,booking_billed_start_month,booking_billed_start_year,booking_billed_end,booking_billed_end_hour,booking_billed_end_dayofweek,booking_billed_end_month,booking_billed_end_year,booking_billed_duration,booking_mileage,booking_rates_hours,booking_rates_24hours,booking_rates_overnight,booking_actual_cost_distance,booking_actual_cost_time,booking_actual_cost_total,booking_created_at,booking_created_at_hour,booking_created_at_dayofweek,booking_created_at_month,booking_created_at_year,season,is_holiday,Vehicle Type,Fuel Type,is_peak_hour,is_weekend,per_mile
0,Reading,8a874d26-10a6-11ef-ad23-5f794c6a9a23,2024-05-13 03:07:00,2024-05-13 08:42:00,5.817111,2024-05-12 23:45:00,23,6,5,2024,2024-05-13 10:30:00,10,0,5,2024,6.4708,3.526361,0.0,1,0,2.188296,4.097672,4.221271,2024-05-12 22:28:00,22,6,5,2024,Spring,0,Family,Petrol,0,1,0.24
1,Glasgow,a5e831b6-107e-11ef-9cc1-d71c27abf5d8,2024-05-12 22:32:00,2024-05-12 23:01:00,3.401197,2024-05-12 22:30:00,22,6,5,2024,2024-05-12 23:00:00,23,6,5,2024,3.433987,2.079442,0.5,0,0,0.932164,1.355835,1.690096,2024-05-12 17:42:00,17,6,5,2024,Spring,0,Everyday,Petrol,0,1,0.22
2,Tunbridge Wells,770484a8-10a1-11ef-ae9f-4d6cef9a844e,2024-05-12 21:52:00,2024-05-12 23:10:00,4.369448,2024-05-12 21:45:00,21,6,5,2024,2024-05-13 00:15:00,0,0,5,2024,5.01728,3.610918,2.5,0,0,2.227862,2.847812,3.239854,2024-05-12 21:51:00,21,6,5,2024,Spring,0,Everyday,Petrol,0,1,0.23
3,Tunbridge Wells,e30b46ec-109b-11ef-8f56-3d407de5dd99,2024-05-12 21:30:00,2024-05-12 21:56:00,3.295837,2024-05-12 21:30:00,21,6,5,2024,2024-05-12 22:30:00,22,6,5,2024,4.110874,1.386294,1.0,0,0,0.524729,2.128232,2.207175,2024-05-12 21:11:00,21,6,5,2024,Spring,0,Family,Petrol,0,1,0.23
4,Aberdeen,3851e08e-1097-11ef-978c-99dfd9acc5d3,2024-05-12 20:40:00,2024-05-12 21:47:00,4.219508,2024-05-12 20:45:00,20,6,5,2024,2024-05-12 22:15:00,22,6,5,2024,4.51086,2.397895,1.5,0,0,0.875469,2.474856,2.586259,2024-05-12 20:38:00,20,6,5,2024,Spring,0,Everyday,EV,0,1,0.14


# 3. Rates Prediction

In [20]:
# predict hourly_rate and daily_rate for each vehicle types and for location
def _load_rate_models(vehicle_type):
    """Load the hourly model, daily model and optional scaler for one vehicle type.

    Parameters
    ----------
    vehicle_type : str
        One-hot column name, e.g. ``'Vehicle Type_City'``.

    Returns
    -------
    tuple or None
        ``(model_hourly, model_daily, scaler)``. ``scaler`` is ``None`` for the
        joblib-pickled models, which are fed the unscaled feature values.
        ``None`` is returned when the vehicle type matches no known family.

    Notes
    -----
    Reads the notebook-level ``ENCODERS_PATH`` and ``MODEL_PATH`` constants.
    ``joblib.load`` unpickles arbitrary objects, so only load artefacts that
    were produced by this project.
    """
    type_name = vehicle_type.replace('Vehicle Type_', '')

    if 'City' in vehicle_type or '7 Seater' in vehicle_type:
        # The scaler file name keeps the full column name (prefix included).
        scaler = joblib.load(ENCODERS_PATH + f'scaler_{vehicle_type}.pkl')
        model_hourly = keras.models.load_model(MODEL_PATH + f'{type_name}_nn_hourly_rate_model.keras')
        model_daily = keras.models.load_model(MODEL_PATH + f'{type_name}_nn_daily_rate_model.keras')
        return model_hourly, model_daily, scaler

    if 'Everyday' in vehicle_type or 'Van' in vehicle_type:
        model_hourly = joblib.load(MODEL_PATH + f'{type_name}_xgb_hourly_rate_model.pkl')
        model_daily = joblib.load(MODEL_PATH + f'{type_name}_xgb_daily_rate_model.pkl')
        return model_hourly, model_daily, None

    if 'Family' in vehicle_type:
        model_hourly = joblib.load(MODEL_PATH + f'{type_name}_xgb_hourly_rate_model.pkl')
        model_daily = joblib.load(MODEL_PATH + f'{type_name}_dt_daily_rate_model.pkl')
        return model_hourly, model_daily, None

    return None


def predict_rates(dataframe, location):
    """Predict hourly and daily rates per vehicle type for a single location.

    For every ``'Vehicle Type_*'`` one-hot column, the most recent booking
    (by ``booking_billed_start``) at ``location`` is used as the feature row
    for that vehicle type's saved hourly and daily models.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Encoded feature frame containing the binary-encoded ``location_*``
        columns, the ``Vehicle Type_*`` columns, ``booking_billed_start`` and
        ``booking_id``. It is copied, not modified.
    location : str
        Location name to filter on, as known to ``binary_encoder``.

    Returns
    -------
    pandas.DataFrame
        One row per vehicle type that could be predicted, with the columns
        ``vehicle_type``, ``location``, ``predicted_hourly``,
        ``predicted_daily``, ``actual_cost_distance``, ``actual_cost_time``,
        ``actual_revenue``, ``booking_rates_hours`` and
        ``booking_rates_24hours``. Empty (but with these columns) when nothing
        could be predicted.

    Notes
    -----
    Relies on the notebook-level ``binary_encoder`` and
    ``inverse_transform_data``. Vehicle types with no data, an unknown model
    family, or a load/scaling error are skipped with a printed message.
    """
    result_columns = [
        'vehicle_type', 'location', 'predicted_hourly', 'predicted_daily',
        'actual_cost_distance', 'actual_cost_time', 'actual_revenue',
        'booking_rates_hours', 'booking_rates_24hours',
    ]

    df = dataframe.copy()
    # BinaryEncoder inverse transform the location columns
    df['location'] = binary_encoder.inverse_transform(df.filter(like='location_'))['location']

    # Filter data for the given location
    location_data = df[df['location'] == location].copy()

    # Encode location data again and write the encoded values back
    location_encoded = binary_encoder.transform(location_data[['location']])
    location_data.update(location_encoded)
    location_data = location_data.drop(columns=['location'])

    vehicle_types = [col for col in location_data.columns if col.startswith('Vehicle Type_')]

    # Collect one record per vehicle type and build the frame once at the end.
    records = []

    for vehicle_type in vehicle_types:
        type_name = vehicle_type.replace('Vehicle Type_', '')
        vehicle_data = location_data[location_data[vehicle_type] == 1].copy()

        if vehicle_data.empty:
            print(f'No data for {type_name}')
            continue

        # Sort by billed start time so the last row is the most recent booking
        vehicle_data = vehicle_data.sort_values(by='booking_billed_start')

        # Drop useless features for the current vehicle type
        vehicle_data = vehicle_data.drop(columns=['booking_billed_start', 'booking_id'])

        # Work on a copy so the model features stay in their transformed form.
        # inverse_transform_data presumably undoes the log transform (per the
        # original comment) - defined elsewhere in the notebook.
        vehicle_data_copy = inverse_transform_data(vehicle_data.copy())

        try:
            booking_actual_cost_distance = np.round(vehicle_data_copy['booking_actual_cost_distance'].values[-1], 2)
            booking_actual_cost_time = np.round(vehicle_data_copy['booking_actual_cost_time'].values[-1], 2)
            booking_actual_cost_total = np.round(vehicle_data_copy['booking_actual_cost_total'].values[-1], 2)
            booking_rates_hours = np.round(vehicle_data_copy['booking_rates_hours'].values[-1], 2)
            booking_rates_24hours = np.round(vehicle_data_copy['booking_rates_24hours'].values[-1], 2)
        except IndexError as e:
            print(f"Error processing vehicle type {vehicle_type}: {e}")
            continue

        # Models are resolved fresh on every iteration so a failure or an
        # unknown vehicle type can never fall back to the previous type's models.
        try:
            loaded = _load_rate_models(vehicle_type)
        except (OSError, ValueError) as e:
            # OSError covers a missing artefact file (FileNotFoundError).
            print(f"Error loading model {type_name}: {e}")
            continue

        if loaded is None:
            # No model family is defined for this vehicle type.
            continue

        model_hourly, model_daily, scaler = loaded

        if scaler is not None:
            try:
                X = scaler.transform(vehicle_data)
            except ValueError as e:
                print(f"Error scaling for {type_name}: {e}")
                continue
        else:
            X = vehicle_data.values

        # Latest booking as a single-row batch; -1 infers the feature count
        # instead of hard-coding it.
        X = X[-1].reshape(1, -1)

        # Keras returns shape (1, 1), the other models shape (1,); ravel handles both.
        pred_hourly = float(np.ravel(model_hourly.predict(X))[0])
        pred_daily = float(np.ravel(model_daily.predict(X))[0])

        records.append({
            'vehicle_type': type_name,
            'location': location,
            'predicted_hourly': np.round(pred_hourly, 2),
            'predicted_daily': np.round(pred_daily, 2),
            'actual_cost_distance': booking_actual_cost_distance,
            'actual_cost_time': booking_actual_cost_time,
            'actual_revenue': booking_actual_cost_total,
            'booking_rates_hours': booking_rates_hours,
            'booking_rates_24hours': booking_rates_24hours,
        })

    return pd.DataFrame(records, columns=result_columns)

In [21]:
# Predict hourly and daily rates per vehicle type for the 'Newcastle' location from scaled_df.
predictions_df = predict_rates(scaled_df, 'Newcastle')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 261ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 207ms/step
No data for 7 Seater


In [22]:
predictions_df

Unnamed: 0,vehicle_type,location,predicted_hourly,predicted_daily,actual_cost_distance,actual_cost_time,actual_revenue,booking_rates_hours,booking_rates_24hours
0,City,Newcastle,4.75,32.39,4.32,20.63,24.95,3.75,0
1,Everyday,Newcastle,5.95,47.6,4.8,17.0,21.8,2.5,0
2,Family,Newcastle,7.4,59.2,3.12,15.0,18.12,2.0,0
3,Van,Newcastle,7.7,61.6,2.16,11.25,13.41,1.25,0


# 4. Demand Factor Calculation

## 4.1. Popular Location Demand Factor

In [23]:
# Demand factor derived from how popular a location is
def popular_location_demand_factor(historical_data, location):
    """Return the location-popularity demand factor for ``location``.

    Bookings are counted per location. A location counts as "in demand" when
    its count is at or above the rounded 75th percentile of all location
    counts. For such a location the returned factor is the mean of every
    location's count divided by the largest count, rounded to 2 decimals
    (so it is the same value for every in-demand location). Any other
    location, including one absent from ``historical_data``, yields ``0``.

    Parameters
    ----------
    historical_data : pandas.DataFrame
        Booking history with a ``location`` column.
    location : str
        Location to evaluate.
    """
    bookings_by_location = historical_data.groupby('location').size()

    # Unknown location: nothing to score
    if location not in bookings_by_location.index:
        return 0

    # Locations at or above the upper-quartile booking count are "in demand"
    cutoff = np.int32(np.round(np.percentile(bookings_by_location, 75)))
    in_demand = bookings_by_location[bookings_by_location >= cutoff].index.tolist()

    # Mean booking count relative to the busiest location
    busiest_count = bookings_by_location.max()
    mean_share = np.round((bookings_by_location / busiest_count).mean(), 2)

    if location not in in_demand:
        return 0
    return mean_share

## 4.2. Peak Hour Demand Factor

In [24]:
# Demand factor derived from the busiest booking hours of a location
def peak_hour_demand_factor(historical_data, location, hour):
    """Return the peak-hour demand factor for ``hour`` at ``location``.

    Bookings at ``location`` are counted per ``booking_created_at_hour``. An
    hour is a peak hour when its count is at or above the rounded 75th
    percentile of the hourly counts. For a peak hour the returned factor is
    the mean of every hour's count divided by the largest hourly count,
    rounded to 2 decimals. A non-peak hour, or a location with no rows,
    yields ``0``.

    Parameters
    ----------
    historical_data : pandas.DataFrame
        Booking history with ``location`` and ``booking_created_at_hour``
        columns.
    location : str
        Location whose bookings are analysed.
    hour : int
        Hour of day to evaluate.
    """
    at_location = historical_data[historical_data['location'] == location]

    # No bookings at this location: no demand signal
    if at_location.empty:
        return 0

    # Number of bookings created in each hour of the day
    bookings_by_hour = at_location.groupby('booking_created_at_hour').size()

    # Hours at or above the upper-quartile count are peak hours
    cutoff = np.int32(np.round(np.percentile(bookings_by_hour, 75)))
    busy_hours = bookings_by_hour[bookings_by_hour >= cutoff].index.tolist()

    # Mean hourly count relative to the busiest hour
    busiest_count = bookings_by_hour.max()
    mean_share = np.round((bookings_by_hour / busiest_count).mean(), 2)

    if hour not in busy_hours:
        return 0
    return mean_share

## 4.3. Overall Demand Factor

In [25]:
# Overall demand factor: average of the peak-hour and popular-location factors
def demand_factor(historical_data, location, hour):
    """Return the mean of the peak-hour and location-popularity factors.

    Parameters
    ----------
    historical_data : pandas.DataFrame
        Booking history passed through to both component functions.
    location : str
        Location to evaluate.
    hour : int
        Hour of day to evaluate.

    Returns
    -------
    The average of the two component factors, rounded to 5 decimals.
    """
    components = [
        peak_hour_demand_factor(historical_data, location, hour),
        popular_location_demand_factor(historical_data, location),
    ]
    return np.round(np.mean(components), 5)

## 4.4. Calculate Prices based on demand

In [26]:
from datetime import datetime
import pytz

# The demand factor is evaluated for the wall-clock hour in the Europe/London
# timezone, so this cell's result changes with the time the notebook is run.
current_time = datetime.now(pytz.timezone('Europe/London'))
current_time_hour = current_time.hour

# Combined (peak-hour + popular-location) demand factor for 'Newcastle' at the current hour.
demand_factor_value = demand_factor(df, 'Newcastle', current_time_hour)

In [27]:
demand_factor_value

0.03

In [28]:
# Dynamic pricing: adjusted = predicted + demand_factor * predicted, rounded to 2 decimals
predictions_df['adjusted_hourly'] = (
    predictions_df['predicted_hourly']
    + (demand_factor_value * predictions_df['predicted_hourly'])
).round(2)
predictions_df['adjusted_daily'] = (
    predictions_df['predicted_daily']
    + (demand_factor_value * predictions_df['predicted_daily'])
).round(2)

In [29]:
predictions_df

Unnamed: 0,vehicle_type,location,predicted_hourly,predicted_daily,actual_cost_distance,actual_cost_time,actual_revenue,booking_rates_hours,booking_rates_24hours,adjusted_hourly,adjusted_daily
0,City,Newcastle,4.75,32.39,4.32,20.63,24.95,3.75,0,4.89,33.36
1,Everyday,Newcastle,5.95,47.6,4.8,17.0,21.8,2.5,0,6.13,49.03
2,Family,Newcastle,7.4,59.2,3.12,15.0,18.12,2.0,0,7.62,60.98
3,Van,Newcastle,7.7,61.6,2.16,11.25,13.41,1.25,0,7.93,63.45


In [30]:
# Adjusted time cost: booking_rates_hours and booking_rates_24hours each multiplied
# by the matching adjusted rate, summed and rounded to 2 decimals
predictions_df['adjusted_cost_time'] = (
    (predictions_df['booking_rates_hours'] * predictions_df['adjusted_hourly'])
    + (predictions_df['booking_rates_24hours'] * predictions_df['adjusted_daily'])
).round(2)

In [31]:
# Adjusted revenue: adjusted time cost plus the unchanged distance cost
predictions_df['adjusted_revenue'] = (
    predictions_df['adjusted_cost_time'] + predictions_df['actual_cost_distance']
).round(2)

In [32]:
predictions_df

Unnamed: 0,vehicle_type,location,predicted_hourly,predicted_daily,actual_cost_distance,actual_cost_time,actual_revenue,booking_rates_hours,booking_rates_24hours,adjusted_hourly,adjusted_daily,adjusted_cost_time,adjusted_revenue
0,City,Newcastle,4.75,32.39,4.32,20.63,24.95,3.75,0,4.89,33.36,18.34,22.66
1,Everyday,Newcastle,5.95,47.6,4.8,17.0,21.8,2.5,0,6.13,49.03,15.32,20.12
2,Family,Newcastle,7.4,59.2,3.12,15.0,18.12,2.0,0,7.62,60.98,15.24,18.36
3,Van,Newcastle,7.7,61.6,2.16,11.25,13.41,1.25,0,7.93,63.45,9.91,12.07


## 4.5. Remove unwanted columns

In [33]:
# Keep only the reporting columns, in presentation order. Selecting them directly
# already discards the intermediate columns, so no separate in-place drop is needed.
new_order = ['location', 'vehicle_type', 'adjusted_hourly', 'adjusted_daily', 'actual_revenue', 'adjusted_revenue']

# rename columns; rename() returns a new frame, so the selected slice is never
# modified in place (which would be the SettingWithCopy pattern)
predictions_df = predictions_df[new_order].rename(
    columns={'adjusted_hourly': 'hourly_rate', 'adjusted_daily': 'daily_rate'}
)

In [34]:
predictions_df

Unnamed: 0,location,vehicle_type,hourly_rate,daily_rate,actual_revenue,adjusted_revenue
0,Newcastle,City,4.89,33.36,24.95,22.66
1,Newcastle,Everyday,6.13,49.03,21.8,20.12
2,Newcastle,Family,7.62,60.98,18.12,18.36
3,Newcastle,Van,7.93,63.45,13.41,12.07


---