## Importing Libraries

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

## Load the dataset

In [3]:
# NOTE(review): this is an absolute, machine-specific Windows path, so the notebook
# only runs where this exact file exists. Consider a path relative to the project.
csv_path = r"C:\Users\sanoj\Downloads\dynamic_pricing (1).csv"
df = pd.read_csv(csv_path)
df.head()  # preview the first five rows

Unnamed: 0,Number_of_Riders,Number_of_Drivers,Location_Category,Customer_Loyalty_Status,Number_of_Past_Rides,Average_Ratings,Time_of_Booking,Vehicle_Type,Expected_Ride_Duration,Historical_Cost_of_Ride
0,90,45,Urban,Silver,13,4.47,Night,Premium,90,284.257273
1,58,39,Suburban,Silver,72,4.06,Evening,Economy,43,173.874753
2,42,31,Rural,Silver,0,3.99,Afternoon,Premium,76,329.795469
3,89,28,Rural,Regular,67,4.31,Afternoon,Premium,134,470.201232
4,78,22,Rural,Regular,74,3.77,Afternoon,Economy,149,579.681422


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,Number_of_Riders,Number_of_Drivers,Number_of_Past_Rides,Average_Ratings,Expected_Ride_Duration,Historical_Cost_of_Ride
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,60.372,27.076,50.031,4.25722,99.588,372.502623
std,23.701506,19.068346,29.313774,0.435781,49.16545,187.158756
min,20.0,5.0,0.0,3.5,10.0,25.993449
25%,40.0,11.0,25.0,3.87,59.75,221.365202
50%,60.0,22.0,51.0,4.27,102.0,362.019426
75%,81.0,38.0,75.0,4.6325,143.0,510.497504
max,100.0,89.0,100.0,5.0,180.0,836.116419


# Ratio Based Approach

## Calculating Demand_Supply_Ratio and picking demand_supply_threshold = 2.3, around the mean of Demand_Supply_Ratio
* ### Higher Demand = when 'Demand_Supply_Ratio' > demand_supply_threshold (2.3) else Low-demand
* ### Higher supply = when 'Demand_Supply_Ratio' < demand_supply_threshold (2.3) else Low-supply



In [5]:
# Riders per available driver: the demand/supply signal used by the pricing steps below.
df['Demand_Supply_Ratio'] = df['Number_of_Riders'] / df['Number_of_Drivers']
demand_supply_threshold = 2.3

# Both labels are derived from ONE mask so they are always complementary.
# With two independent strict comparisons (> and <), a ratio exactly equal to the
# threshold (e.g. 23 riders / 10 drivers) was labelled "Lower_demand" AND
# "Lower_supply" at the same time. Such rows are now "Lower_demand" / "Higher_supply".
is_high_demand = df['Demand_Supply_Ratio'] > demand_supply_threshold
df['Demand_class'] = np.where(is_high_demand, "Higher_demand", "Lower_demand")
df['Supply_class'] = np.where(is_high_demand, "Lower_supply", "Higher_supply")

# Select the preview columns by name instead of by position, so the preview stays
# correct if columns are added or reordered; seed the sample so the shown rows are
# reproducible across runs.
preview_columns = [
    'Number_of_Riders',
    'Number_of_Drivers',
    'Demand_Supply_Ratio',
    'Demand_class',
    'Supply_class',
]
df[preview_columns].sample(10, random_state=42)

Unnamed: 0,Number_of_Riders,Number_of_Drivers,Demand_Supply_Ratio,Demand_class,Supply_class
463,20,10,2.0,Lower_demand,Higher_supply
82,81,23,3.521739,Higher_demand,Lower_supply
880,62,8,7.75,Higher_demand,Lower_supply
937,56,37,1.513514,Lower_demand,Higher_supply
265,77,30,2.566667,Higher_demand,Lower_supply
486,42,7,6.0,Higher_demand,Lower_supply
185,47,17,2.764706,Higher_demand,Lower_supply
13,36,24,1.5,Lower_demand,Higher_supply
665,79,13,6.076923,Higher_demand,Lower_supply
337,30,7,4.285714,Higher_demand,Lower_supply


## Calculating the base price and surge charge based on the supply-demand ratio and demand_supply_factor
* ### 1. Calculate base historical cost based on Expected_Ride_Duration
* ### 2. Calculate rider-to-driver ratio
* ### 3. Calculate demand-supply factor
* ### 4. Define a method to calculate the supply/demand surge and apply the dynamic pricing formula

In [6]:

# Pricing parameters.
# constant_rate: base rate per unit of ride duration (per the author, chosen around
# the mean of 'Historical_Cost_of_Ride' / 'Expected_Ride_Duration').
constant_rate = 3.5
# demand_hike: how strongly each unit of demand-supply factor raises the price.
demand_hike = 0.35

# Base cost grows linearly with the expected ride duration.
df['base_cost'] = constant_rate * df['Expected_Ride_Duration']

# Riders per driver (the same expression as 'Demand_Supply_Ratio', stored again
# under the name the surge formula uses).
df['rider_driver_ratio'] = df['Number_of_Riders'] / df['Number_of_Drivers']

# Demand-supply factor: how far the ratio exceeds 1, capped at 6 so that extreme
# ratios cannot inflate the surge without bound.
df['demand_supply_factor'] = (df['rider_driver_ratio'] - 1).clip(upper=6)

# Supply/demand surge: charged only for rides flagged as high demand AND low supply.
def apply_surge(df, hike=None):
    """Return the supply/demand surge charge for a single ride.

    Parameters
    ----------
    df : mapping-like row (despite the name, this is ONE row, e.g. the Series
        passed by ``DataFrame.apply(..., axis=1)``). Must provide the keys
        'Demand_class' and 'Supply_class'; 'base_cost' and
        'demand_supply_factor' are read only when the surge applies.
    hike : float, optional
        Price increase per unit of demand-supply factor. When omitted, the
        notebook-level ``demand_hike`` constant is used, so existing calls
        such as ``df.apply(apply_surge, axis=1)`` behave as before.

    Returns
    -------
    float
        ``base_cost * hike * demand_supply_factor`` when the ride is
        'Higher_demand' and 'Lower_supply', otherwise ``0.0``.
    """
    if hike is None:
        hike = demand_hike

    # Logical `and` (not bitwise `&`): both operands are plain booleans here.
    if df['Demand_class'] == 'Higher_demand' and df['Supply_class'] == 'Lower_supply':
        return df['base_cost'] * (hike * df['demand_supply_factor'])

    # Always return a float so the resulting column has a consistent dtype.
    return 0.0

# Evaluate apply_surge once per row (axis=1) and store the result as a new column.
df['S/D_surge_charge'] = df.apply(apply_surge,axis=1)
df.head()

Unnamed: 0,Number_of_Riders,Number_of_Drivers,Location_Category,Customer_Loyalty_Status,Number_of_Past_Rides,Average_Ratings,Time_of_Booking,Vehicle_Type,Expected_Ride_Duration,Historical_Cost_of_Ride,Demand_Supply_Ratio,Demand_class,Supply_class,base_cost,rider_driver_ratio,demand_supply_factor,S/D_surge_charge
0,90,45,Urban,Silver,13,4.47,Night,Premium,90,284.257273,2.0,Lower_demand,Higher_supply,315.0,2.0,1.0,0.0
1,58,39,Suburban,Silver,72,4.06,Evening,Economy,43,173.874753,1.487179,Lower_demand,Higher_supply,150.5,1.487179,0.487179,0.0
2,42,31,Rural,Silver,0,3.99,Afternoon,Premium,76,329.795469,1.354839,Lower_demand,Higher_supply,266.0,1.354839,0.354839,0.0
3,89,28,Rural,Regular,67,4.31,Afternoon,Premium,134,470.201232,3.178571,Higher_demand,Lower_supply,469.0,3.178571,2.178571,357.6125
4,78,22,Rural,Regular,74,3.77,Afternoon,Economy,149,579.681422,3.545455,Higher_demand,Lower_supply,521.5,3.545455,2.545455,464.609091


## Conditional surge based on Vehicle_Type, Time_of_Booking and Location_Category


In [7]:

def cal_surge_charge(df):
    """Return the time/location surge charge for a single ride.

    ``df`` is one row (mapping-like), not the whole frame. A surge is charged
    only when the ride is in an 'Urban' or 'Suburban' location AND is booked
    in the 'Evening' or at 'Night'. In that case the charge is two percentage
    components of 'base_cost': 5% + 2% for 'Premium' vehicles, 2.5% + 1% for
    every other vehicle type. Otherwise the charge is 0.
    """
    is_premium = df['Vehicle_Type'] == 'Premium'
    urban_or_suburban = df['Location_Category'] in ('Urban', 'Suburban')
    evening_or_night = df['Time_of_Booking'] in ('Evening', 'Night')

    # Guard clause: outside the surge window nothing is charged.
    if not (urban_or_suburban and evening_or_night):
        return 0

    # The two components are kept as separate products so the floating-point
    # result is identical to summing them individually.
    first_rate, second_rate = (0.05, 0.02) if is_premium else (0.025, 0.01)
    return df['base_cost'] * first_rate + df['base_cost'] * second_rate

# Evaluate cal_surge_charge once per row (axis=1) and store the result as a new column.
df['Surge_charge'] = df.apply(cal_surge_charge, axis=1)


In [8]:
# Spot-check ten random rows; no seed is set, so the rows differ on every run.
df.sample(10)

Unnamed: 0,Number_of_Riders,Number_of_Drivers,Location_Category,Customer_Loyalty_Status,Number_of_Past_Rides,Average_Ratings,Time_of_Booking,Vehicle_Type,Expected_Ride_Duration,Historical_Cost_of_Ride,Demand_Supply_Ratio,Demand_class,Supply_class,base_cost,rider_driver_ratio,demand_supply_factor,S/D_surge_charge,Surge_charge
57,95,25,Suburban,Regular,72,4.16,Morning,Premium,128,413.587084,3.8,Higher_demand,Lower_supply,448.0,3.8,2.8,439.04,0.0
494,80,34,Suburban,Silver,84,3.8,Night,Economy,174,561.438522,2.352941,Higher_demand,Lower_supply,609.0,2.352941,1.352941,288.379412,21.315
950,44,18,Suburban,Gold,88,3.67,Night,Premium,50,199.839744,2.444444,Higher_demand,Lower_supply,175.0,2.444444,1.444444,88.472222,12.25
962,95,49,Rural,Regular,92,4.27,Night,Premium,99,332.093616,1.938776,Lower_demand,Higher_supply,346.5,1.938776,0.938776,0.0,0.0
802,87,77,Urban,Gold,12,3.56,Night,Premium,11,97.784506,1.12987,Lower_demand,Higher_supply,38.5,1.12987,0.12987,0.0,2.695
906,44,11,Rural,Silver,31,3.92,Afternoon,Premium,110,339.287073,4.0,Higher_demand,Lower_supply,385.0,4.0,3.0,404.25,0.0
431,85,25,Urban,Silver,21,4.03,Morning,Economy,73,323.557686,3.4,Higher_demand,Lower_supply,255.5,3.4,2.4,214.62,0.0
878,32,13,Urban,Regular,75,4.56,Evening,Premium,111,333.401167,2.461538,Higher_demand,Lower_supply,388.5,2.461538,1.461538,198.732692,27.195
330,63,53,Rural,Gold,33,4.33,Afternoon,Economy,175,489.712524,1.188679,Lower_demand,Higher_supply,612.5,1.188679,0.188679,0.0,0.0
838,88,48,Urban,Gold,52,4.52,Night,Economy,92,230.832586,1.833333,Lower_demand,Higher_supply,322.0,1.833333,0.833333,0.0,11.27


## Calculating Total cost

In [9]:
# Final price = duration-based base cost + supply/demand surge + time/location surge.
df['New_cost'] = df['base_cost'] + df['S/D_surge_charge'] + df['Surge_charge']

# Show the inputs and the resulting price side by side for ten random rides.
cost_columns = [
    'Number_of_Riders',
    'Number_of_Drivers',
    'Historical_Cost_of_Ride',
    'Demand_Supply_Ratio',
    'Demand_class',
    'Supply_class',
    'base_cost',
    'S/D_surge_charge',
    'Surge_charge',
    'New_cost',
]
df[cost_columns].sample(10)

Unnamed: 0,Number_of_Riders,Number_of_Drivers,Historical_Cost_of_Ride,Demand_Supply_Ratio,Demand_class,Supply_class,base_cost,S/D_surge_charge,Surge_charge,New_cost
650,30,13,274.680375,2.307692,Higher_demand,Lower_supply,329.0,150.580769,0.0,479.580769
859,27,7,555.401937,3.857143,Higher_demand,Lower_supply,420.0,420.0,0.0,840.0
972,58,10,459.060651,5.8,Higher_demand,Lower_supply,483.0,811.44,33.81,1328.25
341,48,24,92.031047,2.0,Lower_demand,Higher_supply,101.5,0.0,0.0,101.5
141,93,42,674.648504,2.214286,Lower_demand,Higher_supply,560.0,0.0,0.0,560.0
390,27,15,315.265403,1.8,Lower_demand,Higher_supply,360.5,0.0,0.0,360.5
156,94,59,665.729534,1.59322,Lower_demand,Higher_supply,612.5,0.0,0.0,612.5
614,69,51,470.182714,1.352941,Lower_demand,Higher_supply,476.0,0.0,33.32,509.32
984,34,7,516.511173,4.857143,Higher_demand,Lower_supply,521.5,704.025,36.505,1262.03
193,61,26,604.273047,2.346154,Higher_demand,Lower_supply,441.0,207.778846,0.0,648.778846


## Revenue Before and after

In [10]:
# Total revenue under the historical prices versus the dynamically priced total,
# each rounded to two decimals.
revenue_before = round(sum(df['Historical_Cost_of_Ride']), 2)
revenue_after = round(sum(df['New_cost']), 2)
print("Revenue before applying Dynamic_pricing -->", revenue_before)
print("Revenue after applying Dynamic_pricing-->", revenue_after)

Revenue before applying Dynamic_pricing --> 372502.62
Revenue after applying Dynamic_pricing--> 552298.01


In [11]:
# Absolute and relative revenue change from historical to dynamic pricing.
total_new_cost = sum(df['New_cost'])
total_historical_cost = sum(df['Historical_Cost_of_Ride'])
diff = total_new_cost - total_historical_cost
print("Diffrenece of Revenue--> ", diff)
print("Revenue Percentage --> ", diff/total_historical_cost*100)

Diffrenece of Revenue-->  179795.39090132003
Revenue Percentage -->  48.26687911203326


## Conclusion
* ### Difference in revenue --> 179795.39
* ### Revenue increase --> 48.27 %

In [12]:
# Inspect rides where riders outnumber drivers by more than 10 to 1.
# The mask gets a descriptive name instead of `filter`, which would shadow
# Python's builtin filter() for every cell executed afterwards.
extreme_ratio_mask = df['Demand_Supply_Ratio'] > 10
df[extreme_ratio_mask].head(10)

Unnamed: 0,Number_of_Riders,Number_of_Drivers,Location_Category,Customer_Loyalty_Status,Number_of_Past_Rides,Average_Ratings,Time_of_Booking,Vehicle_Type,Expected_Ride_Duration,Historical_Cost_of_Ride,Demand_Supply_Ratio,Demand_class,Supply_class,base_cost,rider_driver_ratio,demand_supply_factor,S/D_surge_charge,Surge_charge,New_cost
49,67,6,Rural,Gold,15,3.53,Night,Economy,123,420.623911,11.166667,Higher_demand,Lower_supply,430.5,11.166667,6.0,904.05,0.0,1334.55
88,66,6,Rural,Regular,23,4.2,Evening,Economy,45,173.157754,11.0,Higher_demand,Lower_supply,157.5,11.0,6.0,330.75,0.0,488.25
94,95,7,Rural,Gold,40,4.68,Evening,Economy,95,283.466443,13.571429,Higher_demand,Lower_supply,332.5,13.571429,6.0,698.25,0.0,1030.75
153,51,5,Urban,Gold,0,4.59,Afternoon,Premium,92,320.857622,10.2,Higher_demand,Lower_supply,322.0,10.2,6.0,676.2,0.0,998.2
170,76,7,Urban,Gold,76,4.35,Morning,Economy,72,245.893571,10.857143,Higher_demand,Lower_supply,252.0,10.857143,6.0,529.2,0.0,781.2
197,75,7,Suburban,Gold,100,4.13,Morning,Economy,134,453.376949,10.714286,Higher_demand,Lower_supply,469.0,10.714286,6.0,984.9,0.0,1453.9
216,88,5,Urban,Silver,89,3.59,Night,Economy,27,70.203803,17.6,Higher_demand,Lower_supply,94.5,17.6,6.0,198.45,3.3075,296.2575
218,65,5,Rural,Silver,24,3.54,Night,Economy,119,301.403927,13.0,Higher_demand,Lower_supply,416.5,13.0,6.0,874.65,0.0,1291.15
232,87,5,Urban,Silver,59,4.32,Night,Economy,42,151.359301,17.4,Higher_demand,Lower_supply,147.0,17.4,6.0,308.7,5.145,460.845
250,97,7,Urban,Silver,22,3.74,Afternoon,Premium,147,441.746701,13.857143,Higher_demand,Lower_supply,514.5,13.857143,6.0,1080.45,0.0,1594.95


# Data Splitting and Model Training

In [13]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score


In [14]:
# Model inputs: the demand/supply ratio, the three categorical ride attributes and
# the expected duration.
# NOTE(review): New_cost is computed by the rule-based pricing cells above from
# these same inputs, so the model is fitted to that formula rather than to
# observed prices.
feature_columns = [
    'Demand_Supply_Ratio',
    'Location_Category',
    'Time_of_Booking',
    'Vehicle_Type',
    'Expected_Ride_Duration',
]
x = df[feature_columns]  # training features
y = df['New_cost']       # regression target

In [15]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [16]:
# Display the training features.
X_train

Unnamed: 0,Demand_Supply_Ratio,Location_Category,Time_of_Booking,Vehicle_Type,Expected_Ride_Duration
29,1.500000,Suburban,Night,Premium,131
535,2.625000,Urban,Night,Premium,84
695,1.312500,Rural,Morning,Economy,70
557,1.521739,Suburban,Afternoon,Economy,164
836,1.296875,Suburban,Evening,Economy,109
...,...,...,...,...,...
106,1.571429,Rural,Afternoon,Economy,60
270,6.923077,Suburban,Afternoon,Economy,126
860,2.750000,Rural,Night,Premium,122
435,2.909091,Suburban,Afternoon,Premium,32


In [17]:
# Display the training target (New_cost).
y_train

29      490.595000
535     481.792500
695     245.000000
557     574.000000
836     394.852500
          ...     
106     210.000000
270    1355.226923
860     688.537500
435     186.836364
102     458.500000
Name: New_cost, Length: 800, dtype: float64

In [18]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import r2_score
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline

In [19]:
# Fit an encoder on the FULL feature frame (train and test rows) for the three
# categorical columns; the next cells read its learned categories.
ohe = OneHotEncoder()
ohe.fit(x[['Location_Category','Time_of_Booking','Vehicle_Type']])

In [20]:
# Category levels learned by the fitted encoder, later passed to the pipeline's encoder.
cat=ohe.categories_

In [21]:
# One-hot encode the three categorical columns with the category levels collected
# in `cat`; all other columns are passed through unchanged.
categorical_columns = ['Location_Category', 'Time_of_Booking', 'Vehicle_Type']
column_trans = make_column_transformer(
    (OneHotEncoder(categories=cat), categorical_columns),
    remainder='passthrough',
)

In [22]:
# Linear regression estimator with default settings.
lr=LinearRegression()

In [23]:
# Chain the column transformer and the regressor so encoding and fitting happen together.
pipe=make_pipeline(column_trans,lr)

In [24]:
# Fit the encoding + regression pipeline on the training split only.
pipe.fit(X_train,y_train)

The format of the columns of the 'remainder' transformer in ColumnTransformer.transformers_ will change in version 1.7 to match the format of the other transformers.
At the moment the remainder columns are stored as indices (of type int). With the same ColumnTransformer configuration, in the future they will be stored as column names (of type str).



In [25]:
# Predict New_cost for the held-out test features; the bare name displays the array.
y_pred=pipe.predict(X_test)
y_pred

array([ 736.304414  ,  521.0745563 ,  987.22476923,  395.38026094,
        340.93018233,  540.88829265,  432.81434573,  116.85429583,
        701.00050633,  176.64210048,  391.51538031,  204.61470217,
        201.86140097,  889.36034349,  481.93877459,  496.31992514,
        864.54840188,  629.49522573,  121.85520434,  450.62152732,
        618.01043887,  304.5906365 ,  301.43493956,  462.25127886,
        291.42927816,  900.83972581,   97.1450939 , 1429.48030068,
        195.96851618, 1138.52929135,  378.44006163,   23.05492584,
        542.64183284,  103.5267465 ,  534.66070383,  374.39173443,
        115.03933839,  300.39731744,  519.38644318,  458.00646174,
        881.22069619,  813.25518303, 1021.70330842,  939.77212674,
       1324.73720154,  501.00737221,  427.18853856,  979.59495554,
        328.56902124,  560.36647518,  460.45710732,   86.33266403,
        662.77340968,  248.82181752,  135.17329038,  462.35768115,
        416.24004459, 1043.35369048,  868.01887533,  713.39286

# Model Evaluation & Check prediction 

In [26]:
from sklearn.metrics import mean_absolute_percentage_error

In [27]:
# Mean absolute percentage error on the test split, reported as a percentage.
# "Accuracy" here is simply defined as 100% minus that error.
mape = mean_absolute_percentage_error(y_test, y_pred)
error_pct = mape * 100
accuracy_pct = (1 - mape) * 100
print("Error of Linear Regression Model = %.2f" % error_pct, '%')
print("Accuracy of Linear Regression Model = %.2f" % accuracy_pct, '%')

Error of Linear Regression Model = 30.70 %
Accuracy of Linear Regression Model = 69.30 %


In [28]:
# Coefficient of determination (R^2) of the predictions on the test split.
r2 = r2_score(y_test,y_pred)
print("R2 score of Linear Regression = %.2f"%(r2))

R2 score of Linear Regression = 0.84


In [29]:
# Price one hand-written ride with the fitted pipeline.
# The numeric features are passed as numbers (2.0, 90) rather than as the strings
# '2.0' / '90': strings only worked through implicit conversion inside scikit-learn
# and did not match the dtypes the pipeline was trained on.
sample_ride = pd.DataFrame(
    [[2.0, 'Urban', 'Night', 'Premium', 90]],
    columns=[
        'Demand_Supply_Ratio',
        'Location_Category',
        'Time_of_Booking',
        'Vehicle_Type',
        'Expected_Ride_Duration',
    ],
)
pipe.predict(sample_ride)

array([411.59386835])