In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats

# Problem Statement

- Objective: Build a model that compares prices of similar products across vendors and recommends the best deal based on the user's preferences (e.g. Proximity, Budget, Quality, etc.).

# Data

- Dataset: A synthetic dataset generated to compare prices of similar products across vendors.


# Model Selection

A supervised learning approach will be best for comparison of prices.

- Algorithm: 
    - Ranking Models: Pairwise Logistic Regression or Learning-to-Rank models (e.g. XGBoost with ranking objectives).
    - Collaborative Filtering Models: Recommend vendors or products based on user preferences.

- Features:
    - Price, Discount, Vendor Distance, Product Ratings
    - User Preferences: Proximity, Budget, Quality, Preferred Vendor.

In [2]:
# Read the synthetic food-items CSV into `df` and preview its first rows.
csv_path = '../../data/synthetic/food_items.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Updated At,Item Name,Category,Unit,Price (₦),Vendor Name,Venor Id,Vendor Location,Vendor Rating,Promotion,In Stock,Id
0,2022-12-31 10:45:36.823923136,Fruit Bar,Snacks,200g pack,79554,Vendor 1,1,Ogun State,4,6.662236,True,0
1,2022-07-08 12:32:03.005505776,Chocolates,Fruits,1 bottle,11277,Vendor 1,1,Ogun State,4,3.974454,False,1
2,2022-11-16 12:27:15.927885136,Chocolate Bar,Vegetables,1 can,132594,Vendor 1,1,Ogun State,4,7.516402,True,2
3,2022-02-15 08:48:48.813559322,Beans (Brown),Staples,1 box,130554,Vendor 1,1,Ogun State,4,6.689888,False,3
4,2022-09-16 21:33:02.467882976,Whole Wheat Bread,Beverages,1 bar,148704,Vendor 1,1,Ogun State,4,3.783152,True,4


In [3]:
# Convert the location to GPS coordinates with the help of geopy.geocoders
from geopy.geocoders import Nominatim

In [4]:
# Geocoding client used to turn vendor location names into GPS coordinates.
geolocator = Nominatim(
    user_agent="location_finder",
)

In [9]:
# Distinct vendor locations to geocode. Missing values are dropped first so
# that NaN is never passed to the geocoder as if it were a place name.
locations = df['Vendor Location'].dropna().unique()

In [6]:
def _to_lat_lon(place):
    """Return (latitude, longitude) for `place`, or None when the geocoder finds no match."""
    hit = geolocator.geocode(place)
    if not hit:
        return None
    return (hit.latitude, hit.longitude)


# One geocoder lookup per distinct vendor location, keyed by the location name.
gps_coordinates = {place: _to_lat_lon(place) for place in locations}

gps_coordinates

{'Ogun State': (6.9788582, 3.4389293),
 'Ikeja, Lagos': (6.5960605, 3.340787),
 'Onitsha': (6.1461619, 6.8019088),
 'Jos': (9.9175125, 8.8979401),
 'Yaba, Lagos': (6.5114693, 3.3699982),
 'Kaduna': (10.5182899, 7.4359863),
 'Lekki, Lagos': (6.4711251, 3.8147504423714778),
 'Benin City': (6.3330586, 5.6221058),
 'Calabar': (4.9795999, 8.3373597),
 'Abuja': (9.0643305, 7.4892974),
 'Ibadan': (7.3786064, 3.8969928),
 'Kano': (11.9918671, 8.5303654),
 'Asaba': (6.1858825, 6.7297071),
 'Surulere, Lagos': (6.4855737, 3.353990695126856),
 'Enugu': (6.4499833, 7.5000007),
 'Makurdi': (7.7312634, 8.538425),
 'Port Harcourt': (4.7676576, 7.0188527),
 'Owerri': (5.489736, 7.0341973),
 'Abeokuta': (7.161, 3.348),
 'Ajah, Lagos': (6.4694716, 3.5623861)}

In [10]:
# Look up each vendor's (latitude, longitude) pair once and reuse it for both columns.
vendor_coords = df['Vendor Location'].map(gps_coordinates)

# `gps_coordinates` stores None for locations the geocoder could not resolve, and
# `.map` yields NaN for locations absent from the dict. Indexing either value
# would raise TypeError, so anything that is not a coordinate tuple becomes NaN.
df['Vendor Longitude'] = vendor_coords.map(
    lambda pair: pair[1] if isinstance(pair, tuple) else np.nan
)
df['Vendor Latitude'] = vendor_coords.map(
    lambda pair: pair[0] if isinstance(pair, tuple) else np.nan
)

In [11]:
# Preview the first five rows of the frame, which now includes the vendor coordinate columns.
df.head(n=5)

Unnamed: 0,Updated At,Item Name,Category,Unit,Price (₦),Vendor Name,Venor Id,Vendor Location,Vendor Rating,Promotion,In Stock,Id,Vendor Longitude,Vendor Latitude
0,2022-12-31 10:45:36.823923136,Fruit Bar,Snacks,200g pack,79554,Vendor 1,1,Ogun State,4,6.662236,True,0,3.438929,6.978858
1,2022-07-08 12:32:03.005505776,Chocolates,Fruits,1 bottle,11277,Vendor 1,1,Ogun State,4,3.974454,False,1,3.438929,6.978858
2,2022-11-16 12:27:15.927885136,Chocolate Bar,Vegetables,1 can,132594,Vendor 1,1,Ogun State,4,7.516402,True,2,3.438929,6.978858
3,2022-02-15 08:48:48.813559322,Beans (Brown),Staples,1 box,130554,Vendor 1,1,Ogun State,4,6.689888,False,3,3.438929,6.978858
4,2022-09-16 21:33:02.467882976,Whole Wheat Bread,Beverages,1 bar,148704,Vendor 1,1,Ogun State,4,3.783152,True,4,3.438929,6.978858
