In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import holidays
import requests
from geopy.geocoders import Nominatim

In [8]:
completed_df = pd.read_csv('../data/nb.csv')
df = pd.read_csv('../data/driver_locations_during_request.csv')

In [11]:
completed_df.columns

Index(['Trip ID', 'Trip Origin', 'Trip Destination', 'Trip Start Time',
       'Trip End Time'],
      dtype='object')

In [12]:
df.columns

Index(['id', 'order_id', 'driver_id', 'driver_action', 'lat', 'lng',
       'created_at', 'updated_at'],
      dtype='object')

In [37]:
df = df.drop(['updated_at', 'created_at'], axis=1)

Feature Extraction

In [16]:
# Convert timestamp to datetime
completed_df['datetime'] = pd.to_datetime(completed_df['Trip Start Time'])

In [18]:
# Create hour of day
completed_df['hour'] = completed_df['datetime'].dt.hour

In [20]:
# Create weekend vs. weekday feature
completed_df['is_weekend'] = completed_df['datetime'].dt.weekday >= 5

In [28]:
# Add holiday feature
nigeria_holidays = holidays.CountryHoliday('NG')

In [30]:
# Create a function to check for holidays
def is_holiday(date):
    return date in nigeria_holidays

In [31]:
completed_df['is_holiday'] = completed_df['datetime'].apply(is_holiday)

In [32]:
completed_df

Unnamed: 0,Trip ID,Trip Origin,Trip Destination,Trip Start Time,Trip End Time,datetime,hour,is_weekend,is_holiday
0,391996,"6.508813001668548,3.37740316890347","6.650969799999999,3.3450307",2021-07-01 07:28:04,2021-07-01 07:29:37,2021-07-01 07:28:04,7.0,False,False
1,391997,"6.4316714,3.4555375","6.4280814653326,3.4721885847586",2021-07-01 06:38:04,2021-07-01 07:07:28,2021-07-01 06:38:04,6.0,False,False
2,391998,"6.631679399999999,3.3388976","6.508324099999999,3.3590397",2021-07-01 06:21:02,2021-07-01 07:02:23,2021-07-01 06:21:02,6.0,False,False
3,391999,"6.572757200000001,3.3677082","6.584881099999999,3.3614073",2021-07-01 07:16:07,2021-07-01 07:29:42,2021-07-01 07:16:07,7.0,False,False
4,392001,"6.6010417,3.2766339","6.4501069,3.3916154",2021-07-01 09:30:59,2021-07-01 09:34:36,2021-07-01 09:30:59,9.0,False,False
...,...,...,...,...,...,...,...,...,...
536015,1637696,"6.448218499999999,3.4772075","6.437787399999999,3.481670199999999",2021-12-30 20:35:06,2021-12-30 21:02:59,2021-12-30 20:35:06,20.0,False,False
536016,1637702,"6.442320899999999,3.4736868","6.436589333407897,3.5559738188407835",2021-12-30 20:48:13,2021-12-30 21:43:49,2021-12-30 20:48:13,20.0,False,False
536017,1637704,"6.4281982,3.492248","6.448088500000001,3.4775747",2021-12-30 20:51:45,2021-12-30 21:41:32,2021-12-30 20:51:45,20.0,False,False
536018,1637705,"6.5869296,3.3632966","6.637906899999999,3.3339515",2021-12-30 20:48:50,2021-12-30 21:08:28,2021-12-30 20:48:50,20.0,False,False


In [35]:
completed_df['is_holiday'].nunique()

2

In [38]:
merged_df = pd.merge(completed_df, df, left_on='Trip ID', right_on='order_id')

In [39]:
merged_df.head()

Unnamed: 0,Trip ID,Trip Origin,Trip Destination,Trip Start Time,Trip End Time,datetime,hour,is_weekend,is_holiday,id,order_id,driver_id,driver_action,lat,lng
0,392001,"6.6010417,3.2766339","6.4501069,3.3916154",2021-07-01 09:30:59,2021-07-01 09:34:36,2021-07-01 09:30:59,9.0,False,False,1,392001,243828,accepted,6.602207,3.270465
1,392001,"6.6010417,3.2766339","6.4501069,3.3916154",2021-07-01 09:30:59,2021-07-01 09:34:36,2021-07-01 09:30:59,9.0,False,False,2,392001,243588,rejected,6.592097,3.287445
2,392001,"6.6010417,3.2766339","6.4501069,3.3916154",2021-07-01 09:30:59,2021-07-01 09:34:36,2021-07-01 09:30:59,9.0,False,False,3,392001,243830,rejected,6.596133,3.281784
3,392001,"6.6010417,3.2766339","6.4501069,3.3916154",2021-07-01 09:30:59,2021-07-01 09:34:36,2021-07-01 09:30:59,9.0,False,False,4,392001,243539,rejected,6.596142,3.280526
4,392001,"6.6010417,3.2766339","6.4501069,3.3916154",2021-07-01 09:30:59,2021-07-01 09:34:36,2021-07-01 09:30:59,9.0,False,False,5,392001,171653,rejected,6.609232,3.2888


In [40]:
merged_df = merged_df.drop(['id', 'order_id'], axis=1)

In [41]:
merged_df.head()

Unnamed: 0,Trip ID,Trip Origin,Trip Destination,Trip Start Time,Trip End Time,datetime,hour,is_weekend,is_holiday,driver_id,driver_action,lat,lng
0,392001,"6.6010417,3.2766339","6.4501069,3.3916154",2021-07-01 09:30:59,2021-07-01 09:34:36,2021-07-01 09:30:59,9.0,False,False,243828,accepted,6.602207,3.270465
1,392001,"6.6010417,3.2766339","6.4501069,3.3916154",2021-07-01 09:30:59,2021-07-01 09:34:36,2021-07-01 09:30:59,9.0,False,False,243588,rejected,6.592097,3.287445
2,392001,"6.6010417,3.2766339","6.4501069,3.3916154",2021-07-01 09:30:59,2021-07-01 09:34:36,2021-07-01 09:30:59,9.0,False,False,243830,rejected,6.596133,3.281784
3,392001,"6.6010417,3.2766339","6.4501069,3.3916154",2021-07-01 09:30:59,2021-07-01 09:34:36,2021-07-01 09:30:59,9.0,False,False,243539,rejected,6.596142,3.280526
4,392001,"6.6010417,3.2766339","6.4501069,3.3916154",2021-07-01 09:30:59,2021-07-01 09:34:36,2021-07-01 09:30:59,9.0,False,False,171653,rejected,6.609232,3.2888


In [46]:
import os
from dotenv import WEATHER_API_KEY

ImportError: cannot import name 'API_KEY' from 'dotenv' (/home/grace-nyutu/Documents/Logistic_optimization/venv/lib/python3.12/site-packages/dotenv/__init__.py)

In [44]:
# Function to get weather data 
def get_weather(lat, lon, timestamp):
    API_KEY = 'API_KEY'
    url = f'https://api.weather.com/v1/location/{lat},{lon}/observations/historical.json?apiKey={API_KEY}&units=e&startDate={timestamp.strftime("%Y%m%d")}&endDate={timestamp.strftime("%Y%m%d")}'
    response = requests.get(url)
    data = response.json()
    # Check if it was raining
    rain = any(obs['wx_phrase'].lower() == 'rain' for obs in data['observations'])
    return rain

In [45]:
merged_df['is_rain'] = merged_df.apply(lambda x: get_weather(x['lat'], x['lng'], x['datetime']), axis=1)

KeyError: 'observations'