### Working with maps
### Plotting the latitude and longitude of restaurants and delivery locations on a real-world map

In [1]:
import pandas as pd
from math import radians, sin, cos, sqrt, atan2
import folium 

In [2]:
# Load the semi-cleaned delivery dataset (the path is relative to the working directory).
df = pd.read_csv("Notebook/Data/semi_cleaned_data.csv")
# Preview the first five rows.
df.head()

Unnamed: 0,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude,Time_Orderd,Time_Order_picked,Weather_conditions,...,year,month,day,Time_Orderd_Hour,Time_Orderd_min,Time_Category,Time_Order_picked_Hour,Time_Order_picked_min_Hour,Time_Order_picked_min,Delivery_city
0,DEHRES17DEL01,36.0,4.2,30.327968,78.046106,30.397968,78.116106,21:55,22:10,Fog,...,2022,2,12,21,55,Dinner,22,22,10,DEH
1,KOCRES16DEL01,21.0,4.7,10.003064,76.307589,10.043064,76.347589,14:55,15:05,Stormy,...,2022,2,13,14,55,Lunch,15,15,5,KOC
2,PUNERES13DEL03,23.0,4.7,18.56245,73.916619,18.65245,74.006619,17:30,17:40,Sandstorms,...,2022,3,4,17,30,After Lunch,17,17,40,PUNE
3,LUDHRES15DEL02,34.0,4.3,30.899584,75.809346,30.919584,75.829346,09:20,09:30,Sandstorms,...,2022,2,13,9,20,Breakfast,9,9,30,LUDH
4,KNPRES14DEL02,24.0,4.7,26.463504,80.372929,26.593504,80.502929,19:50,20:05,Fog,...,2022,2,14,19,50,Evening Snacks,20,20,5,KNP


In [3]:
# Restaurant coordinates:
# plot every restaurant's latitude/longitude on an OpenStreetMap base layer.

import plotly.express as px

# Work on a separate frame with incomplete rows removed, so `df` itself is untouched.
datal = df.dropna()
color_scale = [(0, 'orange'), (1, 'red')]
fig = px.scatter_mapbox(
    datal,
    lat="Restaurant_latitude",
    lon="Restaurant_longitude",
    color_continuous_scale=color_scale,
    zoom=8,
    height=1000,
    width=1000,
)
# Pick the base map and strip the figure margins in a single layout update.
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()

In [4]:
# from geopy.geocoders import Nominatim
# from folium.plugins import MarkerCluster

# # Assuming your DataFrame has columns named 'Restaurant_latitude', 'Restaurant_longitude',
# # 'Delivery_location_latitude', 'Delivery_location_longitude'

# def get_coordinates(address):
#     geolocator = Nominatim(user_agent="food_delivery_map")
#     location = geolocator.geocode(address)
#     return (location.latitude, location.longitude)

# # Create a map object
# food_delivery_map = folium.Map(location=[df['Restaurant_latitude'].mean(), df['Restaurant_longitude'].mean()], zoom_start=12)

# # Create a marker cluster object
# marker_cluster = MarkerCluster().add_to(food_delivery_map)

# # Iterate through the DataFrame and add markers for restaurants and delivery destinations
# for index, row in df.iterrows():
#     # Get restaurant coordinates
#     restaurant_coords = (row['Restaurant_latitude'], row['Restaurant_longitude'])
    
#     # Get delivery destination coordinates
#     delivery_coords = (row['Delivery_location_latitude'], row['Delivery_location_longitude'])
    
#     # Add markers for restaurant and delivery destination to the marker cluster
#     folium.Marker(location=restaurant_coords, popup='Restaurant').add_to(marker_cluster)
#     folium.Marker(location=delivery_coords, popup='Delivery Destination').add_to(marker_cluster)

# # Save the map to an HTML file
# food_delivery_map.save('food_delivery_map.html')

# # Display the map
# food_delivery_map


#### India's latitude ranges from 8°4' N to 37°6' N
#### India's longitude ranges from 68°7' E to 97°25' E

#### This dataset is about India, but some latitude and longitude values are negative. A negative latitude lies in the Southern Hemisphere (e.g. in the Indian Ocean), which cannot be correct for these locations.

#### So we need to change the negative sign to a positive one.

In [5]:
# Changing -ve coordinates to +ve.
# India lies entirely at positive (north) latitudes and positive (east)
# longitudes, so a negative value is treated as a sign error. All four
# coordinate columns are corrected, not only Restaurant_latitude, so a stray
# negative longitude or delivery coordinate cannot inflate the distance
# computed later. Taking abs() of an already-positive value changes nothing.
coordinate_columns = [
    'Restaurant_latitude',
    'Restaurant_longitude',
    'Delivery_location_latitude',
    'Delivery_location_longitude',
]
df[coordinate_columns] = df[coordinate_columns].abs()

In [6]:
import math

def distance(lat1, lon1, lat2, lon2, radius_km=6371):
    """Return the great-circle distance between two points (haversine formula).

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in decimal degrees.
    radius_km : float, optional
        Sphere radius in kilometres. Defaults to 6371, the Earth's radius
        used by the original implementation.

    Returns
    -------
    float
        Distance in the unit of ``radius_km`` (kilometres by default).
        NaN inputs propagate to a NaN result.
    """
    lat1, lon1, lat2, lon2 = map(math.radians, (lat1, lon1, lat2, lon2))
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = (
        math.sin(dlat / 2) ** 2
        + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    )
    # Floating-point rounding can push `a` a hair above 1 for (near-)antipodal
    # points, which would make sqrt(1 - a) raise ValueError. Only the upper
    # bound is clamped; a NaN fails the comparison and therefore stays NaN.
    if a > 1.0:
        a = 1.0
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return radius_km * c

# Compute the restaurant-to-delivery distance row by row.
# The column names below must match the ones in the DataFrame.
df['distance'] = df.apply(
    lambda record: distance(
        record['Restaurant_latitude'],
        record['Restaurant_longitude'],
        record['Delivery_location_latitude'],
        record['Delivery_location_longitude'],
    ),
    axis=1,
)


In [7]:
# Preview the first five computed distances.
df['distance'].head()

0    10.280582
1     6.242319
2    13.787860
3     2.930258
4    19.396618
Name: distance, dtype: float64

In [8]:
# Largest distance recorded for each distinct restaurant/delivery coordinate
# combination; only the first 40 groups are shown.
(
    df.groupby([
        'Restaurant_latitude',
        'Restaurant_longitude',
        'Delivery_location_latitude',
        'Delivery_location_longitude',
    ])['distance']
    .max()
    .head(40)
)

Restaurant_latitude  Restaurant_longitude  Delivery_location_latitude  Delivery_location_longitude
0.000000             0.000000              0.010000                    0.010000                        1.572534
                                           0.020000                    0.020000                        3.145067
                                           0.030000                    0.030000                        4.717601
                                           0.040000                    0.040000                        6.290135
                                           0.050000                    0.050000                        7.862668
                                           0.060000                    0.060000                        9.435202
                                           0.070000                    0.070000                       11.007735
                                           0.080000                    0.080000                       12.580268
     

#### India's latitude ranges from 8°4' N to 37°6' N
#### India's longitude ranges from 68°7' E to 97°25' E

- Dropping the rows whose coordinates fall outside these ranges (e.g. a latitude of less than 8° or more than 37°6')

In [9]:
# Size of the subset where both the restaurant and the delivery latitude are below 8°.
df.loc[
    df['Restaurant_latitude'].lt(8) & df['Delivery_location_latitude'].lt(8)
].shape

(3509, 29)

### There are 3509 rows in which both the restaurant and the delivery latitude are below 8°. We need to remove them all.

In [10]:
## Dropping the rows whose restaurant and delivery latitudes are both below 8°
df = df.drop(
    index=df[
        df['Restaurant_latitude'].lt(8) & df['Delivery_location_latitude'].lt(8)
    ].index
)

### Similarly, applying the same filter for latitudes greater than 37° and longitudes less than 68°

In [11]:
# Size of the subset where both the restaurant and the delivery latitude exceed 37°.
df.loc[
    df['Restaurant_latitude'].gt(37) & df['Delivery_location_latitude'].gt(37)
].shape

(0, 29)

In [12]:
# Size of the subset where both the restaurant and the delivery longitude are below 68°.
df.loc[
    df['Restaurant_longitude'].lt(68) & df['Delivery_location_longitude'].lt(68)
].shape

(0, 29)

### There are no rows with a latitude greater than 37° or a longitude less than 68°

### Calculating the minimum and maximum distance between a restaurant and the food delivery location

In [13]:
# Coordinates of the delivery with the shortest distance.
# nsmallest returns an actual row of `df`. Grouping by distance and taking a
# per-column min() can instead mix coordinates from different rows that
# happen to share the same distance. The result keeps the same layout:
# indexed by distance, with the four coordinate columns.
min_distance = (
    df.nsmallest(1, 'distance')
    .set_index('distance')[[
        'Restaurant_latitude',
        'Restaurant_longitude',
        'Delivery_location_latitude',
        'Delivery_location_longitude',
    ]]
)
min_distance

Unnamed: 0_level_0,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude
distance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.465067,30.914057,75.83982,30.924057,75.84982


In [14]:
# Coordinates of the delivery with the longest distance.
# nlargest returns an actual row of `df`. Grouping by distance and taking a
# per-column min() can instead mix coordinates from different rows that
# happen to share the same distance. The result keeps the same layout:
# indexed by distance, with the four coordinate columns.
max_distance = (
    df.nlargest(1, 'distance')
    .set_index('distance')[[
        'Restaurant_latitude',
        'Restaurant_longitude',
        'Delivery_location_latitude',
        'Delivery_location_longitude',
    ]]
)
max_distance

Unnamed: 0_level_0,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude
distance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
20.969489,25.443994,81.860187,25.583994,82.000187


In [15]:
# Remove, in place, the columns that are not carried into the final dataset.
df.drop(
    columns=[
        # raw coordinates
        'Restaurant_latitude',
        'Restaurant_longitude',
        'Delivery_location_latitude',
        'Delivery_location_longitude',
        # date and time parts
        'year',
        'month',
        'day',
        'Time_Orderd_min',
        'Time_Order_picked_Hour',
        'Time_Order_picked_min_Hour',
        'Time_Order_picked_min',
        # identifiers and raw time fields
        'Delivery_person_ID',
        'Time_Orderd',
        'Time_Order_picked',
        'Time_Category',
    ],
    inplace=True,
)

In [16]:
# Preview the remaining columns after the drop.
df.head()

Unnamed: 0,Delivery_person_Age,Delivery_person_Ratings,Weather_conditions,Road_traffic_density,Vehicle_condition,Type_of_order,Type_of_vehicle,multiple_deliveries,Festival,City,Time_taken (min),Time_Orderd_Hour,Delivery_city,distance
0,36.0,4.2,Fog,Jam,2,Snack,motorcycle,3.0,No,Metropolitian,46,21,DEH,10.280582
1,21.0,4.7,Stormy,High,1,Meal,motorcycle,1.0,No,Metropolitian,23,14,KOC,6.242319
2,23.0,4.7,Sandstorms,Medium,1,Drinks,scooter,1.0,No,Metropolitian,21,17,PUNE,13.78786
3,34.0,4.3,Sandstorms,Low,0,Buffet,motorcycle,0.0,No,Metropolitian,20,9,LUDH,2.930258
4,24.0,4.7,Fog,Jam,1,Snack,scooter,1.0,No,Metropolitian,41,19,KNP,19.396618


In [17]:
# Write the cleaned frame to the working directory; index=False leaves the row index out of the file.
df.to_csv('fully_cleaned_data.csv', index=False)