# Geospatial Features
In this notebook we will learn about geographic data and how to work with it.

In [2]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from datetime import datetime

%matplotlib inline
# Global seaborn styling: 10x10 figures and fonts scaled up by 1.2 for every plot below
sns.set(font_scale=1.2, rc={'figure.figsize': (10, 10)})

### 1) Work with Lat and Long

We will use the geopy library: https://pypi.org/project/geopy/

**1) Measuring Distance**

In [3]:
# import great_circle from geopy.distance
from geopy.distance import great_circle

# Two example places, each stored as a (lat, long) tuple
my_home = (30.109919, 31.308797)
my_cafe = (30.120982, 31.322026)

# Great-circle distance between the two places, displayed in kilometers
# (the same object also exposes the value in miles via `.miles`)
home_to_cafe = great_circle(my_home, my_cafe)
home_to_cafe.kilometers

1.7698508016026915

***So let's use it with our data***

In [4]:
# Load the sendy_logistics dataset into a DataFrame and preview its first five rows
sendy_csv = '../dastasets/sendy_logistics.csv'
df = pd.read_csv(sendy_csv)
df.head(n=5)

Unnamed: 0,Order No,User Id,Vehicle Type,Platform Type,Personal or Business,Placement - Day of Month,Placement - Weekday (Mo = 1),Placement - Time,Confirmation - Day of Month,Confirmation - Weekday (Mo = 1),...,Arrival at Destination - Time,Distance (KM),Temperature,Precipitation in millimeters,Pickup Lat,Pickup Long,Destination Lat,Destination Long,Rider Id,Time from Pickup to Arrival
0,Order_No_4211,User_Id_633,Bike,3,Business,9,5,9:35:46 AM,9,5,...,10:39:55 AM,4,20.4,,-1.317755,36.83037,-1.300406,36.829741,Rider_Id_432,745
1,Order_No_25375,User_Id_2285,Bike,3,Personal,12,5,11:16:16 AM,12,5,...,12:17:22 PM,16,26.4,,-1.351453,36.899315,-1.295004,36.814358,Rider_Id_856,1993
2,Order_No_1899,User_Id_265,Bike,3,Business,30,2,12:39:25 PM,30,2,...,1:00:38 PM,3,,,-1.308284,36.843419,-1.300921,36.828195,Rider_Id_155,455
3,Order_No_9336,User_Id_1402,Bike,3,Business,15,5,9:25:34 AM,15,5,...,10:05:27 AM,9,19.2,,-1.281301,36.832396,-1.257147,36.795063,Rider_Id_855,1341
4,Order_No_27883,User_Id_1737,Bike,1,Personal,13,1,9:55:18 AM,13,1,...,10:25:37 AM,9,15.4,,-1.266597,36.792118,-1.295041,36.809817,Rider_Id_770,1214


In [5]:
# Row-wise helper: distance between the pickup and destination coordinates
def get_dist(x):
    """Return the great-circle distance, in kilometers, for one order row.

    Parameters
    ----------
    x : mapping (e.g. a DataFrame row)
        Must provide the keys 'Pickup Lat', 'Pickup Long',
        'Destination Lat' and 'Destination Long'.

    Returns
    -------
    The `.kilometers` value of `great_circle(pickup, destination)`.
    """
    pickup = x['Pickup Lat'], x['Pickup Long']
    destination = x['Destination Lat'], x['Destination Long']
    trip = great_circle(pickup, destination)
    return trip.kilometers

# Run get_dist on every row and keep the result in a new 'Dist_Pick_Dest' column
df['Dist_Pick_Dest'] = df.apply(get_dist, axis='columns')
df.head()

Unnamed: 0,Order No,User Id,Vehicle Type,Platform Type,Personal or Business,Placement - Day of Month,Placement - Weekday (Mo = 1),Placement - Time,Confirmation - Day of Month,Confirmation - Weekday (Mo = 1),...,Distance (KM),Temperature,Precipitation in millimeters,Pickup Lat,Pickup Long,Destination Lat,Destination Long,Rider Id,Time from Pickup to Arrival,Dist_Pick_Dest
0,Order_No_4211,User_Id_633,Bike,3,Business,9,5,9:35:46 AM,9,5,...,4,20.4,,-1.317755,36.83037,-1.300406,36.829741,Rider_Id_432,745,1.930336
1,Order_No_25375,User_Id_2285,Bike,3,Personal,12,5,11:16:16 AM,12,5,...,16,26.4,,-1.351453,36.899315,-1.295004,36.814358,Rider_Id_856,1993,11.339865
2,Order_No_1899,User_Id_265,Bike,3,Business,30,2,12:39:25 PM,30,2,...,3,,,-1.308284,36.843419,-1.300921,36.828195,Rider_Id_155,455,1.880081
3,Order_No_9336,User_Id_1402,Bike,3,Business,15,5,9:25:34 AM,15,5,...,9,19.2,,-1.281301,36.832396,-1.257147,36.795063,Rider_Id_855,1341,4.943465
4,Order_No_27883,User_Id_1737,Bike,1,Personal,13,1,9:55:18 AM,13,1,...,9,15.4,,-1.266597,36.792118,-1.295041,36.809817,Rider_Id_770,1214,3.724834


### 2) Geocoding: getting lat/long from an address, and reverse geocoding to get the address from lat/long.

We will use the geopy library: https://pypi.org/project/geopy/

In [6]:
# import Nominatim from geopy.geocoders
from geopy.geocoders import Nominatim

# create a Nominatim geocoder object with user_agent = "Data Science course"
geolocator = Nominatim(user_agent="Data Science course")

In [7]:
# give a location to the object using .geocode method
location = geolocator.geocode("Cairo Festival, Cairo, Egypt")

# show the location information data using .raw method
location.raw

{'place_id': 43010744,
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright',
 'osm_type': 'way',
 'osm_id': 26753320,
 'lat': '30.026992800000002',
 'lon': '31.40906325945414',
 'class': 'place',
 'type': 'neighbourhood',
 'place_rank': 20,
 'importance': 0.25000999999999995,
 'addresstype': 'neighbourhood',
 'name': 'كايرو فستيفال سيتى',
 'display_name': 'كايرو فستيفال سيتى, القاهرة, مصر',
 'boundingbox': ['30.0164179', '30.0374523', '31.4019316', '31.4220303']}

In [8]:
# Latitude and longitude exactly as stored in the raw response
tuple(location.raw[key] for key in ('lat', 'lon'))

('30.026992800000002', '31.40906325945414')

In [9]:
# show the location information using .reverse with lat & long
location = geolocator.reverse("29.981650417493046, 31.428900499194086")
location.raw

{'place_id': 42837656,
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright',
 'osm_type': 'way',
 'osm_id': 338687680,
 'lat': '29.98171225560333',
 'lon': '31.42896290016677',
 'class': 'highway',
 'type': 'residential',
 'place_rank': 26,
 'importance': 0.10000999999999993,
 'addresstype': 'road',
 'name': '',
 'display_name': 'التجمع الثالث, القاهرة, 11936, مصر',
 'address': {'neighbourhood': 'التجمع الثالث',
  'village': 'التجمع الثالث',
  'city': 'القاهرة',
  'state': 'القاهرة',
  'ISO3166-2-lvl4': 'EG-C',
  'postcode': '11936',
  'country': 'مصر',
  'country_code': 'eg'},
 'boundingbox': ['29.9785046', '29.9825297', '31.4280647', '31.4292666']}

In [10]:
# Country name, read from the 'address' section of the raw response
address = location.raw['address']
address['country']

'مصر'

In [11]:
# City name, read from the 'address' section of the raw response
address = location.raw['address']
address['city']

'القاهرة'

***So let's use it with our data***

In [12]:
# take a sample of size 200 from sendy_logistics dataset
# random_state pins the sample so that Restart & Run All reproduces the same rows
# (and therefore the same results in the cells below)
df = pd.read_csv('../dastasets/sendy_logistics.csv').sample(200, random_state=42)
df.head()

Unnamed: 0,Order No,User Id,Vehicle Type,Platform Type,Personal or Business,Placement - Day of Month,Placement - Weekday (Mo = 1),Placement - Time,Confirmation - Day of Month,Confirmation - Weekday (Mo = 1),...,Arrival at Destination - Time,Distance (KM),Temperature,Precipitation in millimeters,Pickup Lat,Pickup Long,Destination Lat,Destination Long,Rider Id,Time from Pickup to Arrival
8557,Order_No_24983,User_Id_60,Bike,3,Business,28,4,1:48:44 PM,28,4,...,2:31:50 PM,15,27.2,,-1.303596,36.778378,-1.229662,36.843827,Rider_Id_484,1757
11901,Order_No_27965,User_Id_393,Bike,3,Business,11,1,1:24:39 PM,11,1,...,1:59:29 PM,11,27.9,,-1.316711,36.830156,-1.263818,36.793006,Rider_Id_619,2
18234,Order_No_26810,User_Id_1603,Bike,1,Personal,28,1,4:51:12 PM,28,1,...,5:49:03 PM,7,22.5,,-1.280677,36.837076,-1.322019,36.840017,Rider_Id_208,1896
4352,Order_No_9984,User_Id_1363,Bike,3,Business,2,5,9:00:32 AM,2,5,...,9:18:32 AM,3,17.9,16.9,-1.300406,36.829741,-1.304713,36.808955,Rider_Id_69,558
6096,Order_No_24392,User_Id_2593,Bike,3,Business,7,2,10:57:21 AM,7,2,...,11:11:26 AM,1,23.3,,-1.261589,36.792873,-1.255189,36.782203,Rider_Id_449,389


In [13]:
# create a fn to extract the city name from the data
from geopy.exc import GeopyError
from geopy.extra.rate_limiter import RateLimiter

# Throttle reverse lookups to at most one request per second so that applying
# get_city over many rows does not flood the geocoding service.
_rate_limited_reverse = RateLimiter(geolocator.reverse, min_delay_seconds=1)


def get_city(x):
    """Reverse-geocode a row's pickup coordinates and return the city name.

    Parameters
    ----------
    x : mapping (e.g. a DataFrame row)
        Must provide the keys 'Pickup Lat' and 'Pickup Long'.

    Returns
    -------
    The 'city' entry of the response's 'address' section, or np.nan when the
    lookup fails, finds nothing, or the response has no city.
    """
    try:
        location = _rate_limited_reverse(f"{x['Pickup Lat']}, {x['Pickup Long']}")
    except (GeopyError, ValueError):
        # Geocoder/service failures and unparsable coordinates are expected
        # per-row problems: record a missing value instead of aborting the apply.
        # Anything else (e.g. a missing column) is a real bug and should surface.
        return np.nan

    # No match for these coordinates (or the rate limiter gave up after its retries)
    if location is None:
        return np.nan

    return location.raw.get('address', {}).get('city', np.nan)

# Run get_city on every row and keep the result in a new 'City' column
df['City'] = df.apply(get_city, axis='columns')
df.head()

Unnamed: 0,Order No,User Id,Vehicle Type,Platform Type,Personal or Business,Placement - Day of Month,Placement - Weekday (Mo = 1),Placement - Time,Confirmation - Day of Month,Confirmation - Weekday (Mo = 1),...,Distance (KM),Temperature,Precipitation in millimeters,Pickup Lat,Pickup Long,Destination Lat,Destination Long,Rider Id,Time from Pickup to Arrival,City
8557,Order_No_24983,User_Id_60,Bike,3,Business,28,4,1:48:44 PM,28,4,...,15,27.2,,-1.303596,36.778378,-1.229662,36.843827,Rider_Id_484,1757,Nairobi
11901,Order_No_27965,User_Id_393,Bike,3,Business,11,1,1:24:39 PM,11,1,...,11,27.9,,-1.316711,36.830156,-1.263818,36.793006,Rider_Id_619,2,Nairobi
18234,Order_No_26810,User_Id_1603,Bike,1,Personal,28,1,4:51:12 PM,28,1,...,7,22.5,,-1.280677,36.837076,-1.322019,36.840017,Rider_Id_208,1896,Nairobi
4352,Order_No_9984,User_Id_1363,Bike,3,Business,2,5,9:00:32 AM,2,5,...,3,17.9,16.9,-1.300406,36.829741,-1.304713,36.808955,Rider_Id_69,558,Nairobi
6096,Order_No_24392,User_Id_2593,Bike,3,Business,7,2,10:57:21 AM,7,2,...,1,23.3,,-1.261589,36.792873,-1.255189,36.782203,Rider_Id_449,389,Nairobi


In [14]:
# show value counts
df['City'].value_counts()

City
Nairobi                    192
Syokimau-Mulolongo ward      5
Roysambu ward                1
Matathani ward               1
Kiambu Township ward         1
Name: count, dtype: int64

# Great Work!