## Exploratory Data Analysis

**Import essential packages**

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib as plt
import math
import folium

# https://pypi.org/project/geopy/
from geopy.geocoders import Nominatim
# https://pypi.org/project/reverse_geocode/1.0/
# https://github.com/thampiman/reverse-geocoder
import reverse_geocode

#Defining common variables
random_state = 42

**Import the main dataset**

In [2]:
#When working locally
poi_df = pd.read_csv('./datasets/poi.csv')

In [3]:
# poi_df_sample_3000 = poi_df.sample(n=3000, random_state=random_state)
# poi_df_sample_3000.to_csv('./datasets/poi_sample_3k.csv')

In [4]:
#When working remotely
# poi_df = pd.read_csv('./datasets/poi_sample_3k.csv')

In [5]:
poi_df.head(3)

Unnamed: 0,name,latitude_radian,longitude_radian,num_links,links,num_categories,categories
0,"YAYCHI, WEST AZERBAIJAN",0.683175,0.778053,13,Baba Jik Rural District; West Azerbaijan Provi...,1,POPULATED PLACES IN CHALDORAN COUNTY
1,MOUNT FISKE GLACIER,0.648196,-2.071114,9,Mount Fiske; Mount Warlow Glacier; U.S. state;...,3,GLACIERS OF THE SIERRA NEVADA (U.S.); GLACIERS...
2,ALATONA,0.258356,-0.103606,10,Diabaly; Alatona Irrigation Project; Mali; Nio...,2,POPULATED PLACES IN SÉGOU REGION; IRRIGATION P...


In [6]:
poi_df['categories'].head(3)

0                 POPULATED PLACES IN CHALDORAN COUNTY
1    GLACIERS OF THE SIERRA NEVADA (U.S.); GLACIERS...
2    POPULATED PLACES IN SÉGOU REGION; IRRIGATION P...
Name: categories, dtype: object

**Convert radians to degrees and add two extra columns**

In [7]:
poi_df['latitude_degrees'] = poi_df['latitude_radian'].apply(math.degrees)
poi_df['longitude_degrees'] = poi_df['longitude_radian'].apply(math.degrees)

In [8]:
poi_df.head(1)

Unnamed: 0,name,latitude_radian,longitude_radian,num_links,links,num_categories,categories,latitude_degrees,longitude_degrees
0,"YAYCHI, WEST AZERBAIJAN",0.683175,0.778053,13,Baba Jik Rural District; West Azerbaijan Provi...,1,POPULATED PLACES IN CHALDORAN COUNTY,39.143056,44.579167


**Add a column with comma separated lat/longs**

In [9]:
poi_df['latlong'] = list(zip(poi_df['latitude_degrees'], poi_df['longitude_degrees']))

In [10]:
poi_df.head(1)

Unnamed: 0,name,latitude_radian,longitude_radian,num_links,links,num_categories,categories,latitude_degrees,longitude_degrees,latlong
0,"YAYCHI, WEST AZERBAIJAN",0.683175,0.778053,13,Baba Jik Rural District; West Azerbaijan Provi...,1,POPULATED PLACES IN CHALDORAN COUNTY,39.143056,44.579167,"(39.143055555555556, 44.57916666666667)"


**Reverse geocoding**

In [11]:
geolocator = Nominatim(user_agent='world_travel')

*Testing*

In [12]:
location = geolocator.reverse("37.138889, -118.666111")
print(location.address)

Mount Fiske Glacier, John Muir Trail, Fresno County, California, USA


In [13]:
location = geolocator.reverse(poi_df['latlong'][5])
address = location.raw['address']

print(address['country'])

USA


**Creating Country, State, City columns**

In [14]:
# poi_df['country'] = (geolocator.reverse(poi_df['latlong']).raw['address'])['country']
poi_df['country'] = poi_df['latlong'].apply(geolocator.reverse).raw['address']


GeocoderTimedOut: Service timed out

In [59]:
poi_df.tail(2)

Unnamed: 0,name,latitude_radian,longitude_radian,num_links,links,num_categories,categories,latitude_degrees,longitude_degrees,latlong,country
424203,GLEN ROCK HISTORIC DISTRICT,0.694515,-1.33924,6,"York County, Pennsylvania; Historic district (...",4,HISTORIC DISTRICTS ON THE NATIONAL REGISTER OF...,39.792778,-76.732778,"(39.79277777777778, -76.73277777777778)",USA
424204,PURSAT PROVINCE,0.218748,1.813688,33,Cambodia; List of sovereign states; Phnom Penh...,2,PROVINCES OF CAMBODIA; PURSAT PROVINCE,12.533333,103.916667,"(12.533333333333333, 103.91666666666669)",USA


**Plotting points of interest on the map**

In [18]:
poi_map = folium.Map(location=([37.138889, -118.666111]), tiles="OpenStreetMap")

tooltip = 'Click Me!'

folium.Marker([37.138889, -118.666111], popup='<i>Mt. Fiske Glacier</i>', tooltip=tooltip).add_to(poi_map)

# poi_map

<folium.map.Marker at 0x11e4e38d0>