In [8]:
import re
import pandas as pd
import numpy as np
import seaborn as sns
import numpy as np

import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
tqdm.pandas()

In [9]:
from geopy.geocoders import Nominatim
geocoder = Nominatim(user_agent = 'GIS')

from geopy.extra.rate_limiter import RateLimiter
geocode = RateLimiter(geocoder.geocode, min_delay_seconds = 1,   return_value_on_exception = None) 
# adding 1 second padding between calls

In [10]:
def get_info_from_coordinated(row):

    try:
        location = geocoder.reverse((row.city_GIS_1,	row.city_GIS_2))
        location_flattened = list(pd.json_normalize(location.raw).T.to_dict().values())[0]

        location_flattened_str = " ".join(location_flattened.keys())
        result_postcode = re.findall("postcode", location_flattened_str)

        if result_postcode:
            postcode_geo = location.raw['address']['postcode']
            state_geo = location.raw['address']['state']
            city_geo = location.raw['address']['county']

            return postcode_geo, state_geo, city_geo
        else:
            address_geo = location.address

            location_rev = geocode(address_geo).raw
            postcode_geo = location_rev.raw['address']['postcode']
            state_geo = location_rev.raw['address']['state']
            city_geo = location_rev.raw['address']['county']
            return postcode_geo, state_geo, city_geo

    except Exception as e:
        return None, None, None

In [11]:
df = pd.read_csv(r"C:\Users\majmo\Git\ml_demystified\projects\gis\gis_data.csv",index_col=0, delimiter=",")
df = df[['city', 'city_GIS_1', 'city_GIS_2']]

In [12]:
df_sampled = df.sample(frac=0.1)

In [13]:
df_sampled


Unnamed: 0,city,city_GIS_1,city_GIS_2
5099,تاکستان,36.069361,49.694056
6495,شال,35.898278,49.767889
2869,الوند,36.188500,50.058972
444,بوئین زهرا,35.767750,50.057667
4582,اقبالیه,36.230583,49.923722
...,...,...,...
2033,محمدیه,36.223528,50.182528
4991,آبیک,36.044611,50.533694
6144,ضیاآباد,35.995444,49.445278
3748,قزوین,36.288528,50.007111


In [15]:
for row in df_sampled.itertuples():
    print(row)
    postcode_geo, state_geo, city_geo = get_info_from_coordinated(row)
    print(postcode_geo, state_geo, city_geo)


Pandas(Index=5099, city='تاکستان', city_GIS_1=36.069361, city_GIS_2=49.694056)
None None None
Pandas(Index=6495, city='شال', city_GIS_1=35.898278, city_GIS_2=49.767889)
None None None
Pandas(Index=2869, city='الوند', city_GIS_1=36.1885, city_GIS_2=50.058972)
None None None
Pandas(Index=444, city='بوئین زهرا', city_GIS_1=35.76775, city_GIS_2=50.057667)
34517-33461 استان قزوین شهرستان بوئین زهرا
Pandas(Index=4582, city='اقبالیه', city_GIS_1=36.230583, city_GIS_2=49.923722)
None None None
Pandas(Index=4301, city='اقبالیه', city_GIS_1=36.230583, city_GIS_2=49.923722)


KeyboardInterrupt: 

In [104]:
# import the plotly express
import plotly.express as px

# set up the chart from the df dataFrame
fig = px.scatter_geo(df,
                     # longitude is taken from the df["lon"] columns and latitude from df["lat"]
                     lon="city_GIS_1",
                     lat="city_GIS_2",
                     # choose the map chart's projection
                     projection="natural earth",
                     # columns which is in bold in the pop up
                     hover_name="city",
                     # format of the popup not to display these columns' data
                     hover_data={"city": False,
                                 "city_GIS_1": False,
                                 "city_GIS_2": False
                                 }
                     )


# scatter_geo allow to change the map date based on the information from the df dataframe, but we can separately specify the values that are common to all
# change the size of the markers to 25 and color to red
fig.update_traces(marker=dict(size=2, color="red"))

# fit the map to surround the points
fig.update_geos(fitbounds="locations", showcountries=True)

# add title
fig.update_layout(
    title='Your customers'
)
fig.show()
