## Importing Libraries

In [279]:
import pandas as pd
import os
from geopandas.tools import geocode, reverse_geocode
import modules.psql as psql
from shapely.geometry import Point

## Postgres Configuration

In [254]:
# Executes the config_psql notebook from this kernel.
# NOTE(review): presumably this is where `engine` (used by the database cells
# below) gets defined -- confirm against config_psql.ipynb.
%run config_psql.ipynb

## Settings Configuration

In [255]:
# Display settings: remove pandas' truncation limits so that printed frames
# show every column and every row.
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, None)

## Importing supporting files

In [256]:
# Absolute path of the "config" directory located three levels above the
# current working directory.
file_path = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir, os.pardir, os.pardir, "config")
)

In [257]:
# Load the two JSON config files as pandas Series keyed by their JSON keys:
# `pw` holds credentials/API keys, `ground_region` is indexed by country when
# regions are looked up later in the notebook.
# os.path.join replaces the hard-coded '\\' separator, which only resolved on
# Windows; the resulting path is unchanged there and now also works elsewhere.
pw = pd.read_json(os.path.join(file_path, 'PasswordManager.json'), typ = 'series')
ground_region = pd.read_json(os.path.join(file_path, 'ground_region.json'), typ = 'series')

In [258]:
# API key handed to the Bing geocoding provider in the geocoding cells.
bingmaps_api_key = pw.loc['bingmaps_key']

## Getting source data from postgres

In [271]:
# SQL text that selects the rows of dwh.ground in which at least one of
# longitude, latitude, country, region or city is NULL, i.e. the grounds that
# still have a location attribute to fill in.
query = """
SELECT
    ground_id,
    city,
    venue,
    country,
    region,
    active,
    longitude,
    latitude
FROM dwh.ground
WHERE longitude IS NULL OR latitude IS NULL OR country IS NULL OR region IS NULL or city is NULL
"""

In [272]:
# Read the incomplete grounds into a DataFrame.
# The query now runs on the connection opened by the context manager; before,
# `engine` was passed to pandas, so `conn` was opened but never used and the
# read went through a second connection.
with engine.connect() as conn:
    df_ground = pd.read_sql_query(query, con = conn)

In [261]:
# Forward-geocode every incomplete ground with the Bing provider and write the
# returned longitude, latitude and country back onto df_ground.
print("There are {0} ground entries to be filled. Hang tight!\n".format(len(df_ground)))

for index, row in df_ground.iterrows():
    venue = row['venue']
    city = row['city']

    # pd.isna treats None and NaN alike; a ground without a venue name has
    # nothing to look up.
    if pd.isna(venue):
        print("Skipping ground_id {0}: venue is missing".format(row['ground_id']))
        continue

    # Append the city only when it is known and not already part of the venue
    # name. (A NaN city used to reach the `in` test and raise TypeError.)
    if pd.notna(city) and city not in venue:
        location = venue + ', ' + city
    else:
        location = venue

    result = geocode(location,
                     api_key = bingmaps_api_key,
                     provider = 'Bing',
                     user_agent = 'Dream11')

    # A single query string yields a one-row result.
    point = result['geometry'].iloc[0]
    address = result['address'].iloc[0]

    # NOTE(review): geopandas appears to report an unmatched lookup as an empty
    # point with no address rather than raising -- skip such rows so one miss
    # does not abort the whole run. Network/timeouts errors still propagate.
    if point is None or point.is_empty or pd.isna(address):
        print("No geocoding match for '{0}' (ground_id {1})".format(location, row['ground_id']))
        continue

    df_ground.at[index, 'longitude'] = point.x
    df_ground.at[index, 'latitude'] = point.y
    # The country is taken from the last ', '-separated component of the address.
    df_ground.at[index, 'country'] = address.split(', ')[-1]

    # Progress marker keyed on the DataFrame index label.
    if (index%10)==0:
        print("{0} grounds completed".format(index))

There are 796 ground entries to be filled. Hang tight!

0 grounds completed
10 grounds completed
20 grounds completed
30 grounds completed
40 grounds completed
50 grounds completed
60 grounds completed
70 grounds completed
80 grounds completed
90 grounds completed
100 grounds completed
110 grounds completed
120 grounds completed
130 grounds completed
140 grounds completed
150 grounds completed
160 grounds completed
170 grounds completed
180 grounds completed
190 grounds completed
200 grounds completed
210 grounds completed
220 grounds completed
230 grounds completed
240 grounds completed
250 grounds completed
260 grounds completed
270 grounds completed
280 grounds completed
290 grounds completed
300 grounds completed
310 grounds completed
320 grounds completed
330 grounds completed
340 grounds completed
350 grounds completed
360 grounds completed
370 grounds completed
380 grounds completed
390 grounds completed
400 grounds completed
410 grounds completed
420 grounds completed
430 groun

In [265]:
# Derive each ground's region from its country through the ground_region lookup.
# Series.map leaves countries that are absent from the lookup (or missing) as
# NaN instead of raising KeyError like the per-row `ground_region[...]` access
# did; for those rows the region already present in df_ground is kept.
region_from_country = df_ground['country'].map(ground_region)

# Surface countries that have no entry so the lookup file can be extended.
unmapped_countries = df_ground.loc[
    region_from_country.isna() & df_ground['country'].notna(), 'country'
].unique()
if len(unmapped_countries):
    print("No region mapping for: {0}".format(', '.join(map(str, unmapped_countries))))

df_ground['region'] = region_from_country.where(region_from_country.notna(), df_ground['region'])

In [303]:
# Reverse-geocode the grounds that still have no city, using their coordinates.
for index, row in df_ground.iterrows():
    # pd.notna treats None and NaN alike (the previous `is None` test let NaN
    # cities slip through untouched); rows that already have a city are kept.
    if pd.notna(row['city']):
        continue

    # Without both coordinates there is nothing to reverse-geocode.
    if pd.isna(row['longitude']) or pd.isna(row['latitude']):
        print("Skipping ground_id {0}: coordinates are missing".format(row['ground_id']))
        continue

    # Point takes (x, y), i.e. (longitude, latitude).
    result = reverse_geocode(Point([row['longitude'],row['latitude']]),
                             api_key = bingmaps_api_key,
                             provider = 'Bing',
                             user_agent = 'Dream11')

    address = result['address'].iloc[0]
    # A lookup with no address would otherwise fail on `.split`.
    if pd.isna(address):
        print("No reverse-geocoding match for ground_id {0}".format(row['ground_id']))
        continue

    # Heuristic: use the third-from-last comma-separated component as the city,
    # falling back to the second-from-last for shorter addresses.
    address_parts = address.split(',')
    if len(address_parts) >= 3:
        city = address_parts[-3].strip()
    elif len(address_parts) >= 2:
        city = address_parts[-2].strip()
    else:
        city = None
    df_ground.at[index, 'city'] = city

    # Progress marker keyed on the DataFrame index label.
    if (index%10)==0:
        print("{0} grounds completed".format(index))

10 grounds completed
20 grounds completed
30 grounds completed
40 grounds completed
50 grounds completed
60 grounds completed
70 grounds completed
80 grounds completed
90 grounds completed
100 grounds completed


In [306]:
# Hand the enriched frame to the project's psql.upsert helper, targeting
# dwh.ground with ground_id as the key column and the location attributes as
# the columns to update.
# NOTE(review): the exact upsert semantics live in modules.psql -- confirm there.
enriched_columns = ['city', 'country', 'region', 'longitude', 'latitude']
qry = psql.upsert(
    engine,
    schema = "dwh",
    table = "ground",
    dataFrame = df_ground,
    pk_col = ['ground_id'],
    update_col = enriched_columns,
)