## Importing Libraries

In [None]:
import pandas as pd
import os
from geopandas.tools import geocode
import modules.psql as psql

## Postgres Configuration

In [None]:
# Execute the Postgres configuration notebook in this namespace.
# NOTE(review): `engine`, used further down, is not defined in this notebook, so it
# presumably comes from config_psql.ipynb — confirm.
%run config_psql.ipynb

## Settings Configuration

In [None]:
# Display settings: remove the column and row display limits (None = no limit).
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

## Importing supporting files

In [None]:
# Absolute path of the config directory located three levels above the working directory.
config_dir_from_cwd = os.path.join(os.getcwd(), "../../../config/")
file_path = os.path.abspath(config_dir_from_cwd)

In [None]:
# Load the JSON config files as pandas Series (typ='series') so entries can be looked up by key.
# os.path.join supplies the platform's separator; the previous hard-coded '\\' only
# resolved on Windows.
pw = pd.read_json(os.path.join(file_path, 'PasswordManager.json'), typ = 'series')
ground_region = pd.read_json(os.path.join(file_path, 'ground_region.json'), typ = 'series')

In [None]:
# Bing Maps API key, taken from the 'bingmaps_key' entry of the `pw` series.
bingmaps_api_key = pw['bingmaps_key']

## Getting source data from Postgres

In [None]:
# Select the grounds that are still missing a coordinate component, a country, or a region.
# An earlier query assigned to the same name (filtering on a `coordinate` column) was
# overwritten before it could ever run, so that dead assignment has been removed.
query = """
SELECT
    ground_id,
    city,
    venue,
    country,
    region,
    active
FROM dwh.ground
WHERE longitude IS NULL OR latitude IS NULL OR country IS NULL OR region IS NULL
"""

In [None]:
# Run the query on the connection opened by the `with` block, so the connection that is
# checked out here is the one actually used (previously `engine` was passed instead and
# `conn` sat idle).
with engine.connect() as conn:
    df_ground = pd.read_sql_query(query, con = conn)

In [None]:
# Announce the workload size before the per-row geocoding requests start.
pending_total = len(df_ground)
print("There are {0} ground entries to be filled. Hang tight!\n".format(pending_total))

# Pass 1: geocode every ground through Bing and record its coordinates and country.
for idx, ground in df_ground.iterrows():
    city_name = ground['city']
    venue_name = ground['venue']
    # Append the city only when the venue text does not already contain it.
    location = venue_name if city_name in venue_name else venue_name + ', ' + city_name

    result = geocode(
        location,
        api_key = bingmaps_api_key,
        provider = 'Bing',
        user_agent = 'Dream11',
    )

    # Read both coordinates from the first returned geometry before writing anything back.
    first_geometry = result['geometry']
    longitude, latitude = first_geometry[0].x, first_geometry[0].y

    df_ground.at[idx, 'longitude'] = longitude
    df_ground.at[idx, 'latitude'] = latitude
    # The country is taken as the last ', '-separated piece of the first returned address.
    address_parts = result['address'].str.split(', ')
    df_ground.at[idx, 'country'] = address_parts[0][-1]
    print("{0} grounds completed".format(idx + 1))

# Pass 2: look up each row's region from its (now filled) country in `ground_region`.
for idx, ground in df_ground.iterrows():
    df_ground.at[idx, 'region'] = ground_region[ground['country']]

In [None]:
# Send the filled-in frame to psql.upsert for the dwh.ground table.
# NOTE(review): presumably rows are matched on `pk_col` and only `update_col` columns
# are overwritten — confirm against modules.psql.
upsert_options = {
    'dataFrame': df_ground,
    'table': "ground",
    'schema': "dwh",
    'pk_col': ['ground_id'],
    'update_col': ['country','region','longitude','latitude'],
}
qry = psql.upsert(engine, **upsert_options)