In [1]:
import numpy as np
import pandas as pd
from geopy.geocoders import GoogleV3
import json, os

def config():
    '''
    Load the notebook settings from ``config.json`` in the working directory.

    Returns
    -------
    The parsed JSON content of the file (whatever ``json.load`` produces).
    '''
    with open('config.json') as settings_file:
        return json.load(settings_file)

def read_mouza(mouza_path):
    '''
    Read one of the split mouza CSV files into a DataFrame.

    ``mouza_path`` is not a full path: it is the suffix substituted into
    ``split_data/split_mouzas<suffix>.csv`` (relative to the working
    directory).
    '''
    csv_file = "split_data/split_mouzas{}.csv".format(mouza_path)
    return pd.read_csv(csv_file)




In [2]:
def coor_search_pass1(mouzas, gmaps):
    '''
    First-pass geocoding of every mouza name through ``gmaps.geocode``.

    Each name is queried as '<name>, West Bengal'. We will later go for more
    passes if this fails, so a miss is recorded (and printed) rather than
    raised.

    Parameters
    ----------
    mouzas : pandas.DataFrame
        Must contain a 'name' column. The frame passed in is left untouched;
        the results are written to a copy.
    gmaps : object
        Geocoder exposing ``geocode(query)`` which returns ``None`` or an
        object with ``raw``, ``latitude``, ``longitude`` and ``address``
        attributes.

    Returns
    -------
    (pandas.DataFrame, dict)
        * a copy of ``mouzas`` with float 'latitude' / 'longitude' columns,
          where -1 marks a name that could not be geocoded;
        * a dict mapping each mouza name to the raw geocoder payload, or
          ``None`` for a miss. Rows that share a name share one entry (the
          last one processed wins).
    '''
    total = len(mouzas)

    # Storing Raw data
    raw = {}
    latitudes = []
    longitudes = []

    # Stats
    hit_count = 0

    # `position` is only a progress counter. Coordinates are collected in row
    # order and attached positionally at the end, so the result is correct
    # for any index (e.g. a slice that does not start at label 0).
    for position, mouza in enumerate(mouzas['name']):
        percentage = position / total * 100
        try:
            result = gmaps.geocode('{}, West Bengal'.format(mouza))
            if result is None:
                raise Exception('Value Not Found')
            payload = result.raw
            latitude, longitude = result.latitude, result.longitude
            status = result.address
        except Exception as e:
            # Deliberate best-effort: any lookup failure becomes a miss so
            # one bad name does not abort the whole batch.
            payload, latitude, longitude = None, -1.0, -1.0
            status = e
        else:
            hit_count += 1

        raw[mouza] = payload
        latitudes.append(latitude)
        longitudes.append(longitude)
        print('{0}% \t {1} \t\t {2}'.format(percentage, mouza, status))

    # Work on a copy so a slice handed in by the caller is never written to.
    located = mouzas.copy()
    located['latitude'] = np.array(latitudes, dtype=float)
    located['longitude'] = np.array(longitudes, dtype=float)

    # Guard the empty-frame case, which would otherwise divide by zero.
    hit_percent = hit_count / total * 100.0 if total else 0.0
    print('Hit percent = {}%'.format(hit_percent))
    return located, raw



In [3]:
# Credentials come from config.json rather than being hard-coded here.
params = config()
api_key = params["api_key"]

# Which split file to process; the same number is reused when saving results.
mouza_path = int(input("Enter file no : "))

# Load the split and add float coordinate columns, initialised to 0.0.
mouzas = read_mouza(mouza_path)
mouzas = mouzas.assign(latitude=0.0, longitude=0.0)

gmaps = GoogleV3(api_key=api_key)

Enter file no : 1


In [4]:
 # Dry run on an explicit copy of the first ten rows, so the search never writes into a slice of `mouzas`.
 test_loc, test_raw = coor_search_pass1(mouzas=mouzas.iloc[:10].copy(), gmaps=gmaps)

0.0% 	 Paira 		 Paira, West Bengal 722140, India
10.0% 	 Simlabandh 		 Simlabandh, West Bengal 722140, India
20.0% 	 Bhedua 		 Bhedua, West Bengal 722203, India
30.0% 	 Lutia 		 Lutia, West Bengal 722145, India
40.0% 	 Jhapandihi 		 Jhapandihi, West Bengal, India
50.0% 	 Kendua 		 Kendua, West Bengal, India
60.0% 	 Golakpur 		 Golakpur, West Bengal, India
70.0% 	 Bhutardihi 		 Bhutardihi, West Bengal 722140, India
80.0% 	 Manora 		 Manora, West Bengal 733209, India
90.0% 	 Dharampur 		 Dharampur, Hooghly, West Bengal, India
Hit percent = 100.0%


In [5]:
# Display the sample frame returned by coor_search_pass1, including the latitude / longitude columns.
test_loc

Unnamed: 0,id,name,jl_number,block_id,latitude,longitude
0,2040,Paira,44,12.0,23.046997,86.755465
1,2041,Simlabandh,45,12.0,23.055767,86.747923
2,2042,Bhedua,46,12.0,23.200998,87.249083
3,2043,Lutia,47,12.0,23.024612,86.853799
4,2044,Jhapandihi,48,12.0,23.032173,86.757979
5,2045,Kendua,49,12.0,24.969577,88.244901
6,2046,Golakpur,50,12.0,22.932316,87.307826
7,2047,Bhutardihi,51,12.0,23.026325,86.763006
8,2048,Manora,52,12.0,26.022993,87.881977
9,2049,Dharampur,53,12.0,22.87687,88.384625


In [6]:
# Display the raw geocoder payloads returned by coor_search_pass1, keyed by mouza name.
test_raw

{'Bhedua': {'address_components': [{'long_name': 'Bhedua',
    'short_name': 'Bhedua',
    'types': ['locality', 'political']},
   {'long_name': 'Bankura',
    'short_name': 'Bankura',
    'types': ['administrative_area_level_2', 'political']},
   {'long_name': 'West Bengal',
    'short_name': 'WB',
    'types': ['administrative_area_level_1', 'political']},
   {'long_name': 'India',
    'short_name': 'IN',
    'types': ['country', 'political']},
   {'long_name': '722203', 'short_name': '722203', 'types': ['postal_code']}],
  'formatted_address': 'Bhedua, West Bengal 722203, India',
  'geometry': {'bounds': {'northeast': {'lat': 23.2023611, 'lng': 87.250607},
    'southwest': {'lat': 23.1993829, 'lng': 87.24807489999999}},
   'location': {'lat': 23.2009979, 'lng': 87.2490835},
   'location_type': 'APPROXIMATE',
   'viewport': {'northeast': {'lat': 23.2023611, 'lng': 87.2506899302915},
    'southwest': {'lat': 23.1993829, 'lng': 87.2479919697085}}},
  'place_id': 'ChIJ2ZNx_kSY9zkRccvoNh

In [7]:
def save_results(df, raw, path='results/', file_no=None):
    '''
    Write the geocoded frame and the raw geocoder payloads to disk.

    Two files are created inside ``path`` (resolved against the current
    working directory): ``mouzas_coor<file_no>.csv`` and
    ``mouzas_raw<file_no>.json``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to save as CSV (written without the index).
    raw : dict
        JSON-serialisable payloads, dumped with an indent of 4.
    path : str
        Output directory; created, including missing parents, if needed.
        A trailing slash is optional.
    file_no : optional
        Suffix used in both file names. When omitted, the notebook-level
        ``mouza_path`` variable is used, as before.
    '''
    if file_no is None:
        # Backward-compatible fallback to the value entered in the setup cell.
        file_no = mouza_path

    final_directory = os.path.join(os.getcwd(), path)
    # makedirs handles nested paths and an already existing directory.
    os.makedirs(final_directory, exist_ok=True)

    # Join path components instead of concatenating strings, so the files
    # land inside the directory whether or not `path` ends with a slash.
    csv_path = os.path.join(final_directory, 'mouzas_coor{}.csv'.format(file_no))
    json_path = os.path.join(final_directory, 'mouzas_raw{}.json'.format(file_no))

    df.to_csv(csv_path, index=False)
    with open(json_path, 'w') as f:
        json.dump(raw, f, indent=4)

In [8]:
# Persist the sample run using the default 'results/' directory.
save_results(test_loc, test_raw)

In [35]:
# Scratch check of the absolute 'results' directory under the working directory.
# NOTE(review): this cell's execution count (35) is out of sequence with the cells above.
os.path.join(os.getcwd(), 'results')

'/home/guardian/MEGA/tessellateX/data_sampling/splitting data/results'