In [46]:
# Import relevant libraries
import os
import re

import haversine as hs # !pip install haversine
import requests
import unidecode # pip install unidecode
from fuzzywuzzy import fuzz
from geopy import distance

In [47]:
# API key and URL for the Google Geocoding API.
# Reference: https://developers.google.com/maps/documentation/geocoding/?csw=1
# The key is read from the GOOGLE_MAPS_API_KEY environment variable so that it never has
# to be saved inside the notebook; the original placeholder is kept as the fallback value.
API_key = os.environ.get('GOOGLE_MAPS_API_KEY', '<INSERT YOUR API KEY>')
base_url = 'https://maps.googleapis.com/maps/api/geocode/json?'

In [51]:
# Example using the Geocoding API.
# Format of a Google Geocoding API response (for reference):
# https://jsonformatter.org/b23640
address = 'Bridgestreet Champs Elysees Accommodations Paris'
params = {
    'key': API_key,
    'address': address,
}
# A timeout keeps the cell from hanging indefinitely if the service does not answer.
response = requests.get(base_url, params=params, timeout=10).json()
response

{'results': [{'address_components': [{'long_name': '85',
     'short_name': '85',
     'types': ['street_number']},
    {'long_name': 'Rue du Faubourg Saint-Honoré',
     'short_name': 'Rue du Faubourg Saint-Honoré',
     'types': ['route']},
    {'long_name': 'Paris',
     'short_name': 'Paris',
     'types': ['locality', 'political']},
    {'long_name': 'Département de Paris',
     'short_name': 'Département de Paris',
     'types': ['administrative_area_level_2', 'political']},
    {'long_name': 'Île-de-France',
     'short_name': 'IDF',
     'types': ['administrative_area_level_1', 'political']},
    {'long_name': 'France',
     'short_name': 'FR',
     'types': ['country', 'political']},
    {'long_name': '75008', 'short_name': '75008', 'types': ['postal_code']}],
   'formatted_address': '85 Rue du Faubourg Saint-Honoré, 75008 Paris, France',
   'geometry': {'location': {'lat': 48.8717574, 'lng': 2.3135507},
    'location_type': 'ROOFTOP',
    'viewport': {'northeast': {'lat': 48.

In [52]:
def getGeoCoordinates(address,API_key=API_key,base_url=base_url,timeout=10):
    """
    Utility function to get the coordinates of an address.

    Sends ``address`` and ``API_key`` as query parameters to ``base_url`` and
    reads the location of the first result in the JSON response.

    Parameters
    ----------
    address : address text to geocode.
    API_key : value sent as the ``key`` query parameter. The default is bound
        when this cell runs, so re-run the cell after changing the global key.
    base_url : endpoint that receives the request.
    timeout : seconds to wait for the HTTP response before ``requests`` raises
        a timeout error (previously the call could block forever).

    Returns
    -------
    dict
        ``{"lat": ..., "long": ...}``. Both values are ``None`` when the
        response status is not ``'OK'`` or no result is present.
    """
    params = {
        'key': API_key,
        'address': address
    }
    response = requests.get(base_url, params=params, timeout=timeout).json()
    lat, long = None, None
    # Use .get() so a payload without these keys yields the "not found" result
    # instead of a KeyError.
    results = response.get('results') or []
    if response.get('status') == 'OK' and results:
        location = results[0]['geometry']['location']
        lat = location['lat']
        long = location['lng']
    return {"lat":lat,"long":long}

In [64]:
def getGeoData(address,API_key=API_key,base_url=base_url,timeout=10):
    """
    Utility function to get full information for an address using the geocoding API.

    Parameters
    ----------
    address : address text to geocode.
    API_key : value sent as the ``key`` query parameter. The default is bound
        when this cell runs, so re-run the cell after changing the global key.
    base_url : endpoint that receives the request.
    timeout : seconds to wait for the HTTP response before ``requests`` raises
        a timeout error (previously the call could block forever).

    Returns
    -------
    dict
        Empty when the response status is not ``'OK'`` or no result is present.
        Otherwise it maps every component type of the first result (except
        ``"political"``) to that component's ``long_name``, plus the keys
        ``"lat"``, ``"long"`` and ``"address_new"`` (the formatted address).
    """
    params = {
        'key': API_key,
        'address': address
    }
    response = requests.get(base_url, params=params, timeout=timeout).json()
    addressData = dict()
    # Use .get() so a payload without these keys yields the empty "not found"
    # result instead of a KeyError.
    results = response.get('results') or []
    if response.get('status') == 'OK' and results:
        best = results[0]
        for content in best["address_components"]:
            for typ in content["types"]:
                if typ != "political": # removing political label just for clarity
                    # A component with several types is stored once per type.
                    addressData[typ] = content["long_name"]

        location = best['geometry']['location']
        addressData["lat"] = location['lat']
        addressData["long"] = location['lng']
        addressData["address_new"] = best['formatted_address']
    return addressData

In [65]:
# Demo: fetch the parsed geocoding fields for a sample address (performs a live API request).
getGeoData("134 Ashewood Walk, Summerhill Lane, Portlaoise")

{'street_number': '134',
 'route': 'Ashewood Walk',
 'neighborhood': 'Summerhill',
 'locality': 'Portlaoise',
 'administrative_area_level_1': 'County Laois',
 'country': 'Ireland',
 'postal_code': 'R32 C52X',
 'lat': 53.0295704,
 'long': -7.272656499999999,
 'address_new': '134 Summerhill Ln, Ashewood Walk, Summerhill, Portlaoise, Co. Laois, R32 C52X, Ireland'}

In [54]:
def getDistanceGeoLocation(lat1,long1,lat2,long2):
    """
    Utility function returning the distance between two geo-locations.

    Euclidean distance is only valid on a flat Cartesian plane; the Earth is
    not flat, so the Haversine formula is used instead.
    Reference :  https://towardsdatascience.com/calculating-distance-between-two-geolocations-in-python-26ad3afe287b

    Each location is passed as a separate latitude and longitude value and the
    result is whatever ``hs.haversine`` returns for the two points.
    """
    first_point = (lat1, long1)
    second_point = (lat2, long2)
    # By default the haversine function returns the distance in km
    return hs.haversine(first_point, second_point)

In [17]:
# These two coordinate pairs belong to the same address, so the distance (in km) should be tiny
getDistanceGeoLocation(53.3023988,-6.263774100000001,53.3024516,-6.2644971)

0.048400184820589214

In [55]:
# These two points are very close to each other but are not the same place
getDistanceGeoLocation(26.890971,75.7415213,26.886815,75.7442499) 

0.5355256568130167

In [56]:
def getCleanAddress(address):
    """
    Utility function to normalise an address string before comparison.

    Steps, in order:
      1. Transliterate accented characters (fadas and other diacritics) to
         plain ASCII with ``unidecode``.
      2. Turn every dash (-) and period (.) into a space.
      3. Drop every remaining character that is not a letter, a digit or
         whitespace.

    Runs of spaces are not collapsed and letter case is left unchanged.
    Sample test : getCleanAddress("stävänge dr-ive") -> "stavange dr ive"
    """
    # Reference https://www.geeksforgeeks.org/how-to-remove-string-accents-using-python-3/
    cleanAddress = unidecode.unidecode(address)

    # Dashes and periods usually separate words, so keep a space in their place.
    spaced = cleanAddress.replace("-"," ").replace("."," ")

    # Raw string: a backslash escape in a normal literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    # List of english symbols : https://grammar.yourdictionary.com/punctuation/what/fourteen-punctuation-marks.html
    return re.sub(r'[^A-Za-z0-9\s]+', '', spaced)

In [39]:
def matchParameter(addrcomp1,addrcomp2,param,weights,debug=False):
    """
    Check whether one component of two addresses is similar enough.

    A component that is missing from either address is not held against the
    pair, so the function returns True. When the component is present in both,
    it returns True only if their ``fuzz.ratio`` score reaches the threshold
    stored in ``weights[param]``.

    With ``debug`` enabled a message is printed as soon as the component is
    found in both addresses, i.e. before the score is checked.
    """
    absent_in_one = (param not in addrcomp1) or (param not in addrcomp2)
    if absent_in_one:
        return True

    if debug:
        print(param + " matched")

    # Similarity score between the two values must reach the per-param threshold
    similarity = fuzz.ratio(addrcomp1[param], addrcomp2[param])
    return similarity >= weights[param]

In [66]:
def compareAddress(addr1,addr2,max_distance_km=0.25,weights=None):
    """
    Function to decide whether two address strings describe the same place.

    Both addresses are geocoded with ``getGeoData``. They are rejected when the
    Haversine distance between them exceeds ``max_distance_km``; otherwise each
    component listed in ``weights`` is compared with ``matchParameter`` after
    lower-casing and cleaning with ``getCleanAddress``.

    Parameters
    ----------
    addr1, addr2 : the two address strings to compare.
    max_distance_km : largest distance, in km, at which the two geocoded points
        may still be the same address (default 0.25 km, i.e. 250 m).
    weights : dict mapping a component name to the minimum fuzzy-match score
        (0-100) required for it. ``None`` selects the default table below.

    Returns
    -------
    dict
        ``{"status": ..., "reason": ...}`` where status is ``"match"``,
        ``"unmatch"`` or ``"unverified"`` (at least one address could not be
        geocoded).

    The computed distance is also printed.
    """
    if weights is None:
        # Threshold set for different params, for example the street number should always
        # match 100% as it is a number; params like locality or
        # administrative_area_level_1 can have some leniency.
        weights = {
            'street_number':100,
            'route':80,
            'neighborhood':80,
            'postal_town':100,
            'locality':80,
            'administrative_area_level_1':80,
            'administrative_area_level_2':80,
            'country':100,
            'postal_code':100
        }

    addrcomp1 = getGeoData(addr1)
    addrcomp2 = getGeoData(addr2)

    # getGeoData returns an empty dict when the API could not provide a result
    if not addrcomp1 or not addrcomp2:
        return {"status":"unverified","reason":"Google API was not able to resolve addresses"}

    dist = getDistanceGeoLocation(addrcomp1["lat"],addrcomp1["long"],addrcomp2["lat"],addrcomp2["long"])
    print("The Haversine distance between addresses : ",dist)
    if dist > max_distance_km:
        # Points further apart than the threshold are treated as different addresses
        return {"status":"unmatch","reason":"distance between the two addresses greater than threshold set"}

    # Keep only the textual components (this drops lat/long) and normalise them
    addrcomp1 = {k:getCleanAddress(v.lower()) for k, v in addrcomp1.items() if isinstance(v, str)}
    addrcomp2 = {k:getCleanAddress(v.lower()) for k, v in addrcomp2.items() if isinstance(v, str)}

    for param in weights:
        if not matchParameter(addrcomp1,addrcomp2,param,weights,False):
            # That particular param did not reach its threshold
            return {"status":"unmatch","reason":param +" not matched"}
    return {"status":"match","reason":"All Param matched"}

In [58]:
# Demo: compare two spellings of an address; the second abbreviates "Drive" and lists different locality details.
# NOTE(review): the recorded output reports a street_number mismatch although both strings start with "5" - worth re-running and investigating.
compareAddress("5 Braemor Drive, Churchtown, Co.Dublin","5 Braemor Dr, Newtown Little, Dublin 14,D14 NX40, Ireland")

The Haversine distance between addresses :  0.048400184820589214


{'status': 'unmatch', 'reason': 'street_number not matched'}

In [69]:
# Demo: same first address, with the trailing "D14 NX40, Ireland" part left out of the second string.
compareAddress("5 Braemor Drive, Churchtown, Co.Dublin","5 Braemor Dr, Newtown Little, Dublin 14")

The Haversine distance between addresses :  0.048400184820589214


{'status': 'unmatch', 'reason': 'street_number not matched'}

In [70]:
# Demo: compare a short address with the longer formatted spelling that getGeoData returned for it in the earlier demo output.
compareAddress("134 Ashewood Walk, Summerhill Lane, Portlaoise",
               "134 Summerhill Ln, Ashewood Walk, Summerhill, Portlaoise, Co. Laois, R32 C52X, Ireland")

The Haversine distance between addresses :  0.0


{'status': 'match', 'reason': 'All Param matched'}

In [67]:
# Demo: two address strings that differ only in the leading number (584 vs 579).
compareAddress("584, Rani Sati Nagar, Ajmer Road, Jaipur, Rajasthan, 302019",
               "579, Rani Sati Nagar, Ajmer Road, Jaipur, Rajasthan, 302019")

The Haversine distance between addresses :  0.5355256568130167


{'status': 'unmatch',
 'reason': 'distance between the two addresses greater than threshold set'}