# libpostal
https://github.com/openvenues/libpostal

In [None]:
!sudo apt-get install curl autoconf automake libtool python-dev pkg-config
!sudo apt-get install curl autoconf automake libtool pkg-config
!git clone https://github.com/openvenues/libpostal
%cd libpostal
!./bootstrap.sh
!./configure
!make -j4
!sudo make install
!sudo ldconfig
!pip install postal

In [44]:
import pandas as pd

from postal.expand import expand_address
from postal.parser import parse_address

In [45]:
# Usage examples for the two libpostal entry points
french_address = 'Quatre vingt douze Ave des Champs-Élysées'
london_address = 'The Book Club 100-106 Leonard St, Shoreditch, London, Greater London, EC2A 4RH, United Kingdom'

# Normalization: print the list of expanded forms of the address
french_expansions = expand_address(french_address)
print(french_expansions)
# Parsing: split an address into labelled components
london_components = parse_address(london_address)
print(london_components)

# Parse the raw (non-normalized) address
raw_components = parse_address(french_address)
print(raw_components)
# Normalize first, then parse the first expansion
first_expansion = expand_address(french_address)[0]
print(parse_address(first_expansion))

['92 avenue des champs-elysees', '92 avenue des champs elysees', '92 avenue des champselysees']
[('the book club', 'house'), ('100-106', 'house_number'), ('leonard st', 'road'), ('shoreditch', 'suburb'), ('london', 'city'), ('greater london', 'state_district'), ('ec2a 4rh', 'postcode'), ('united kingdom', 'country')]
[('quatre vingt douze ave des champs-élysées', 'road')]
[('92', 'house_number'), ('avenue des champs-elysees', 'road')]


In [46]:
# Load the list of Mizuho overseas offices; its addresses serve as sample input for parsing
mizuho_address_list = pd.read_excel(
  io="/content/mizuho_address_list.xlsx",
)
mizuho_address_list

Unnamed: 0,No,拠点名,住所,住所（現地語）,電話番号
0,1,ニューヨーク支店,"1271 Avenue of the Americas, New York, NY 1002...",,Tel.1-212-282-3000
1,2,ニューヨーク支店1251ビル出張所,"1251 Avenue of the Americas, New York, NY 1002...",,Tel.1-212-282-3000
2,3,米国みずほ銀行,"1271 Avenue of the Americas, New York, NY 1002...",,Tel.1-212-282-3000
3,4,米国みずほ銀行1251ビル出張所,"1251 Avenue of the Americas, New York, NY 1002...",,Tel.1-212-282-3000
4,5,ワシントンD.C.駐在員事務所,"1275 Pennsylvania Avenue, NW, Suite 310, Washi...",,Tel.1-202-292-5080
...,...,...,...,...,...
62,63,ロンドン支店ドバイ出張所,"The Gate Building, East Wing, Level 5, Dubai I...",,Tel.971-4-279-4400
63,64,ロンドン支店アブダビ出張所,"Al Sila Tower, 24th Floor, Abu Dhabi Global Ma...",,Tel.971-2-694-8551
64,65,イスタンブール駐在員事務所,Esentepe Mah. Buyukdere Cad. No:175 Ferko Sign...,,Tel.90-212-932-8251
65,66,ロンドン支店ヨハネスブルグ出張所,"2nd Floor, West Tower, Maude Street, Nelson Ma...",,Tel.27-11-881-5410


In [47]:
# Run every office address through libpostal in three ways and attach the
# results to the table as three new columns:
#   expand_address_libpostal            - list of normalized forms
#   parse_address_libpostal             - components parsed from the raw address
#   expand_then_parse_address_libpostal - components parsed from the first normalized form
expand_address_libpostal = []
parse_address_libpostal = []
expand_then_parse_address_libpostal = []
for address in mizuho_address_list['住所']:
  # Normalized address candidates; computed once and reused below
  expansions = expand_address(address)
  expand_address_libpostal.append(expansions)
  # Structured address parsed directly from the raw text
  parse_address_libpostal.append(parse_address(address))
  # Normalize, then structure. If no expansion came back, fall back to the raw
  # address instead of raising IndexError on expansions[0].
  normalized = expansions[0] if expansions else address
  expand_then_parse_address_libpostal.append(parse_address(normalized))

mizuho_address_list['expand_address_libpostal'] = expand_address_libpostal
mizuho_address_list['parse_address_libpostal'] = parse_address_libpostal
mizuho_address_list['expand_then_parse_address_libpostal'] = expand_then_parse_address_libpostal
mizuho_address_list

Unnamed: 0,No,拠点名,住所,住所（現地語）,電話番号,expand_address_libpostal,parse_address_libpostal,expand_then_parse_address_libpostal
0,1,ニューヨーク支店,"1271 Avenue of the Americas, New York, NY 1002...",,Tel.1-212-282-3000,[1271 avenue of the americas new york ny 10020...,"[(1271, house_number), (avenue of the americas...","[(1271, house_number), (avenue of the americas..."
1,2,ニューヨーク支店1251ビル出張所,"1251 Avenue of the Americas, New York, NY 1002...",,Tel.1-212-282-3000,[1251 avenue of the americas new york ny 10020...,"[(1251, house_number), (avenue of the americas...","[(1251, house_number), (avenue of the americas..."
2,3,米国みずほ銀行,"1271 Avenue of the Americas, New York, NY 1002...",,Tel.1-212-282-3000,[1271 avenue of the americas new york ny 10020...,"[(1271, house_number), (avenue of the americas...","[(1271, house_number), (avenue of the americas..."
3,4,米国みずほ銀行1251ビル出張所,"1251 Avenue of the Americas, New York, NY 1002...",,Tel.1-212-282-3000,[1251 avenue of the americas new york ny 10020...,"[(1251, house_number), (avenue of the americas...","[(1251, house_number), (avenue of the americas..."
4,5,ワシントンD.C.駐在員事務所,"1275 Pennsylvania Avenue, NW, Suite 310, Washi...",,Tel.1-202-292-5080,[1275 pennsylvania avenue northwest suite 310 ...,"[(1275, house_number), (pennsylvania avenue nw...","[(1275, house_number), (pennsylvania avenue no..."
...,...,...,...,...,...,...,...,...
62,63,ロンドン支店ドバイ出張所,"The Gate Building, East Wing, Level 5, Dubai I...",,Tel.971-4-279-4400,[the gate building east wing level 5 dubai int...,[(the gate building east wing level 5 dubai in...,[(the gate building east wing level 5 dubai in...
63,64,ロンドン支店アブダビ出張所,"Al Sila Tower, 24th Floor, Abu Dhabi Global Ma...",,Tel.971-2-694-8551,[alabama sila tower 24th floor abu dhabi globa...,"[(al sila tower 24th floor, house), (abu, city...","[(alabama sila tower 24th floor, house), (abu,..."
64,65,イスタンブール駐在員事務所,Esentepe Mah. Buyukdere Cad. No:175 Ferko Sign...,,Tel.90-212-932-8251,[esentepe mahallesi buyukdere caddesi numara 1...,"[(esentepe mah. buyukdere cad., road), (no 175...","[(esentepe mahallesi, house), (buyukdere cadde..."
65,66,ロンドン支店ヨハネスブルグ出張所,"2nd Floor, West Tower, Maude Street, Nelson Ma...",,Tel.27-11-881-5410,[2nd floor west tower maude street nelson mand...,"[(2nd floor, house_number), (west tower maude ...","[(2nd floor, house_number), (west tower maude ..."


In [48]:
# Save the table with the libpostal columns as CSV.
# 'utf-8-sig' is UTF-8 with a byte-order mark written at the start of the file.
mizuho_address_list.to_csv(
  path_or_buf='/content/mizuho_address_list_structured.csv',
  encoding='utf-8-sig',
)

# Geocoding API
https://developers.google.com/maps/documentation/geocoding?hl=ja

In [49]:
!pip install -U googlemaps

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [50]:
import os

import googlemaps

# Read the key from the GOOGLE_MAPS_API_KEY environment variable so that a real
# credential never has to be saved inside the notebook. The original placeholder
# string remains the fallback when the variable is not set.
YOUR_API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY", "YOUR_API_KEY")
# Example of a raw Geocoding API request URL, for reference:
# https://maps.googleapis.com/maps/api/geocode/json?address=1600+Amphitheatre+Parkway,+Mountain+View,+CA&key=YOUR_API_KEY

# Client shared by all Google Maps calls in this notebook
gmaps = googlemaps.Client(key=YOUR_API_KEY)

In [51]:
def _address_components(result):
  """Return (long_name, types) pairs for every component of one geocoding result."""
  return [(s['long_name'], s['types']) for s in result['address_components']]


# Geocode every office address and attach three new columns to the table:
#   expand_address_googlemaps            - formatted address of the first result
#   parse_address_googlemaps             - components of the first result
#   expand_then_parse_address_googlemaps - components after geocoding the formatted address again
expand_address_googlemaps = []
parse_address_googlemaps = []
expand_then_parse_address_googlemaps = []
for address in mizuho_address_list['住所']:
  # Geocode the address (Geocoding API)
  geocode_result = gmaps.geocode(address)

  if not geocode_result:
    # No match: store placeholders so the lists stay aligned with the table
    # rows, instead of raising IndexError on geocode_result[0].
    expand_address_googlemaps.append(None)
    parse_address_googlemaps.append([])
    expand_then_parse_address_googlemaps.append([])
    continue

  best_match = geocode_result[0]
  # Normalized address
  expand_address_googlemaps.append(best_match['formatted_address'])
  # Structured address
  parse_address_googlemaps.append(_address_components(best_match))
  # Normalize first, then structure: geocode the formatted address a second time
  geocode_result_formatted_address = gmaps.geocode(best_match['formatted_address'])
  expand_then_parse_address_googlemaps.append(
    _address_components(geocode_result_formatted_address[0])
    if geocode_result_formatted_address else []
  )

mizuho_address_list['expand_address_googlemaps'] = expand_address_googlemaps
mizuho_address_list['parse_address_googlemaps'] = parse_address_googlemaps
mizuho_address_list['expand_then_parse_address_googlemaps'] = expand_then_parse_address_googlemaps
mizuho_address_list

Unnamed: 0,No,拠点名,住所,住所（現地語）,電話番号,expand_address_libpostal,parse_address_libpostal,expand_then_parse_address_libpostal,expand_address_googlemaps,parse_address_googlemaps,expand_then_parse_address_googlemaps
0,1,ニューヨーク支店,"1271 Avenue of the Americas, New York, NY 1002...",,Tel.1-212-282-3000,[1271 avenue of the americas new york ny 10020...,"[(1271, house_number), (avenue of the americas...","[(1271, house_number), (avenue of the americas...","1271 6th Ave, New York, NY 10020, USA","[(1271, [street_number]), (6th Avenue, [route]...","[(1271, [street_number]), (6th Avenue, [route]..."
1,2,ニューヨーク支店1251ビル出張所,"1251 Avenue of the Americas, New York, NY 1002...",,Tel.1-212-282-3000,[1251 avenue of the americas new york ny 10020...,"[(1251, house_number), (avenue of the americas...","[(1251, house_number), (avenue of the americas...","1251 Avenue of the Americas, New York, NY 1002...","[(1251 Avenue of the Americas, [premise]), (Ma...","[(1251 Avenue of the Americas, [premise]), (Ma..."
2,3,米国みずほ銀行,"1271 Avenue of the Americas, New York, NY 1002...",,Tel.1-212-282-3000,[1271 avenue of the americas new york ny 10020...,"[(1271, house_number), (avenue of the americas...","[(1271, house_number), (avenue of the americas...","1271 6th Ave, New York, NY 10020, USA","[(1271, [street_number]), (6th Avenue, [route]...","[(1271, [street_number]), (6th Avenue, [route]..."
3,4,米国みずほ銀行1251ビル出張所,"1251 Avenue of the Americas, New York, NY 1002...",,Tel.1-212-282-3000,[1251 avenue of the americas new york ny 10020...,"[(1251, house_number), (avenue of the americas...","[(1251, house_number), (avenue of the americas...","1251 Avenue of the Americas, New York, NY 1002...","[(1251 Avenue of the Americas, [premise]), (Ma...","[(1251 Avenue of the Americas, [premise]), (Ma..."
4,5,ワシントンD.C.駐在員事務所,"1275 Pennsylvania Avenue, NW, Suite 310, Washi...",,Tel.1-202-292-5080,[1275 pennsylvania avenue northwest suite 310 ...,"[(1275, house_number), (pennsylvania avenue nw...","[(1275, house_number), (pennsylvania avenue no...","1275 Pennsylvania Avenue NW UNIT 310, Washingt...","[(UNIT 310, [subpremise]), (1275, [street_numb...","[(UNIT 310, [subpremise]), (1275, [street_numb..."
...,...,...,...,...,...,...,...,...,...,...,...
62,63,ロンドン支店ドバイ出張所,"The Gate Building, East Wing, Level 5, Dubai I...",,Tel.971-4-279-4400,[the gate building east wing level 5 dubai int...,[(the gate building east wing level 5 dubai in...,[(the gate building east wing level 5 dubai in...,677J+WFX - Trade Centre - DIFC - Dubai - Unite...,"[(677J+WFX, [plus_code]), (DIFC, [neighborhood...","[(677J+WFX, [plus_code]), (DIFC, [neighborhood..."
63,64,ロンドン支店アブダビ出張所,"Al Sila Tower, 24th Floor, Abu Dhabi Global Ma...",,Tel.971-2-694-8551,[alabama sila tower 24th floor abu dhabi globa...,"[(al sila tower 24th floor, house), (abu, city...","[(alabama sila tower 24th floor, house), (abu,...",Al Sila Tower - Al Maryah Island - Abu Dhabi G...,"[(Al Sila Tower, [premise]), (Abu Dhabi Global...","[(Al Sila Tower, [premise]), (Abu Dhabi Global..."
64,65,イスタンブール駐在員事務所,Esentepe Mah. Buyukdere Cad. No:175 Ferko Sign...,,Tel.90-212-932-8251,[esentepe mahallesi buyukdere caddesi numara 1...,"[(esentepe mah. buyukdere cad., road), (no 175...","[(esentepe mahallesi, house), (buyukdere cadde...","Esentepe, Kanyon AVM, 34394 Şişli/İstanbul, Tü...","[(Kanyon AVM, [route]), (Esentepe, [administra...","[(Kanyon AVM, [route]), (Esentepe, [administra..."
65,66,ロンドン支店ヨハネスブルグ出張所,"2nd Floor, West Tower, Maude Street, Nelson Ma...",,Tel.27-11-881-5410,[2nd floor west tower maude street nelson mand...,"[(2nd floor, house_number), (west tower maude ...","[(2nd floor, house_number), (west tower maude ...","Maude St, Sandton, 2196, South Africa","[(Maude Street, [route]), (Sandton, [locality,...","[(Maude Street, [route]), (Sandton, [locality,..."


In [52]:
# Save the table again, now including the Google Maps columns, as CSV.
# 'utf-8-sig' is UTF-8 with a byte-order mark written at the start of the file.
mizuho_address_list.to_csv(
  path_or_buf='/content/mizuho_address_list_structured.csv',
  encoding='utf-8-sig',
)

In [53]:
# Reference (other features / APIs); every snippet below is intentionally left commented out

# Reverse geocoding (Geocoding API)
# reverse_geocode_result = gmaps.reverse_geocode((40.714224, -73.961452))

# Directions using public transit (Directions API)
# from datetime import datetime
# now = datetime.now()
# directions_result = gmaps.directions("Sydney Town Hall",
#                                      "Parramatta, NSW",
#                                      mode="transit",
#                                      departure_time=now)

# Validate an address (Address Validation API)
# addressvalidation_result =  gmaps.addressvalidation(['1600 Amphitheatre Pk'],
#                                                     regionCode='US',
#                                                     locality='Mountain View',
#                                                     enableUspsCass=True)