# Find appropriate coordinates for every timezone

In [1]:
import pandas as pd

## Get timezones

In [2]:
import pytz

def generate_timezones():
    for timezone in pytz.common_timezones:
        split = timezone.replace("_", " ").split("/")
        match len(split):
            case 1: continue
            case 2: region, subregion, place = split[0], None,     split[1]
            case 3: region, subregion, place = split[0], split[1], split[2]
            case _: raise ValueError(f"Unexpected timezone format: {timezone}")
        if region == "US" or region == "Canada":
            continue
        yield timezone, region, subregion, place

df_timezones = pd.DataFrame(
    list(generate_timezones()),
    columns=['timezone', 'region', 'subregion', 'place']
).set_index('timezone')

df_timezones

Unnamed: 0_level_0,region,subregion,place
timezone,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Africa/Abidjan,Africa,,Abidjan
Africa/Accra,Africa,,Accra
Africa/Addis_Ababa,Africa,,Addis Ababa
Africa/Algiers,Africa,,Algiers
Africa/Asmara,Africa,,Asmara
...,...,...,...
Pacific/Tahiti,Pacific,,Tahiti
Pacific/Tarawa,Pacific,,Tarawa
Pacific/Tongatapu,Pacific,,Tongatapu
Pacific/Wake,Pacific,,Wake


## Get a city-coordinates database from `libtimezonemap`

In [3]:
df = pd.read_csv(
    '/usr/share/libtimezonemap/ui/cities15000.txt',
    delimiter='\t',
    header=None,
    usecols=[2, 4, 5, 17],
    names=['city', 'latitude', 'longitude', 'timezone'],
    quotechar='_'  # allow all other weird characters
).set_index('city')

df.index = df.index.map(lambda x: x.replace("'", " "))

df_cities = df

df_cities

Unnamed: 0_level_0,latitude,longitude,timezone
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
les Escaldes,42.50729,1.53414,Europe/Andorra
Andorra la Vella,42.50779,1.52109,Europe/Andorra
Umm al Qaywayn,25.56473,55.55517,Asia/Dubai
Ras al-Khaimah,25.78953,55.94320,Asia/Dubai
Khawr Fakkan,25.33132,56.34199,Asia/Dubai
...,...,...,...
Bulawayo,-20.15000,28.58333,Africa/Harare
Bindura,-17.30192,31.33056,Africa/Harare
Beitbridge,-22.21667,30.00000,Africa/Harare
Epworth,-17.89000,31.14750,Africa/Harare


## Find timezones coordinates in the database

### Solve some by finding unique timezones in the database

In [4]:
df_results = df_cities.drop_duplicates(subset=['timezone'], keep=False).set_index('timezone')

df_timezones = df_timezones[~df_timezones.index.isin(df_results.index)]  # remove found timezones

df_results

Unnamed: 0_level_0,latitude,longitude
timezone,Unnamed: 1_level_1,Unnamed: 2_level_1
America/Antigua,17.11717,-61.84573
America/Anguilla,18.21704,-63.05783
America/Argentina/Ushuaia,-54.80000,-68.30000
Pacific/Pago_Pago,-14.27806,-170.70250
Australia/Broken_Hill,-31.96173,141.45998
...,...,...
America/North_Dakota/New_Salem,46.82666,-100.88958
Europe/Vatican,41.90236,12.45332
Pacific/Efate,-17.73381,168.32188
Pacific/Wallis,-13.28163,-176.17453


In [5]:
for timezone_row in df_timezones.itertuples():
    timezone = timezone_row.Index
    found = False
    for city in df_cities.itertuples():
        city_name = city.Index
        if timezone_row.place == city_name:
            if timezone == city.timezone:
                found = True
                df_timezones = df_timezones.drop(timezone)
                df_results.loc[timezone] = [city.latitude, city.longitude]
                break
            print(f"((Hint) {timezone}: found city {city_name} but its timezone was {city.timezone})")

    if not found:
        print(f"{timezone} not matched to any city")

Africa/El_Aaiun not matched to any city
Africa/Ndjamena not matched to any city
((Hint) Africa/Tripoli: found city Tripoli but its timezone was Europe/Athens)
((Hint) Africa/Tripoli: found city Tripoli but its timezone was Asia/Beirut)
America/Adak not matched to any city
America/Argentina/Catamarca not matched to any city
America/Argentina/Jujuy not matched to any city
America/Argentina/Tucuman not matched to any city
America/Aruba not matched to any city
America/Atikokan not matched to any city
America/Bahia not matched to any city
America/Bahia_Banderas not matched to any city
((Hint) America/Belem: found city Belem but its timezone was America/Fortaleza)
America/Belize not matched to any city
America/Blanc-Sablon not matched to any city
America/Cambridge_Bay not matched to any city
((Hint) America/Cayenne: found city Cayenne but its timezone was Europe/Paris)
((Hint) America/Ciudad_Juarez: found city Ciudad Juarez but its timezone was America/Ojinaga)
America/Ciudad_Juarez not matc

### Remaining timezones to match

In [6]:
df_timezones

Unnamed: 0_level_0,region,subregion,place
timezone,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Africa/El_Aaiun,Africa,,El Aaiun
Africa/Ndjamena,Africa,,Ndjamena
America/Adak,America,,Adak
America/Argentina/Catamarca,America,Argentina,Catamarca
America/Argentina/Jujuy,America,Argentina,Jujuy
...,...,...,...
Pacific/Marquesas,Pacific,,Marquesas
Pacific/Midway,Pacific,,Midway
Pacific/Nauru,Pacific,,Nauru
Pacific/Tahiti,Pacific,,Tahiti


### Matched timezones

In [7]:
df_results

Unnamed: 0_level_0,latitude,longitude
timezone,Unnamed: 1_level_1,Unnamed: 2_level_1
America/Antigua,17.11717,-61.84573
America/Anguilla,18.21704,-63.05783
America/Argentina/Ushuaia,-54.80000,-68.30000
Pacific/Pago_Pago,-14.27806,-170.70250
Australia/Broken_Hill,-31.96173,141.45998
...,...,...
Pacific/Auckland,-36.86667,174.76667
Pacific/Honolulu,21.30694,-157.85833
Pacific/Majuro,7.08971,171.38027
Pacific/Noumea,-22.27631,166.45720


## Search remaining timezones online

In [8]:
from geopy.geocoders import Nominatim

geolocator = Nominatim(user_agent="Cinnamon Desktop Applet Auto-night-light, manual single shot")

for timezone_row in df_timezones.itertuples():
    timezone = timezone_row.Index
    subregion = timezone_row.subregion
    geocode = geolocator.geocode(f"{timezone_row.place}{f", {subregion}" if subregion else ""}")
    if not geocode:
        print(f"No coordinates found for {timezone}")
        continue
    print(f"{timezone} matched to {geocode.address}")
    df_timezones = df_timezones.drop(timezone)
    df_results.loc[timezone] = [round(geocode.latitude, 5), round(geocode.longitude, 5)]

Africa/El_Aaiun matched to Laâyoune العيون, Pachalik de Laâyoune باشوية العيون, Province de Laâyoune إقليم العيون, Maroc ⵍⵎⵖⵔⵉⴱ المغرب
Africa/Ndjamena matched to N'Djaména انجمينا, Tchad تشاد
America/Adak matched to Adak, Aleutians West Census Area, Alaska, 99546, United States
America/Argentina/Catamarca matched to Catamarca, Argentina
America/Argentina/Jujuy matched to Jujuy, Argentina
America/Argentina/Tucuman matched to Tucumán, Argentina
America/Aruba matched to Aruba, Nederland
America/Atikokan matched to Atikokan, Rainy River District, Northwestern Ontario, Ontario, Canada
America/Bahia matched to Bahia, Região Nordeste, Brasil
America/Bahia_Banderas matched to Bahía Banderas, Bahías de Jaltenco, Alborada Jaltenco, Jaltenco, 55783, México
America/Belize matched to Belize
America/Blanc-Sablon matched to Blanc-Sablon, Le Golfe-du-Saint-Laurent, Côte-Nord, Québec, Canada
America/Cambridge_Bay matched to Iqaluktuuttiaq (Cambridge Bay), ᕿᑎᕐᒥᐅᑦ Kitikmeot Region, ᓄᓇᕗᑦ Nunavut, X0B 0C0,

### Remaining timezones to match

In [9]:
df_timezones

Unnamed: 0_level_0,region,subregion,place
timezone,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Antarctica/DumontDUrville,Antarctica,,DumontDUrville


### Matched timezones

In [10]:
df_results

Unnamed: 0_level_0,latitude,longitude
timezone,Unnamed: 1_level_1,Unnamed: 2_level_1
America/Antigua,17.11717,-61.84573
America/Anguilla,18.21704,-63.05783
America/Argentina/Ushuaia,-54.80000,-68.30000
Pacific/Pago_Pago,-14.27806,-170.70250
Australia/Broken_Hill,-31.96173,141.45998
...,...,...
Pacific/Marquesas,-9.77941,-139.00678
Pacific/Midway,31.02602,-95.75050
Pacific/Nauru,-0.52523,166.93244
Pacific/Tahiti,-17.68734,-149.44517


## Check and rectify the results manually

Check procedure:
- check if the `timezone` name makes sense with the address found,
- if not:
  - search on a web search engine if the full `timezone` gives similar results to the address found by Nomatim,
  - if not: find better or more values for a new search

In [11]:
timezones_to_search_again = {
    'America/Creston': {'city': "Creston", 'country': "Canada"},
    'America/Dawson': {'city': "Dawson", 'country': "Canada"},
    'America/St_Johns': "Saint Johns Island, Canada",
    'America/St_Vincent': "Saint Vincent and the Grenadines",
    'America/Thule': {'city': "Thule", 'country': "Greenland"},
    'Antarctica/Casey': "Casey Station",
    'Antarctica/Davis': "Davis Station",
    'Antarctica/DumontDUrville': "Dumont d'Urville Station",
    'Antarctica/Mawson': "Mawson Station",
    'Antarctica/McMurdo': "McMurdo Station",
    'Asia/Ho_Chi_Minh': {'city': "Ho Chi Minh", 'country': "Vietnam"},
    'Atlantic/Canary': "Canary Islands, Spain",
    'Australia/Lindeman': "Lindeman Island, Australia",
    'Indian/Chagos': "Chagos Archipelago",
    'Indian/Comoro': "Comoro Islands",
    'Pacific/Chatham': "Chatham, New Zealand",
    'Pacific/Easter': "Easter Island, Chile",
    'Pacific/Galapagos': "Galapagos Islands",
    'Pacific/Gambier': "Gambier Islands",
    'Pacific/Midway': "Midway Atoll",
    'Pacific/Wake': "Minor Outlying Islands"
}

for timezone, search in timezones_to_search_again.items():
    geocode = geolocator.geocode(search)
    if not geocode:
        print(f"No coordinates found for {timezone}")
        continue
    print(f"{timezone} matched to {geocode.address}")
    df_results.loc[timezone] = [round(geocode.latitude, 5), round(geocode.longitude, 5)]

America/Creston matched to Creston, Regional District of Central Kootenay, British Columbia, Canada
America/Dawson matched to Dawson City, Yukon, Y0B 1G0, Canada
America/St_Johns matched to Saint Johns Island, Labrador, Newfoundland and Labrador, Canada
America/St_Vincent matched to Saint Vincent and the Grenadines
America/Thule matched to Qaanaaq, Avannaata, 3971, Kalaallit Nunaat
Antarctica/Casey matched to Casey Station, Shirley Island Walking Route, Casey Station
Antarctica/Davis matched to Davis Station
Antarctica/DumontDUrville matched to Base Dumont d'Urville
Antarctica/Mawson matched to Mawson Station, Alternate Route, Mawson Station
Antarctica/McMurdo matched to McMurdo Station
Asia/Ho_Chi_Minh matched to Thành phố Hồ Chí Minh, Việt Nam
Atlantic/Canary matched to Canary Islands, España
Australia/Lindeman matched to Lindeman Island, Coral Sea, Mackay Regional, Queensland, Australia
Indian/Chagos matched to Chagos Archipelago, Downtown, British Indian Ocean Territory
Indian/Como

In [12]:
df_results

Unnamed: 0_level_0,latitude,longitude
timezone,Unnamed: 1_level_1,Unnamed: 2_level_1
America/Antigua,17.11717,-61.84573
America/Anguilla,18.21704,-63.05783
America/Argentina/Ushuaia,-54.80000,-68.30000
Pacific/Pago_Pago,-14.27806,-170.70250
Australia/Broken_Hill,-31.96173,141.45998
...,...,...
Pacific/Midway,28.24175,-177.37543
Pacific/Nauru,-0.52523,166.93244
Pacific/Tahiti,-17.68734,-149.44517
Pacific/Wake,16.72882,-169.53338


## Export results

In [13]:
import json

dict_results = df_results[['latitude', 'longitude']].T.to_dict('list')

json_results = json.dumps(dict_results, sort_keys=True)

with open('database.json', 'w') as f:
    f.write(json_results)