# Batch-convert (lng, lat) coordinates to timezone names in DataFrames

## Setup

In [1]:
import citiespy
import numpy as np
import pandas as pd
import polars as pl
import tzfpy
from timezonefinder import TimezoneFinder

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Build a single TimezoneFinder instance up front; every timezonefinder lookup below reuses it.
# NOTE(review): in_memory=True presumably keeps the lookup data in RAM — confirm against the timezonefinder docs.
tf = TimezoneFinder(in_memory=True)
# Smoke test: resolve the point lng=0, lat=0 to a timezone name.
tf.timezone_at(lng=0, lat=0)

'Etc/GMT'

In [3]:
# Same smoke test with tzfpy; arguments are passed positionally as (lng, lat) throughout this notebook.
tzfpy.get_tz(0, 0)

'Etc/GMT'

In [4]:
# List the names citiespy exposes; `all_cities` is the one used to build the dataset below.
dir(citiespy)

['__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'all_cities',
 'citiespy',
 'random_city']

In [5]:
# Flatten every citiespy record into a plain dict (name, lng, lat) so the same list
# can later be handed to both pandas and polars to build their frames.
cities_as_dict = [
    {"name": city.name, "lng": city.lng, "lat": city.lat}
    for city in citiespy.all_cities()
]

In [6]:
# Number of city records collected; this is the row count every benchmark below runs over.
len(cities_as_dict)

145611

## Pandas

In [7]:
# Build the pandas frame from the list of city dicts (columns: name, lng, lat).
df = pd.DataFrame(cities_as_dict)

In [8]:
# Preview the raw city table before any timezone columns are added.
df

Unnamed: 0,name,lng,lat
0,Sant Julià de Lòria,1.49129,42.46372
1,Pas de la Casa,1.73361,42.54277
2,Ordino,1.53319,42.55623
3,les Escaldes,1.53414,42.50729
4,la Massana,1.51483,42.54499
...,...,...,...
145606,Beitbridge,30.00000,-22.21667
145607,Beatrice,30.84730,-18.25283
145608,Banket,30.40000,-17.38333
145609,Epworth,31.14750,-17.89000


In [9]:
%%time
# Baseline: row-wise lookup. axis=1 makes apply call the lambda once per row,
# and each call forwards that row's lng/lat to timezonefinder as keyword arguments.
df["tz_from_timezonefinder"] = df.apply(lambda x: tf.timezone_at(lng=x.lng, lat=x.lat), axis=1)

CPU times: user 2.59 s, sys: 52.4 ms, total: 2.64 s
Wall time: 2.64 s


In [10]:
%%time
# Same row-wise baseline with tzfpy; get_tz is called positionally as (lng, lat).
df["tz_from_tzfpy"] = df.apply(lambda x: tzfpy.get_tz(x.lng, x.lat), axis=1)

CPU times: user 802 ms, sys: 5.3 ms, total: 808 ms
Wall time: 807 ms


In [11]:
# Show the frame with both row-wise results side by side for a visual comparison.
df

Unnamed: 0,name,lng,lat,tz_from_timezonefinder,tz_from_tzfpy
0,Sant Julià de Lòria,1.49129,42.46372,Europe/Andorra,Europe/Andorra
1,Pas de la Casa,1.73361,42.54277,Europe/Andorra,Europe/Andorra
2,Ordino,1.53319,42.55623,Europe/Andorra,Europe/Andorra
3,les Escaldes,1.53414,42.50729,Europe/Andorra,Europe/Andorra
4,la Massana,1.51483,42.54499,Europe/Andorra,Europe/Andorra
...,...,...,...,...,...
145606,Beitbridge,30.00000,-22.21667,Africa/Harare,Africa/Harare
145607,Beatrice,30.84730,-18.25283,Africa/Harare,Africa/Harare
145608,Banket,30.40000,-17.38333,Africa/Harare,Africa/Harare
145609,Epworth,31.14750,-17.89000,Africa/Harare,Africa/Harare


## Pandas (vectorized)

In [12]:
# Wrap both scalar lookups with np.vectorize so later cells can pass whole
# columns/arrays of coordinates instead of one (lng, lat) pair at a time.
vec_tzfpy_get_tz = np.vectorize(tzfpy.get_tz)
vec_timezonefinder_timezone_at = np.vectorize(tf.timezone_at)

In [13]:
%%time
# Feed the lng/lat columns straight into the vectorized tzfpy wrapper (positional order: lng, lat).
df["tz_from_tzfpy_vectorized"] = vec_tzfpy_get_tz(df["lng"], df["lat"])

CPU times: user 201 ms, sys: 4.44 ms, total: 206 ms
Wall time: 205 ms


  outputs = ufunc(*inputs)


In [14]:
%%time
# Same as above for timezonefinder; the columns are passed by keyword to match timezone_at(lng=..., lat=...).
df["tz_from_timezonefinder_vectorized"] = vec_timezonefinder_timezone_at(lng=df["lng"], lat=df["lat"])

CPU times: user 1.95 s, sys: 72.5 ms, total: 2.02 s
Wall time: 2.02 s


In [15]:
# Show the frame with the row-wise and vectorized results of both libraries.
df

Unnamed: 0,name,lng,lat,tz_from_timezonefinder,tz_from_tzfpy,tz_from_tzfpy_vectorized,tz_from_timezonefinder_vectorized
0,Sant Julià de Lòria,1.49129,42.46372,Europe/Andorra,Europe/Andorra,Europe/Andorra,Europe/Andorra
1,Pas de la Casa,1.73361,42.54277,Europe/Andorra,Europe/Andorra,Europe/Andorra,Europe/Andorra
2,Ordino,1.53319,42.55623,Europe/Andorra,Europe/Andorra,Europe/Andorra,Europe/Andorra
3,les Escaldes,1.53414,42.50729,Europe/Andorra,Europe/Andorra,Europe/Andorra,Europe/Andorra
4,la Massana,1.51483,42.54499,Europe/Andorra,Europe/Andorra,Europe/Andorra,Europe/Andorra
...,...,...,...,...,...,...,...
145606,Beitbridge,30.00000,-22.21667,Africa/Harare,Africa/Harare,Africa/Harare,Africa/Harare
145607,Beatrice,30.84730,-18.25283,Africa/Harare,Africa/Harare,Africa/Harare,Africa/Harare
145608,Banket,30.40000,-17.38333,Africa/Harare,Africa/Harare,Africa/Harare,Africa/Harare
145609,Epworth,31.14750,-17.89000,Africa/Harare,Africa/Harare,Africa/Harare,Africa/Harare


## Polars

In [16]:
# Build the polars frame from the same list of city dicts used for the pandas frame.
p_df = pl.from_dicts(cities_as_dict)

In [17]:
# Preview the polars frame before any timezone columns are added.
p_df

name,lng,lat
str,f64,f64
"""Sant Julià de …",1.49129,42.46372
"""Pas de la Casa…",1.73361,42.54277
"""Ordino""",1.53319,42.55623
"""les Escaldes""",1.53414,42.50729
"""la Massana""",1.51483,42.54499
"""Encamp""",1.58014,42.53474
"""Canillo""",1.59756,42.5676
"""Arinsal""",1.48453,42.57205
"""Andorra la Vel…",1.52109,42.50779
"""Umm Al Quwain …",55.55517,25.56473


In [18]:
%%time
p_df = p_df.with_columns(
    pl.struct(["lng", "lat"])
    .apply(lambda cols: tf.timezone_at(lng=cols["lng"], lat=cols["lat"]))
    .alias("tz_from_timezonefinder")
)



CPU times: user 2 s, sys: 118 ms, total: 2.12 s
Wall time: 2.12 s


In [19]:
%%time
p_df = p_df.with_columns(
    pl.struct(["lng", "lat"])
    .apply(lambda cols: tzfpy.get_tz(cols["lng"], cols["lat"]))
    .alias("tz_from_tzfpy")
)



CPU times: user 258 ms, sys: 8.47 ms, total: 267 ms
Wall time: 266 ms


In [20]:
# Show the polars frame with the timezonefinder and tzfpy results side by side.
p_df

name,lng,lat,tz_from_timezonefinder,tz_from_tzfpy
str,f64,f64,str,str
"""Sant Julià de …",1.49129,42.46372,"""Europe/Andorra…","""Europe/Andorra…"
"""Pas de la Casa…",1.73361,42.54277,"""Europe/Andorra…","""Europe/Andorra…"
"""Ordino""",1.53319,42.55623,"""Europe/Andorra…","""Europe/Andorra…"
"""les Escaldes""",1.53414,42.50729,"""Europe/Andorra…","""Europe/Andorra…"
"""la Massana""",1.51483,42.54499,"""Europe/Andorra…","""Europe/Andorra…"
"""Encamp""",1.58014,42.53474,"""Europe/Andorra…","""Europe/Andorra…"
"""Canillo""",1.59756,42.5676,"""Europe/Andorra…","""Europe/Andorra…"
"""Arinsal""",1.48453,42.57205,"""Europe/Andorra…","""Europe/Andorra…"
"""Andorra la Vel…",1.52109,42.50779,"""Europe/Andorra…","""Europe/Andorra…"
"""Umm Al Quwain …",55.55517,25.56473,"""Asia/Dubai""","""Asia/Dubai"""


## Polars via `polars_dates`

In [21]:
import polars_dates as pl_dates

lib is /Users/ringsaturn/Projects/df-lng-lat-timezones/deps/polars-dates/polars_dates/_internal.cpython-311-darwin.so


In [22]:
%%time
p_df = p_df.with_columns(
    tz_from_polars_dates_tz=pl_dates.lookup_timezone(pl.col("lat"), pl.col("lng"))
)

lookup_timezone function called col("lat") col("lng")
Yo I got col("lat")
lats shape: (145_611,)
Series: 'lat' [f64]
[
	42.46372
	42.54277
	42.55623
	42.50729
	42.54499
	42.53474
	42.5676
	42.57205
	42.50779
	25.56473
	25.78953
	23.14355
	…
	-17.36667
	-19.8
	-18.13021
	-18.06294
	-16.72289
	-20.15
	-17.62027
	-17.30192
	-22.21667
	-18.25283
	-17.38333
	-17.89
	-18.01274
]
lons shape: (145_611,)
Series: 'lng' [f64]
[
	1.49129
	1.73361
	1.53319
	1.53414
	1.51483
	1.58014
	1.59756
	1.48453
	1.52109
	55.55517
	55.9432
	53.7881
	…
	30.2
	32.86667
	30.14074
	29.89246
	31.11462
	28.58333
	27.34139
	31.33056
	30.0
	30.8473
	30.4
	31.1475
	31.07555
]
CPU times: user 1.46 s, sys: 7.26 ms, total: 1.47 s
Wall time: 1.47 s


## Pure NumPy

In [23]:
# Copy the two coordinate columns out of the pandas frame as standalone NumPy arrays,
# so the timings below exclude any pandas Series overhead.
lng_array, lat_array = (np.array(df[column]) for column in ("lng", "lat"))

In [24]:
%%time
# Time the vectorized tzfpy wrapper on bare NumPy arrays; the result is discarded, only the timing matters.
_ = vec_tzfpy_get_tz(lng_array, lat_array)

CPU times: user 190 ms, sys: 1.16 ms, total: 191 ms
Wall time: 190 ms


  outputs = ufunc(*inputs)


In [25]:
%%time
# Time the vectorized timezonefinder wrapper on bare NumPy arrays; the result is discarded, only the timing matters.
_ = vec_timezonefinder_timezone_at(lng=lng_array, lat=lat_array)

CPU times: user 1.93 s, sys: 32.5 ms, total: 1.96 s
Wall time: 1.96 s
