# Batch convert lng,lat to timezone name in dataframes

## Setup

In [1]:
import pandas as pd
import polars as pl
import citiespy
import tzfpy
from timezonefinder import TimezoneFinder

In [2]:
# Build a single TimezoneFinder instance; the lookup cells below reuse it as `tf`.
tf = TimezoneFinder(in_memory=True)
# Smoke test: look up the timezone name at lng=0, lat=0.
tf.timezone_at(lng=0, lat=0)

'Etc/GMT'

In [3]:
# Same origin-point lookup with tzfpy; arguments are passed positionally as (lng, lat).
tzfpy.get_tz(0, 0)

'Etc/GMT'

In [4]:
# List the names exposed by the citiespy package (shows `all_cities` and `random_city`).
dir(citiespy)

['__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'all_cities',
 'citiespy',
 'random_city']

In [5]:
# Reduce every city record to a plain dict with just the three fields used
# below, so the same list can seed both the pandas and the Polars frame.
cities_as_dict = [
    {"name": record.name, "lng": record.lng, "lat": record.lat}
    for record in citiespy.all_cities()
]

In [6]:
# Number of city records collected above.
len(cities_as_dict)

140874

## Pandas

In [7]:
# One row per city dict; columns come from the dict keys (name, lng, lat).
df = pd.DataFrame(cities_as_dict)

In [8]:
# Display the freshly built frame.
df

Unnamed: 0,name,lng,lat
0,Sant Julià de Lòria,1.49129,42.46372
1,Pas de la Casa,1.73361,42.54277
2,Ordino,1.53319,42.55623
3,les Escaldes,1.53414,42.50729
4,la Massana,1.51483,42.54499
...,...,...,...
140869,Beitbridge,30.00000,-22.21667
140870,Beatrice,30.84730,-18.25283
140871,Banket,30.40000,-17.38333
140872,Epworth,31.14750,-17.89000


In [9]:
%%time
# Row-wise lookup: every row's lng/lat pair is handed to the shared TimezoneFinder.
df["tz_from_timezonefinder"] = df.apply(
    lambda row: tf.timezone_at(lng=row["lng"], lat=row["lat"]),
    axis="columns",
)

KeyboardInterrupt: 

In [10]:
%%time
# Row-wise lookup with tzfpy; get_tz takes the coordinates positionally as (lng, lat).
df["tz_from_tzfpy"] = df.apply(
    lambda row: tzfpy.get_tz(row["lng"], row["lat"]),
    axis="columns",
)

CPU times: user 2.84 s, sys: 48.1 ms, total: 2.89 s
Wall time: 2.91 s


In [11]:
# Display the frame again to inspect the newly added timezone column.
df

Unnamed: 0,name,lng,lat,tz_from_tzfpy
0,Sant Julià de Lòria,1.49129,42.46372,Europe/Andorra
1,Pas de la Casa,1.73361,42.54277,Europe/Andorra
2,Ordino,1.53319,42.55623,Europe/Andorra
3,les Escaldes,1.53414,42.50729,Europe/Andorra
4,la Massana,1.51483,42.54499,Europe/Andorra
...,...,...,...,...
140869,Beitbridge,30.00000,-22.21667,Africa/Harare
140870,Beatrice,30.84730,-18.25283,Africa/Harare
140871,Banket,30.40000,-17.38333,Africa/Harare
140872,Epworth,31.14750,-17.89000,Africa/Harare


## Pandas (Vectorized)

In [12]:
import numpy as np

# np.vectorize wraps each scalar lookup so it can be called with whole columns.
# otypes=[object] is passed explicitly so that:
#   * results stay Python objects -- a `None` result is kept as None instead of
#     being coerced to the text "None" by an inferred string dtype;
#   * NumPy does not need a probe call on the first element to infer the output type;
#   * the wrappers also accept empty inputs (inference fails on size-0 input).
vec_tzfpy_get_tz = np.vectorize(tzfpy.get_tz, otypes=[object])
vec_timezonefinder_timezone_at = np.vectorize(tf.timezone_at, otypes=[object])

In [13]:
%%time
# Feed both coordinate columns to the vectorized tzfpy wrapper in one call.
df["tz_from_tzfpy_vectorized"] = vec_tzfpy_get_tz(df.lng, df.lat)

CPU times: user 559 ms, sys: 38.2 ms, total: 597 ms
Wall time: 602 ms


  outputs = ufunc(*inputs)


In [None]:
%%time
# Feed both coordinate columns, by keyword, to the vectorized TimezoneFinder wrapper.
df["tz_from_timezonefinder_vectorized"] = vec_timezonefinder_timezone_at(
    lng=df.lng,
    lat=df.lat,
)

In [None]:
# Display the frame with the vectorized result columns added.
df

## Polars

In [None]:
# Build the Polars frame from the same list of dicts that seeded the pandas frame.
p_df = pl.from_dicts(cities_as_dict)

In [None]:
# Display the freshly built Polars frame.
p_df

In [None]:
%%time
# Pack lng/lat into a struct so the Python callback receives both values per row
# (each struct element arrives as a dict keyed by field name).
# `Expr.apply` is the deprecated name of this operation (renamed to `map_elements`
# in Polars 0.19), so `map_elements` is used here. `return_dtype` is declared
# explicitly so the output column is a string column regardless of the first rows.
p_df = p_df.with_columns(
    pl.struct(["lng", "lat"])
    .map_elements(
        lambda cols: tf.timezone_at(lng=cols["lng"], lat=cols["lat"]),
        return_dtype=pl.Utf8,
    )
    .alias("tz_from_timezonefinder")
)

In [None]:
%%time
# Pack lng/lat into a struct so the Python callback receives both values per row
# (each struct element arrives as a dict keyed by field name).
# `Expr.apply` is the deprecated name of this operation (renamed to `map_elements`
# in Polars 0.19), so `map_elements` is used here. `return_dtype` is declared
# explicitly so the output column is a string column regardless of the first rows.
p_df = p_df.with_columns(
    pl.struct(["lng", "lat"])
    .map_elements(
        lambda cols: tzfpy.get_tz(cols["lng"], cols["lat"]),
        return_dtype=pl.Utf8,
    )
    .alias("tz_from_tzfpy")
)

In [None]:
# Display the Polars frame with both timezone columns added.
p_df