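# Batch-convert (lng, lat) coordinates to timezone names in DataFrames

Looks up a timezone name for every city returned by `citiespy` using both `tzfpy` and `timezonefinder`, first row-by-row in pandas and then in Polars, timing each approach.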

## Setup

In [1]:
import pandas as pd
import polars as pl
import citiespy
import tzfpy
from timezonefinder import TimezoneFinder

In [2]:
# Build one shared TimezoneFinder instance; every later lookup cell reuses `tf`.
# NOTE(review): in_memory=True presumably preloads the timezone data into RAM
# instead of reading it from disk per query — confirm against the timezonefinder docs.
tf = TimezoneFinder(in_memory=True)
# Smoke test: look up the timezone at lng=0, lat=0.
tf.timezone_at(lng=0, lat=0)

'Etc/GMT'

In [3]:
# Same smoke test with tzfpy. Arguments are positional and passed in (lng, lat)
# order, matching how the DataFrame cells call it later.
tzfpy.get_tz(0, 0)

'Etc/GMT'

In [4]:
# List the names exposed by the citiespy module to find the city accessors.
dir(citiespy)

['__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'all_cities',
 'citiespy',
 'random_city']

In [5]:
# Flatten every citiespy record into a plain dict (name, lng, lat) so the same
# list can seed both the pandas and the Polars frame later in the notebook.
# A comprehension builds the list in one expression and does not leave a stray
# loop variable in the notebook namespace.
cities_as_dict = [
    {"name": city.name, "lng": city.lng, "lat": city.lat}
    for city in citiespy.all_cities()
]

In [6]:
# Number of city records collected; this is the row count for both frames.
len(cities_as_dict)

140874

## Pandas

In [7]:
# One row per city dict; the dict keys (name, lng, lat) become the columns.
df = pd.DataFrame(cities_as_dict)

In [8]:
# Display the frame before any timezone columns are added.
df

Unnamed: 0,name,lng,lat
0,Sant Julià de Lòria,1.49129,42.46372
1,Pas de la Casa,1.73361,42.54277
2,Ordino,1.53319,42.55623
3,les Escaldes,1.53414,42.50729
4,la Massana,1.51483,42.54499
...,...,...,...
140869,Beitbridge,30.00000,-22.21667
140870,Beatrice,30.84730,-18.25283
140871,Banket,30.40000,-17.38333
140872,Epworth,31.14750,-17.89000


In [9]:
%%time
df["tz_from_tzfpy"] = df.apply(lambda x: tzfpy.get_tz(x.lng, x.lat), axis=1)

CPU times: user 2.52 s, sys: 29.8 ms, total: 2.55 s
Wall time: 2.55 s


In [10]:
%%time
df["tz_from_timezonefinder"] = df.apply(lambda x: tf.timezone_at(lng=x.lng, lat=x.lat), axis=1)

CPU times: user 10.6 s, sys: 481 ms, total: 11.1 s
Wall time: 11.1 s


In [11]:
# Display the frame again to compare the two timezone columns side by side.
df

Unnamed: 0,name,lng,lat,tz_from_tzfpy,tz_from_timezonefinder
0,Sant Julià de Lòria,1.49129,42.46372,Europe/Andorra,Europe/Andorra
1,Pas de la Casa,1.73361,42.54277,Europe/Andorra,Europe/Andorra
2,Ordino,1.53319,42.55623,Europe/Andorra,Europe/Andorra
3,les Escaldes,1.53414,42.50729,Europe/Andorra,Europe/Andorra
4,la Massana,1.51483,42.54499,Europe/Andorra,Europe/Andorra
...,...,...,...,...,...
140869,Beitbridge,30.00000,-22.21667,Africa/Harare,Africa/Harare
140870,Beatrice,30.84730,-18.25283,Africa/Harare,Africa/Harare
140871,Banket,30.40000,-17.38333,Africa/Harare,Africa/Harare
140872,Epworth,31.14750,-17.89000,Africa/Harare,Africa/Harare


## Polars

In [12]:
# Build the Polars frame from the same list of city dicts used for pandas.
p_df = pl.from_dicts(cities_as_dict)

In [13]:
# Display the Polars frame before any timezone columns are added.
p_df

name,lng,lat
str,f64,f64
"""Sant Julià de …",1.49129,42.46372
"""Pas de la Casa…",1.73361,42.54277
"""Ordino""",1.53319,42.55623
"""les Escaldes""",1.53414,42.50729
"""la Massana""",1.51483,42.54499
"""Encamp""",1.58014,42.53474
"""Canillo""",1.59756,42.5676
"""Arinsal""",1.48453,42.57205
"""Andorra la Vel…",1.52109,42.50779
"""Umm Al Quwain …",55.55517,25.56473


In [14]:
%%time
p_df = p_df.with_columns(
    pl.struct(["lng", "lat"])
    .apply(lambda cols: tzfpy.get_tz(cols["lng"], cols["lat"]))
    .alias("tz_from_tzfpy")
)

CPU times: user 703 ms, sys: 57.5 ms, total: 760 ms
Wall time: 752 ms


In [15]:
%%time
p_df = p_df.with_columns(
    pl.struct(["lng", "lat"])
    .apply(lambda cols: tf.timezone_at(lng=cols["lng"], lat=cols["lat"]))
    .alias("tz_from_timezonefinder")
)

CPU times: user 7.91 s, sys: 604 ms, total: 8.51 s
Wall time: 8.58 s


In [16]:
# Display the Polars frame with both timezone columns for comparison.
p_df

name,lng,lat,tz_from_tzfpy,tz_from_timezonefinder
str,f64,f64,str,str
"""Sant Julià de …",1.49129,42.46372,"""Europe/Andorra…","""Europe/Andorra…"
"""Pas de la Casa…",1.73361,42.54277,"""Europe/Andorra…","""Europe/Andorra…"
"""Ordino""",1.53319,42.55623,"""Europe/Andorra…","""Europe/Andorra…"
"""les Escaldes""",1.53414,42.50729,"""Europe/Andorra…","""Europe/Andorra…"
"""la Massana""",1.51483,42.54499,"""Europe/Andorra…","""Europe/Andorra…"
"""Encamp""",1.58014,42.53474,"""Europe/Andorra…","""Europe/Andorra…"
"""Canillo""",1.59756,42.5676,"""Europe/Andorra…","""Europe/Andorra…"
"""Arinsal""",1.48453,42.57205,"""Europe/Andorra…","""Europe/Andorra…"
"""Andorra la Vel…",1.52109,42.50779,"""Europe/Andorra…","""Europe/Andorra…"
"""Umm Al Quwain …",55.55517,25.56473,"""Asia/Dubai""","""Asia/Dubai"""
