In [1]:
import numpy as np
import pandas as pd
from IPython.display import display

import polars as pl
from ipaddress import ip_address

In [2]:
def ip_to_int(ip):
    """Return the integer value of the IP address given by ``ip``.

    ``ip`` is parsed with ``ipaddress.ip_address``; any input that
    function rejects raises here as well.
    """
    parsed = ip_address(ip)
    return int(parsed)

# Lookup table of IP ranges: integer start/end bounds plus the country
# each range maps to, ordered by range start.
ranges = pl.DataFrame(
    {
        "start_ip": list(map(ip_to_int, ["1.0.0.0", "2.0.0.0"])),
        "end_ip": list(map(ip_to_int, ["1.255.255.255", "2.255.255.255"])),
        "country": ["CountryA", "CountryB"],
    }
).sort("start_ip")

# Addresses to resolve, stored as integers and ordered ascending.
ips = pl.DataFrame(
    {"ip": list(map(ip_to_int, ["1.2.3.4", "2.128.0.1", "3.0.0.1", "0.0.0.1"]))}
).sort("ip")

In [3]:
# Render both input frames, range table first, then the IPs to look up.
display(ranges, ips)

start_ip,end_ip,country
i64,i64,str
16777216,33554431,"""CountryA"""
33554432,50331647,"""CountryB"""


ip
i64
1
16909060
41943041
50331649


In [4]:
# join_asof needs both inputs ordered on their join keys.
ranges = ranges.sort("start_ip")
ips = ips.sort("ip")

# Attach to every ip the nearest range whose start_ip is at or below it
# (backward search). Only start_ip is considered here; end_ip is not checked.
result = ips.join_asof(ranges, left_on="ip", right_on="start_ip", strategy="backward")

In [5]:
# Show the raw as-of match; rows are matched on start_ip only at this point.
result

ip,start_ip,end_ip,country
i64,i64,i64,str
1,,,
16909060,16777216.0,33554431.0,"""CountryA"""
41943041,33554432.0,50331647.0,"""CountryB"""
50331649,33554432.0,50331647.0,"""CountryB"""


In [6]:
# Add country_mapped: null when the ip lies past the matched range's end_ip,
# otherwise the matched country. No rows are dropped.
result = result.with_columns(
    country_mapped=pl.when(pl.col("ip") > pl.col("end_ip"))
    .then(None)
    .otherwise(pl.col("country"))
)

In [7]:
# Show the frame with country_mapped, which is null where ip exceeds end_ip.
result

ip,start_ip,end_ip,country,country_mapped
i64,i64,i64,str,str
1,,,,
16909060,16777216.0,33554431.0,"""CountryA""","""CountryA"""
41943041,33554432.0,50331647.0,"""CountryB""","""CountryB"""
50331649,33554432.0,50331647.0,"""CountryB""",
