In [1]:
import pandas as pd

In [2]:
ipv4 = pd.read_csv("geolite/ipv4.csv")
ipv4.head()

Unnamed: 0,network,geoname_id,registered_country_geoname_id,represented_country_geoname_id,is_anonymous_proxy,is_satellite_provider
0,1.0.0.0/24,2077456.0,2077456.0,,0,0
1,1.0.1.0/24,1814991.0,1814991.0,,0,0
2,1.0.2.0/23,1814991.0,1814991.0,,0,0
3,1.0.4.0/22,2077456.0,2077456.0,,0,0
4,1.0.8.0/21,1814991.0,1814991.0,,0,0


In [3]:
ipv4.drop(
    columns=[
        "registered_country_geoname_id",
        "represented_country_geoname_id",
        "is_anonymous_proxy",
        "is_satellite_provider",
    ],
    inplace=True,
)
ipv4.head()

Unnamed: 0,network,geoname_id
0,1.0.0.0/24,2077456.0
1,1.0.1.0/24,1814991.0
2,1.0.2.0/23,1814991.0
3,1.0.4.0/22,2077456.0
4,1.0.8.0/21,1814991.0


In [4]:
ipv4["geoname_id"] = ipv4["geoname_id"].fillna(0).astype(int)
ipv4.head()

Unnamed: 0,network,geoname_id
0,1.0.0.0/24,2077456
1,1.0.1.0/24,1814991
2,1.0.2.0/23,1814991
3,1.0.4.0/22,2077456
4,1.0.8.0/21,1814991


In [5]:
id2c = pd.read_csv("geolite/id_to_country.csv")
id2c.head()

Unnamed: 0,geoname_id,locale_code,continent_code,continent_name,country_iso_code,country_name,is_in_european_union
0,49518,en,AF,Africa,RW,Rwanda,0
1,51537,en,AF,Africa,SO,Somalia,0
2,69543,en,AS,Asia,YE,Yemen,0
3,99237,en,AS,Asia,IQ,Iraq,0
4,102358,en,AS,Asia,SA,Saudi Arabia,0


In [6]:
id2c.drop(columns=["locale_code", "continent_code", "continent_name", "country_name", "is_in_european_union"], inplace=True)
id2c.head()

Unnamed: 0,geoname_id,country_iso_code
0,49518,RW
1,51537,SO
2,69543,YE
3,99237,IQ
4,102358,SA


In [36]:
merged_df = pd.merge(ipv4, id2c, on=["geoname_id"], how="outer")
merged_df.tail(15)

Unnamed: 0,network,geoname_id,country_iso_code
393365,193.253.98.0/24,1024031,YT
393366,193.253.110.0/23,1024031,YT
393367,193.253.112.0/24,1024031,YT
393368,193.253.126.0/23,1024031,YT
393369,194.3.192.0/20,1024031,YT
393370,194.50.99.247/32,1024031,YT
393371,194.50.111.247/32,1024031,YT
393372,213.55.56.0/22,1024031,YT
393373,213.55.60.0/23,1024031,YT
393374,213.222.72.0/21,1024031,YT


In [16]:
merged_df.drop(columns=["geoname_id"], inplace=True)
merged_df.head()

Unnamed: 0,network,country_iso_code
0,1.0.0.0/24,AU
1,1.0.4.0/22,AU
2,1.1.1.0/32,AU
3,1.1.1.2/31,AU
4,1.1.1.4/30,AU


In [18]:
merged_df.to_csv("geolite/final.csv", index=False)

In [26]:
prefixes = list(merged_df["network"])

In [34]:
from netaddr import cidr_merge
merged_ips = cidr_merge(prefixes)
len(merged_ips)

12461

In [35]:
import requests
from typing import Tuple

GEOAPIFY_API_KEY = "e3b2b5940f82418689f369419e58168e"

def get_lat_long_for_ip(ip: str) -> Tuple[float, float]:
    url = f"https://api.geoapify.com/v1/ipinfo?ip={ip}&apiKey={GEOAPIFY_API_KEY}"
    response = requests.get(url).json()
    location = response["location"]
    lat, long = location["latitude"], location["longitude"]
    return lat, long

In [44]:
table = {}

for prefix in merged_ips[:10]:
    location = get_lat_long_for_ip(prefix)
    table[prefix] = location

In [45]:
table

{IPNetwork('1.0.0.0/8'): (-27.4767, 153.017),
 IPNetwork('2.0.0.0/11'): (48.8566, 2.35222),
 IPNetwork('2.32.0.0/12'): (40.8518, 14.2681),
 IPNetwork('2.48.0.0/13'): (25.2048, 55.2708),
 IPNetwork('2.56.0.0/16'): (39.7447, -75.5484),
 IPNetwork('2.57.4.0/22'): (41.8521, 3.13048),
 IPNetwork('2.57.8.0/21'): (51.4025, 21.147100000000002),
 IPNetwork('2.57.16.0/20'): (25.033, 121.565),
 IPNetwork('2.57.32.0/19'): (45.4642, 9.18998),
 IPNetwork('2.57.64.0/18'): (48.1486, 17.1077)}

In [46]:
from netaddr import IPSet

ip_set = IPSet(merged_ips)

In [52]:
"155.33.134.52" in ip_set

True