In [6]:
import os
import time

import requests
from rdflib import Graph, Namespace, URIRef, Literal
from rdflib.namespace import RDF, RDFS, XSD

# GeoNames account name sent with every API request. Set the
# GEONAMES_USERNAME environment variable to override the default below, so
# the account name does not have to be edited into the source.
username = os.environ.get("GEONAMES_USERNAME", "monir.najem")

# Module-level graph that collects every triple added by the functions below.
g = Graph()

# Define namespaces: GeoNames ontology terms, bound to the "gn" prefix.
GN = Namespace("http://www.geonames.org/ontology#")
g.bind("gn", GN)

def get_country_info(country_code, *, timeout=30):
    """Fetch one country from GeoNames and add it to the module-level graph.

    Queries the ``countryInfoJSON`` service for ``country_code`` and, when a
    record comes back, adds the country's type, English label, population,
    area and country code to ``g``. When the record names a capital,
    ``get_capital_info`` is called to add and link it.

    Args:
        country_code: Value sent as the ``country`` query parameter
            (for example ``"FR"``).
        timeout: Seconds ``requests.get`` may wait before giving up.

    Returns:
        True when the country's triples were added; False when the request
        failed, no record was returned, or the record could not be converted.
    """
    try:
        response = requests.get(
            "http://api.geonames.org/countryInfoJSON",
            # Let requests build and URL-encode the query string.
            params={"country": country_code, "username": username},
            timeout=timeout,
        )
        response.raise_for_status()
        payload = response.json()  # parse the body once and reuse it

        records = payload.get("geonames")
        if not records:
            # Surface the service's own message when it supplies one instead
            # of failing without any output.
            status = payload.get("status") or {}
            reason = status.get("message", "no country record returned")
            print(f"Error processing {country_code}: {reason}")
            return False

        data = records[0]
        country_uri = URIRef(f"http://sws.geonames.org/{data['geonameId']}/")

        # Build every triple before touching the graph: a missing key or a
        # failed int()/float() conversion then leaves the graph unchanged
        # rather than holding a half-described country.
        triples = [
            (country_uri, RDF.type, GN.Country),
            (country_uri, RDFS.label, Literal(data['countryName'], lang='en')),
            (country_uri, GN.population, Literal(int(data['population']))),
            (country_uri, GN.areaInSqKm, Literal(float(data['areaInSqKm']))),
            (country_uri, GN.countryCode, Literal(data['countryCode'])),
        ]
        for triple in triples:
            g.add(triple)

        # Get capital city info; skip records whose capital is absent or empty.
        if data.get('capital'):
            get_capital_info(data['capital'], country_uri)

        print(f"Added data for {data['countryName']}")
        time.sleep(1)  # Rate limiting
        return True
    except Exception as e:
        # Deliberately broad: one bad country must not abort a batch run.
        print(f"Error processing {country_code}: {e}")
    return False

def get_capital_info(capital_name, country_uri, *, timeout=30):
    """Look up a capital by name on GeoNames and link it to its country.

    Queries the ``searchJSON`` service for the first populated place
    (``featureClass=P``) matching ``capital_name``. When a record comes back,
    the city's type, English label and population are added to the
    module-level graph ``g`` together with a ``gn:capital`` triple from
    ``country_uri`` to the city.

    Args:
        capital_name: Name of the capital, sent as the ``q`` query parameter.
        country_uri: Graph node of the country that the capital belongs to.
        timeout: Seconds ``requests.get`` may wait before giving up.

    Returns:
        None. Failures are printed and otherwise ignored.
    """
    # NOTE(review): the search is by name only, so the first hit is not
    # guaranteed to lie in the country identified by country_uri.
    try:
        response = requests.get(
            "http://api.geonames.org/searchJSON",
            # Let requests URL-encode the values; capital names may contain
            # spaces or other reserved characters.
            params={
                "q": capital_name,
                "featureClass": "P",
                "maxRows": 1,
                "username": username,
            },
            timeout=timeout,
        )
        response.raise_for_status()
        payload = response.json()  # parse the body once and reuse it

        records = payload.get("geonames")
        if not records:
            # Report why nothing was added instead of returning silently.
            status = payload.get("status") or {}
            reason = status.get("message", "no matching place returned")
            print(f"Error processing capital {capital_name}: {reason}")
            return

        capital_data = records[0]
        capital_uri = URIRef(f"http://sws.geonames.org/{capital_data['geonameId']}/")

        # Build every triple first so that a missing key or a failed int()
        # conversion cannot leave a partially described city in the graph.
        triples = [
            (capital_uri, RDF.type, GN.City),
            (capital_uri, RDFS.label, Literal(capital_data['name'], lang='en')),
            (capital_uri, GN.population, Literal(int(capital_data.get('population', 0)))),
            (country_uri, GN.capital, capital_uri),
        ]
        for triple in triples:
            g.add(triple)

        time.sleep(1)  # Rate limiting
    except Exception as e:
        # Deliberately broad: a failed capital lookup must not stop the
        # caller from finishing the country.
        print(f"Error processing capital {capital_name}: {e}")
# List of country codes to process
european_countries = [
    'FR', 'DE', 'IT', 'ES', 'GB', 'PL', 'RO', 'NL', 'BE', 'GR', 'CZ', 'PT', 'SE', 'HU', 'AT',
    'BG', 'DK', 'FI', 'SK', 'IE', 'HR', 'LT', 'SI', 'LV', 'EE', 'CY', 'LU', 'MT', 'IS', 'NO', 'CH',
]
countries = european_countries

# get_country_info returns False when a country could not be added; collect
# those codes so an incomplete dataset is visible in the run summary.
failed_codes = []
for code in countries:
    if not get_country_info(code):
        failed_codes.append(code)

# Save to file
g.serialize("geonames_data.ttl", format="turtle")
print(f"Total triples in graph: {len(g)}")
if failed_codes:
    print(f"Countries not added: {', '.join(failed_codes)}")

Added data for France
Added data for Germany
Added data for Italy
Added data for Spain
Added data for United Kingdom
Added data for Poland
Added data for Romania
Added data for The Netherlands
Added data for Belgium
Added data for Greece
Added data for Czechia
Added data for Portugal
Added data for Sweden
Added data for Hungary
Added data for Austria
Added data for Bulgaria
Added data for Denmark
Added data for Finland
Added data for Slovakia
Added data for Ireland
Added data for Croatia
Added data for Lithuania
Added data for Slovenia
Added data for Latvia
Added data for Estonia
Added data for Cyprus
Added data for Luxembourg
Added data for Malta
Added data for Iceland
Added data for Norway
Added data for Switzerland
Total triples in graph: 279
