In [3]:
import requests
import json
import time

# Define the base URL for the CKAN API
base_url = "https://www.govdata.de/ckan/api/3/action/"

# Function to get a specified number of datasets with pagination
def get_recent_datasets_metadata(limit=15000, chunk_size=100, timeout=2, request_timeout=60):
    """Page through the CKAN ``package_search`` action and collect dataset records.

    The endpoint is built from the module-level ``base_url``.

    Args:
        limit: Maximum number of records to collect.
        chunk_size: A progress line is printed whenever the running total is an
            exact multiple of this value.
        timeout: Seconds to sleep between consecutive requests. Despite its
            name this is a politeness delay, not an HTTP timeout.
        request_timeout: Seconds passed to ``requests.get`` as its ``timeout``
            so that a stalled connection cannot hang the cell indefinitely.

    Returns:
        A list with the entries of ``result.results`` from every page fetched,
        in the order they were returned. Collection stops early when a response
        reports ``success`` as false or contains no results.
    """
    search_url = base_url + "package_search"
    rows = 100  # Number of datasets per request (max 1000 according to CKAN API)
    datasets = []
    start = 0

    while start < limit:
        params = {
            "rows": min(rows, limit - start),
            "start": start
        }
        response = requests.get(search_url, params=params, timeout=request_timeout)
        data = response.json()

        if not data['success']:
            break

        results = data['result']['results']
        if not results:
            break

        datasets.extend(results)
        # Advance by what was actually returned: if the server caps a page
        # below the requested size, stepping by `rows` would skip records.
        start += len(results)

        # Print progress in chunks to avoid overwhelming the output buffer
        if len(datasets) % chunk_size == 0:
            print(f"Retrieved {len(datasets)} datasets so far...")

        # Introduce a delay to avoid hitting the server too hard; there is
        # nothing left to throttle once the limit has been reached.
        if start < limit:
            time.sleep(timeout)

    return datasets

# Example usage
if __name__ == "__main__":
    recent_datasets_metadata = get_recent_datasets_metadata(limit=15000)

    # Save the results to a file to avoid overwhelming Jupyter output.
    # The path is named once so the confirmation message below always
    # reports the file that was actually written.
    output_path = 'datasets_metadata.json'
    with open(output_path, 'w') as f:
        json.dump(recent_datasets_metadata, f, indent=4)

    print(f"Retrieved metadata for {len(recent_datasets_metadata)} datasets. Data saved to '{output_path}'.")



Retrieved 100 datasets so far...
Retrieved 200 datasets so far...
Retrieved 300 datasets so far...
Retrieved 400 datasets so far...
Retrieved 500 datasets so far...
Retrieved 600 datasets so far...
Retrieved 700 datasets so far...
Retrieved 800 datasets so far...
Retrieved 900 datasets so far...
Retrieved 1000 datasets so far...
Retrieved 1100 datasets so far...
Retrieved 1200 datasets so far...
Retrieved 1300 datasets so far...
Retrieved 1400 datasets so far...
Retrieved 1500 datasets so far...
Retrieved 1600 datasets so far...
Retrieved 1700 datasets so far...
Retrieved 1800 datasets so far...
Retrieved 1900 datasets so far...
Retrieved 2000 datasets so far...
Retrieved 2100 datasets so far...
Retrieved 2200 datasets so far...
Retrieved 2300 datasets so far...
Retrieved 2400 datasets so far...
Retrieved 2500 datasets so far...
Retrieved 2600 datasets so far...
Retrieved 2700 datasets so far...
Retrieved 2800 datasets so far...
Retrieved 2900 datasets so far...
Retrieved 3000 datasets

In [5]:
import pandas as pd

# Build a DataFrame from the fetched records.
# Relies on `recent_datasets_metadata` still being defined in the kernel.
df = pd.DataFrame(recent_datasets_metadata)

In [9]:
# Preview the metadata table.
df

Unnamed: 0,author,author_email,creator_user_id,id,isopen,license_id,license_title,maintainer,maintainer_email,metadata_created,...,title,type,url,version,extras,groups,resources,tags,relationships_as_subject,relationships_as_object
0,Hanse- und Universitätsstadt Rostock – Hauptamt,geodienste@rostock.de,f512f73d-bed2-439d-8f6f-3fbc6048b24e,05be0aa5-76a2-4850-b37f-69e623565fbe,False,,,Hanse- und Universitätsstadt Rostock – Kataste...,geodienste@rostock.de,2023-01-06T06:14:11.920576,...,Bundestagswahl 2013,dataset,https://www.opendata-hro.de/dataset/e7c10dd4-d...,,"[{'key': 'access_rights', 'value': 'http://pub...","[{'description': '', 'display_name': 'Regierun...",[{'access_url': 'https://geo.sv.rostock.de/dow...,"[{'display_name': 'politik', 'id': 'a8e6e4aa-6...",[],[]
1,Hanse- und Universitätsstadt Rostock – Kataste...,geodienste@rostock.de,f512f73d-bed2-439d-8f6f-3fbc6048b24e,44fa7d61-7924-4740-95ec-b4a6821139fe,False,,,Hanse- und Universitätsstadt Rostock – Kataste...,geodienste@rostock.de,2024-05-08T06:31:53.940777,...,Bodenrichtwerte 2015,dataset,https://www.opendata-hro.de/dataset/c276228b-9...,,"[{'key': 'access_rights', 'value': 'http://pub...","[{'description': '', 'display_name': 'Regierun...",[{'access_url': 'https://geo.sv.rostock.de/dow...,"[{'display_name': 'bauleitplanung', 'id': '861...",[],[]
2,Hanse- und Universitätsstadt Rostock – Kataste...,geodienste@rostock.de,f512f73d-bed2-439d-8f6f-3fbc6048b24e,6219e4c3-e819-40f0-86cd-543ff829ad6c,False,,,Hanse- und Universitätsstadt Rostock – Kataste...,geodienste@rostock.de,2024-05-08T06:31:46.141017,...,Bodenrichtwerte 2008,dataset,https://www.opendata-hro.de/dataset/655f46c2-1...,,"[{'key': 'access_rights', 'value': 'http://pub...","[{'description': '', 'display_name': 'Regierun...",[{'access_url': 'https://geo.sv.rostock.de/dow...,"[{'display_name': 'bauleitplanung', 'id': '861...",[],[]
3,Hanse- und Universitätsstadt Rostock – Kataste...,geodienste@rostock.de,f512f73d-bed2-439d-8f6f-3fbc6048b24e,7c46a2cc-4fe3-4399-8833-61b016730b71,False,,,Hanse- und Universitätsstadt Rostock – Kataste...,geodienste@rostock.de,2024-05-08T06:32:00.483952,...,38. Internationaler Hansetag Rostock,dataset,https://www.opendata-hro.de/dataset/7afdfa7a-7...,,"[{'key': 'access_rights', 'value': 'http://pub...","[{'description': '', 'display_name': 'Bildung,...",[{'access_url': 'https://geo.sv.rostock.de/dow...,"[{'display_name': 'freizeit', 'id': '73a5f345-...",[],[]
4,Hanse- und Universitätsstadt Rostock – Hauptamt,geodienste@rostock.de,f512f73d-bed2-439d-8f6f-3fbc6048b24e,4d25536c-7cc4-4761-8504-0ba8ca320fa1,False,,,Hanse- und Universitätsstadt Rostock – Kataste...,geodienste@rostock.de,2024-05-08T06:32:18.276436,...,Ortsamtsbereiche,dataset,https://www.opendata-hro.de/dataset/aceccfc0-d...,,"[{'key': 'access_rights', 'value': 'http://pub...","[{'description': '', 'display_name': 'Regierun...",[{'access_url': 'https://geo.sv.rostock.de/dow...,"[{'display_name': 'behörde', 'id': 'e15183d8-d...",[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14995,Verbandsgemeinde Eich,poststelle@vg-eich.de,f512f73d-bed2-439d-8f6f-3fbc6048b24e,e725ccc0-5267-4bd0-b81c-32515ab32d8f,False,,,,,2022-01-26T10:17:23.776277,...,Die Mühläcker,dataset,,,"[{'key': 'author_contacttype', 'value': 'Organ...","[{'description': '', 'display_name': 'Bevölker...",[{'access_url': 'https://www.geoportal.rlp.de/...,"[{'display_name': 'bauleitplan', 'id': '8dfff9...",[],[]
14996,Ortsgemeinde Schwedelbach,info@vg-weilerbach.de,f512f73d-bed2-439d-8f6f-3fbc6048b24e,1620d684-d596-46b7-bc24-40250d38a402,False,,,,,2023-08-07T04:51:22.657594,...,Auf dem Hebel II,dataset,,,"[{'key': 'author_contacttype', 'value': 'Organ...","[{'description': '', 'display_name': 'Bevölker...",[{'access_url': 'https://www.geoportal.rlp.de/...,"[{'display_name': 'bauleitplan', 'id': '8dfff9...",[],[]
14997,Verbandsgemeinde Altenkirchen,rathaus@vg-altenkirchen.de,f512f73d-bed2-439d-8f6f-3fbc6048b24e,386ee29a-4ee2-4291-8f78-07d163230060,False,,,,,2022-01-26T09:30:21.252167,...,Wochenendhausgebiet Kircheiber Born,dataset,,,"[{'key': 'author_contacttype', 'value': 'Organ...","[{'description': '', 'display_name': 'Bevölker...",[{'access_url': 'https://www.geoportal.rlp.de/...,"[{'display_name': 'bauleitplan', 'id': '8dfff9...",[],[]
14998,Ortsgemeinde Erzenhausen,info@vg-weilerbach.de,f512f73d-bed2-439d-8f6f-3fbc6048b24e,d4ebb690-762e-4624-864d-d03383a36ada,False,,,,,2023-08-07T04:51:33.293593,...,Im Borfeld Änderung II,dataset,,,"[{'key': 'author_contacttype', 'value': 'Organ...","[{'description': '', 'display_name': 'Bevölker...",[{'access_url': 'https://www.geoportal.rlp.de/...,"[{'display_name': 'bauleitplan', 'id': '8dfff9...",[],[]


In [8]:
# List the column names available in the metadata table.
df.columns

Index(['author', 'author_email', 'creator_user_id', 'id', 'isopen',
       'license_id', 'license_title', 'maintainer', 'maintainer_email',
       'metadata_created', 'metadata_modified', 'name', 'notes',
       'num_resources', 'num_tags', 'organization', 'owner_org', 'private',
       'state', 'title', 'type', 'url', 'version', 'extras', 'groups',
       'resources', 'tags', 'relationships_as_subject',
       'relationships_as_object'],
      dtype='object')

In [11]:
# Show every value of the `url` column as a plain Python list.
df["url"].tolist()

['https://www.opendata-hro.de/dataset/e7c10dd4-d8fb-440c-9fc1-e2649b73e5d7',
 'https://www.opendata-hro.de/dataset/c276228b-9c7b-4812-b4a5-7480316c2233',
 'https://www.opendata-hro.de/dataset/655f46c2-102b-45a6-a9c4-cc7cb56d47df',
 'https://www.opendata-hro.de/dataset/7afdfa7a-7fd4-48a2-be63-e83750eb1c40',
 'https://www.opendata-hro.de/dataset/aceccfc0-dbf2-42a3-9456-389c6a0edb7c',
 'https://www.opendata-hro.de/dataset/8c74c345-92a1-4245-a42a-71c0beef8160',
 'https://www.opendata-hro.de/dataset/b5375466-e593-4a0e-9005-ab6a9bc22395',
 'https://www.opendata-hro.de/dataset/f709d112-a2d1-4098-a8ca-41e364899490',
 'https://www.opendata-hro.de/dataset/7c3a523b-6ef0-46f7-851b-da5340852439',
 'https://www.opendata-hro.de/dataset/d59dfb19-d22d-421d-b4af-53a600a18cbd',
 'https://www.opendata-hro.de/dataset/a3ccc8c3-810d-49c2-a1c2-b82645e87c97',
 'https://www.opendata-hro.de/dataset/bdbbae58-41e0-4453-bfe3-369e5c427cd0',
 'https://www.opendata-hro.de/dataset/d02483e4-6dca-45ee-9cf4-97cf27742179',

In [13]:
# Export the metadata table to CSV without the row-index column.
# NOTE(review): the destination is an absolute, user-specific Windows path.
df.to_csv(r'C:\Users\MolnarM\Downloads\datasets_metadata_germany.csv', index=False)

In [14]:
import requests
import json
import time
import pandas as pd

# Define the base URL for the CKAN API
base_url = "https://data.gov.ie/api/3/action/"

# Function to get a specified number of datasets with pagination
def get_recent_datasets_metadata(limit=15000, chunk_size=100, timeout=2, request_timeout=60):
    """Page through the CKAN ``package_search`` action and collect dataset records.

    The endpoint is built from the module-level ``base_url``.

    Args:
        limit: Maximum number of records to collect.
        chunk_size: A progress line is printed whenever the running total is an
            exact multiple of this value.
        timeout: Seconds to sleep between consecutive requests. Despite its
            name this is a politeness delay, not an HTTP timeout.
        request_timeout: Seconds passed to ``requests.get`` as its ``timeout``
            so that a stalled connection cannot hang the cell indefinitely.

    Returns:
        A list with the entries of ``result.results`` from every page fetched,
        in the order they were returned. Collection stops early when a response
        reports ``success`` as false or contains no results.
    """
    search_url = base_url + "package_search"
    rows = 100  # Number of datasets per request (max 1000 according to CKAN API)
    datasets = []
    start = 0

    while start < limit:
        params = {
            "rows": min(rows, limit - start),
            "start": start
        }
        response = requests.get(search_url, params=params, timeout=request_timeout)
        data = response.json()

        if not data['success']:
            break

        results = data['result']['results']
        if not results:
            break

        datasets.extend(results)
        # Advance by what was actually returned: if the server caps a page
        # below the requested size, stepping by `rows` would skip records.
        start += len(results)

        # Print progress in chunks to avoid overwhelming the output buffer
        if len(datasets) % chunk_size == 0:
            print(f"Retrieved {len(datasets)} datasets so far...")

        # Introduce a delay to avoid hitting the server too hard; there is
        # nothing left to throttle once the limit has been reached.
        if start < limit:
            time.sleep(timeout)

    return datasets

# Example usage
if __name__ == "__main__":
    # Collect up to 15,000 records from the portal configured in base_url.
    recent_datasets_metadata = get_recent_datasets_metadata(limit=15000)

    # Tabulate the records.
    df = pd.DataFrame(data=recent_datasets_metadata)

    # Write the table to disk, leaving the row index out of the file.
    df.to_csv(
        path_or_buf=r'C:\Users\MolnarM\Downloads\datasets_metadata_ireland.csv',
        index=False,
    )

    print(f"Retrieved metadata for {len(recent_datasets_metadata)} datasets. Data saved to 'datasets_metadata_ireland.csv'.")


Retrieved 100 datasets so far...
Retrieved 200 datasets so far...
Retrieved 300 datasets so far...
Retrieved 400 datasets so far...
Retrieved 500 datasets so far...
Retrieved 600 datasets so far...
Retrieved 700 datasets so far...
Retrieved 800 datasets so far...
Retrieved 900 datasets so far...
Retrieved 1000 datasets so far...
Retrieved 1100 datasets so far...
Retrieved 1200 datasets so far...
Retrieved 1300 datasets so far...
Retrieved 1400 datasets so far...
Retrieved 1500 datasets so far...
Retrieved 1600 datasets so far...
Retrieved 1700 datasets so far...
Retrieved 1800 datasets so far...
Retrieved 1900 datasets so far...
Retrieved 2000 datasets so far...
Retrieved 2100 datasets so far...
Retrieved 2200 datasets so far...
Retrieved 2300 datasets so far...
Retrieved 2400 datasets so far...
Retrieved 2500 datasets so far...
Retrieved 2600 datasets so far...
Retrieved 2700 datasets so far...
Retrieved 2800 datasets so far...
Retrieved 2900 datasets so far...
Retrieved 3000 datasets

In [1]:
import requests
import json
import time
import pandas as pd

# Define the base URL for the CKAN API
base_url = "https://opendata.swiss/api/3/action/"

# Function to get a specified number of datasets with pagination
def get_recent_datasets_metadata(limit=15000, chunk_size=100, timeout=2, request_timeout=60):
    """Page through the CKAN ``package_search`` action and collect dataset records.

    The endpoint is built from the module-level ``base_url``.

    Args:
        limit: Maximum number of records to collect.
        chunk_size: A progress line is printed whenever the running total is an
            exact multiple of this value.
        timeout: Seconds to sleep between consecutive requests. Despite its
            name this is a politeness delay, not an HTTP timeout.
        request_timeout: Seconds passed to ``requests.get`` as its ``timeout``
            so that a stalled connection cannot hang the cell indefinitely.

    Returns:
        A list with the entries of ``result.results`` from every page fetched,
        in the order they were returned. Collection stops early when a response
        reports ``success`` as false or contains no results.
    """
    search_url = base_url + "package_search"
    rows = 100  # Number of datasets per request (max 1000 according to CKAN API)
    datasets = []
    start = 0

    while start < limit:
        params = {
            "rows": min(rows, limit - start),
            "start": start
        }
        response = requests.get(search_url, params=params, timeout=request_timeout)
        data = response.json()

        if not data['success']:
            break

        results = data['result']['results']
        if not results:
            break

        datasets.extend(results)
        # Advance by what was actually returned: if the server caps a page
        # below the requested size, stepping by `rows` would skip records.
        start += len(results)

        # Print progress in chunks to avoid overwhelming the output buffer
        if len(datasets) % chunk_size == 0:
            print(f"Retrieved {len(datasets)} datasets so far...")

        # Introduce a delay to avoid hitting the server too hard; there is
        # nothing left to throttle once the limit has been reached.
        if start < limit:
            time.sleep(timeout)

    return datasets

# Example usage
if __name__ == "__main__":
    # Collect up to 15,000 records from the portal configured in base_url.
    recent_datasets_metadata = get_recent_datasets_metadata(limit=15000)

    # Tabulate the records.
    df = pd.DataFrame(data=recent_datasets_metadata)

    # Write the table next to the notebook, leaving the row index out of the file.
    df.to_csv(
        path_or_buf='datasets_metadata_switzerland.csv',
        index=False,
    )

    print(f"Retrieved metadata for {len(recent_datasets_metadata)} datasets. Data saved to 'datasets_metadata_switzerland.csv'.")


Retrieved 100 datasets so far...
Retrieved 200 datasets so far...
Retrieved 300 datasets so far...
Retrieved 400 datasets so far...
Retrieved 500 datasets so far...
Retrieved 600 datasets so far...
Retrieved 700 datasets so far...
Retrieved 800 datasets so far...
Retrieved 900 datasets so far...
Retrieved 1000 datasets so far...
Retrieved 1100 datasets so far...
Retrieved 1200 datasets so far...
Retrieved 1300 datasets so far...
Retrieved 1400 datasets so far...
Retrieved 1500 datasets so far...
Retrieved 1600 datasets so far...
Retrieved 1700 datasets so far...
Retrieved 1800 datasets so far...
Retrieved 1900 datasets so far...
Retrieved 2000 datasets so far...
Retrieved 2100 datasets so far...
Retrieved 2200 datasets so far...
Retrieved 2300 datasets so far...
Retrieved 2400 datasets so far...
Retrieved 2500 datasets so far...
Retrieved 2600 datasets so far...
Retrieved 2700 datasets so far...
Retrieved 2800 datasets so far...
Retrieved 2900 datasets so far...
Retrieved 3000 datasets

In [2]:
# Copy the table to the Downloads folder. index=False keeps the row index out
# of the file, matching the other CSV exports in this notebook.
# NOTE(review): the destination is an absolute, user-specific Windows path.
df.to_csv(r'C:\Users\MolnarM\Downloads\datasets_metadata_switzerland.csv', index=False)