In [54]:
import json
import re
from io import BytesIO
from urllib.parse import quote

import pandas as pd
import pycurl

In [69]:
def get_metadata(url, filter):
    """Query a CKAN ``package_search`` endpoint for datasets whose title matches ``filter``.

    Args:
        url: Base URL of the CKAN action API. ``package_search`` is appended
            directly, so the URL must already end with a slash.
        filter: Search term matched against the dataset title. It is
            percent-encoded before being placed in the query string.

    Returns:
        The decoded JSON response as a dict when its ``success`` field is
        truthy; otherwise ``None``, after printing a diagnostic message.

    Raises:
        pycurl.error: If the transfer itself fails (propagated unchanged).
    """
    buffer = BytesIO()
    # Encode the term so spaces, '&', '#', umlauts, ... cannot corrupt the query string.
    encoded_filter = quote(str(filter), safe='')

    c = pycurl.Curl()
    try:
        c.setopt(c.URL, f'{url}package_search?q=title:{encoded_filter}')
        c.setopt(c.WRITEDATA, buffer)
        c.perform()
    finally:
        # Release the curl handle even when setopt/perform raises.
        c.close()

    response = buffer.getvalue().decode('utf-8')

    if not response.strip():
        print("The response is empty.")
        return None

    try:
        data = json.loads(response)
    except json.JSONDecodeError as e:
        print("Failed to decode JSON:", e)
        print("Response content:", response)
        return None

    # Valid JSON is not necessarily an object; only a dict can carry 'success'.
    if isinstance(data, dict) and data.get('success'):
        return data

    print("API did not return a successful response:", data)
    return None

def save_json(data, filename):
    """Write ``data`` to ``filename`` as JSON indented by four spaces.

    The file is opened in text write mode, so any existing content is replaced.
    """
    with open(filename, mode='w') as sink:
        json.dump(data, fp=sink, indent=4)

def filter_result_by_year(data, year):
    """Return the first search result whose ``name`` contains ``year``.

    ``data`` is indexed as ``data['result']['results']``; ``year`` is turned
    into a string and matched as a substring of each entry's ``'name'``.
    Returns ``None`` when no entry matches.
    """
    matching = (
        entry
        for entry in data['result']['results']
        if str(year) in entry['name']
    )
    return next(matching, None)
        
def get_resource_url_for_year(data, year):
    """Collect the resource URLs of the dataset for ``year``, split into data and metadata.

    Args:
        data: Search response indexed as ``data['result']['results']``.
        year: Year used to pick the dataset via ``filter_result_by_year``.

    Returns:
        A dict with two lists: ``'metadata'`` holds URLs containing
        "Haltepunkt" or "Haltestelle", ``'data'`` holds every other URL.
        Both lists are empty when no dataset matches ``year``.
    """
    combined_data = {
        'data': [],
        'metadata': []
    }

    result = filter_result_by_year(data, year)
    if result is None:
        # No dataset for this year: return the empty structure instead of
        # failing with a TypeError on None['resources'].
        return combined_data

    for resource in result['resources']:
        url = resource['url']
        if "Haltepunkt" in url or "Haltestelle" in url:
            combined_data['metadata'].append(url)
        else:
            combined_data['data'].append(url)

    return combined_data

def pack_all_urls_into_one_dict(url, filter):
    """Build a year-keyed mapping of download URLs for all datasets matching ``filter``.

    Args:
        url: Base URL of the CKAN action API, passed on to ``get_metadata``.
        filter: Title search term, passed on to ``get_metadata``.

    Returns:
        A dict sorted by key. Each key is the first run of four digits found
        in a dataset name (as a string) and each value is the
        ``{'data': [...], 'metadata': [...]}`` dict produced by
        ``get_resource_url_for_year``. Returns an empty dict when the metadata
        request did not succeed.
    """
    data = get_metadata(url, filter)
    if data is None:
        # get_metadata has already printed why the request failed.
        return {}

    all_urls = {}

    for dataset in data['result']['results']:
        year_match = re.search(r'\d{4}', dataset['name'])
        if year_match is None:
            # The name carries no 4-digit year, so the dataset cannot be keyed; skip it.
            continue

        year = year_match.group(0)
        all_urls[year] = get_resource_url_for_year(data, year)

    return dict(sorted(all_urls.items()))


In [70]:
# Base URL of the CKAN action API and the title search term for the datasets.
# NOTE: `filter` shadows the built-in of the same name for the rest of the notebook.
url = "https://ckan.opendata.swiss/api/3/action/"
filter = "Fahrzeiten"

# Year string -> {'data': [...], 'metadata': [...]} mapping of download URLs.
download_url = pack_all_urls_into_one_dict(url=url, filter=filter)

In [71]:
# Show the keys of the URL mapping: the 4-digit year strings extracted from the dataset names.
download_url.keys()

dict_keys(['2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023'])

In [74]:
# Metadata URLs for 2019, i.e. the resource URLs containing "Haltepunkt" or "Haltestelle".
download_url['2019']['metadata']

['https://data.stadt-zuerich.ch/dataset/vbz_fahrzeiten_ogd_2019/download/Haltepunkt.csv',
 'https://data.stadt-zuerich.ch/dataset/vbz_fahrzeiten_ogd_2019/download/Haltestelle.csv']

In [75]:
# Read the first 2019 metadata CSV directly from its URL into a DataFrame.
df = pd.read_csv(download_url['2019']['metadata'][0])

In [76]:
# Display the loaded DataFrame as the cell output.
df

Unnamed: 0,halt_punkt_id,halt_punkt_diva,halt_id,GPS_Latitude,GPS_Longitude,GPS_Bearing,halt_punkt_ist_aktiv
0,303,51,143,47.360017,8.456337,85.0,False
1,304,50,143,47.360153,8.456180,270.0,False
2,686,50,309,47.368125,8.463072,212.0,False
3,687,51,309,47.368433,8.463819,19.0,False
4,823,51,373,47.452401,8.571871,208.0,False
...,...,...,...,...,...,...,...
17993,50754,10,2721,47.357409,8.597448,147.0,True
17994,50755,50,2715,47.382085,8.598821,223.0,True
17995,50756,81,588,47.450237,8.563471,270.0,True
17996,50758,80,588,47.450242,8.563842,90.0,True
