In [21]:
import json
import re
from io import BytesIO
from urllib.parse import quote

import pandas as pd
import pycurl

In [None]:
def get_metadata(url, filter):
    """
    Get metadata from the opendata API.

    Issues a GET request to ``{url}package_search?q=title:{filter}`` and
    parses the response body as JSON.

    Parameters:
    url (str): Base URL of the API. It is concatenated directly with
        ``package_search``, so it has to end with a slash.
    filter (str): The filter to search for. It is percent-encoded before
        being placed in the query string.

    Returns:
    dict: A JSON object containing the metadata if the request is successful.
    None: If the request is not successful, the response is empty, or the
        response is not valid JSON. The reason is printed in each case.

    Raises:
    pycurl.error: If the transfer itself fails; the curl handle is still
        closed before the exception propagates.
    """
    # Encode the search term so spaces, '&', '#', umlauts etc. cannot break
    # or alter the query string.
    query = quote(str(filter), safe='')

    buffer = BytesIO()
    c = pycurl.Curl()
    try:
        c.setopt(c.URL, f'{url}package_search?q=title:{query}')
        c.setopt(c.WRITEDATA, buffer)
        c.perform()
    finally:
        # Release the handle even when setopt/perform raises.
        c.close()

    response = buffer.getvalue().decode('utf-8')

    if not response.strip():
        print("The response is empty.")
        return None

    try:
        data = json.loads(response)
    except json.JSONDecodeError as e:
        print("Failed to decode JSON:", e)
        print("Response content:", response)
        return None

    # Valid JSON is not necessarily an object; guard before calling .get().
    if isinstance(data, dict) and data.get('success'):
        return data

    print("API did not return a successful response:", data)
    return None

def save_json(data, filename):
    """
    Write ``data`` to ``filename`` as JSON indented by four spaces.

    Parameters:
    data: Any object that ``json.dump`` can serialize.
    filename (str): Path of the file to write; it is opened in text write
        mode, so an existing file is overwritten.
    """
    with open(filename, mode='w') as out_file:
        json.dump(data, out_file, indent=4)

def filter_result_by_year(data, year):
    """
    Return the first search result whose name contains the given year.

    Parameters:
    data (dict): Search response; the entries under
        ``data['result']['results']`` are scanned in order.
    year (int | str): Year to look for; it is converted with ``str`` and
        matched as a substring of each entry's ``'name'``.

    Returns:
    dict: The first matching entry.
    None: If no entry matches.
    """
    matching = (
        entry
        for entry in data['result']['results']
        if str(year) in entry['name']
    )
    return next(matching, None)
        
def get_resource_url_for_year(data, year):
    """
    Collect the download URLs of the dataset matching ``year``.

    The dataset is looked up with ``filter_result_by_year``. Each of its
    resources is sorted by its ``'download_url'``: URLs containing
    "Haltepunkt" or "Haltestelle" go to ``'metadata'``, all others to
    ``'data'``.

    Parameters:
    data (dict): Search response as accepted by ``filter_result_by_year``.
    year (int | str): Year used to select the dataset.

    Returns:
    dict: ``{'data': [...], 'metadata': [...]}`` with the URLs in resource
        order. Both lists are empty when no dataset matches ``year``.
    """
    combined_data = {
        'data': [],
        'metadata': []
    }

    result = filter_result_by_year(data, year)

    # No dataset for this year: return the empty structure instead of
    # failing with "'NoneType' object is not subscriptable".
    if result is None:
        return combined_data

    for resource in result['resources']:
        url = resource['download_url']
        if "Haltepunkt" in url or "Haltestelle" in url:
            combined_data['metadata'].append(url)
        else:
            combined_data['data'].append(url)

    return combined_data

def pack_all_urls_into_one_dict(url, filter):
    """
    Build a year-keyed dictionary of download URLs for all matching datasets.

    Queries the API through ``get_metadata``, extracts the first four-digit
    number from every dataset name as its year, and stores the result of
    ``get_resource_url_for_year`` under that year.

    Parameters:
    url (str): Base URL of the API, forwarded to ``get_metadata``.
    filter (str): Search term, forwarded to ``get_metadata``.

    Returns:
    dict: ``{year (str): {'data': [...], 'metadata': [...]}}`` sorted by
        year. Empty when ``get_metadata`` returns no data.
    """
    data = get_metadata(url, filter)

    # get_metadata has already printed the reason; there is nothing to pack.
    if data is None:
        return {}

    all_urls = {}

    for dataset in data['result']['results']:
        year_match = re.search(r'\d{4}', dataset['name'])
        if year_match is None:
            # Dataset name carries no four-digit year; it cannot be keyed.
            continue

        year = year_match.group(0)
        if year in all_urls:
            # The per-year lookup always resolves to the first dataset whose
            # name contains the year, so repeating it gives the same entry.
            continue

        all_urls[year] = get_resource_url_for_year(data, year)

    all_urls_sorted = dict(sorted(all_urls.items()))

    return all_urls_sorted


In [None]:
# Base URL of the API; get_metadata appends "package_search" directly to it,
# so the trailing slash is required.
url = "https://ckan.opendata.swiss/api/3/action/"
# Title search term for the package search.
# NOTE(review): this name shadows the built-in `filter`.
filter = "Fahrzeiten"
# Passed to pd.read_csv in a later cell; presumably sets a browser-like
# User-Agent header for the CSV download — confirm it is still needed.
storage_options = {'User-Agent': 'Mozilla/5.0'}

# Year-keyed dictionary of {'data': [...], 'metadata': [...]} download URLs.
download_url = pack_all_urls_into_one_dict(url, filter)

In [32]:
# Show the years for which download URLs were collected.
download_url.keys()

dict_keys(['2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023'])

In [39]:
# Print the data URLs listed under 2019 whose text also contains "2020".
# NOTE(review): the loop variable rebinds the notebook-level name `url`,
# which previously held the API base URL.
for url in download_url['2019']['data']:
    if "2020" in url:
        print(url)


https://data.stadt-zuerich.ch/dataset/vbz_fahrzeiten_ogd_2019/download/Fahrzeiten_SOLL_IST_20191229_20200104.csv


In [25]:
# Show the first metadata URL collected for 2019.
download_url['2019']['metadata'][0]

'https://data.stadt-zuerich.ch/dataset/vbz_fahrzeiten_ogd_2019/download/Haltestelle.csv'

In [42]:
# Read the first 2019 data CSV directly from its URL into a DataFrame;
# storage_options comes from the configuration cell.
df = pd.read_csv(download_url['2019']['data'][0], storage_options=storage_options)

In [43]:
# Display the frame loaded from the 2019 data CSV.
df

Unnamed: 0,linie,richtung,betriebsdatum,fahrzeug,kurs,seq_von,halt_diva_von,halt_punkt_diva_von,halt_kurz_von1,datum_von,...,fahrweg_id,fw_no,fw_typ,fw_kurz,fw_lang,umlauf_von,halt_id_von,halt_id_nach,halt_punkt_id_von,halt_punkt_id_nach
0,8,2,22.04.19,2110,6,1,799,5,EWYS,22.04.19,...,99831,6,1,6,EWYS - HARD für Ausfahrt,191788,1574,2705,44948,43348
1,8,2,22.04.19,2108,3,1,799,5,EWYS,22.04.19,...,99831,6,1,6,EWYS - HARD für Ausfahrt,191750,1574,2705,44948,43348
2,8,2,22.04.19,2102,4,1,799,5,EWYS,22.04.19,...,99831,6,1,6,EWYS - HARD für Ausfahrt,191757,1574,2705,44948,43348
3,8,2,21.04.19,2108,6,1,799,5,EWYS,21.04.19,...,99831,6,1,6,EWYS - HARD für Ausfahrt,191788,1574,2705,44948,43348
4,8,2,21.04.19,2120,3,1,799,5,EWYS,21.04.19,...,99831,6,1,6,EWYS - HARD für Ausfahrt,191750,1574,2705,44948,43348
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1358968,35,1,23.04.19,10334,1,5,852,20,FEUS,23.04.19,...,108198,3,1,3,SOLP - FEIC,186348,2539,2530,10816,47250
1358969,35,1,23.04.19,10334,1,5,852,20,FEUS,23.04.19,...,108198,3,1,3,SOLP - FEIC,186348,2539,2530,10816,47250
1358970,35,1,23.04.19,10334,1,5,852,20,FEUS,23.04.19,...,108198,3,1,3,SOLP - FEIC,186348,2539,2530,10816,47250
1358971,35,1,23.04.19,10334,1,5,852,20,FEUS,23.04.19,...,108198,3,1,3,SOLP - FEIC,186348,2539,2530,10816,47250


In [27]:
# Select the first metadata URL collected for 2018.
# NOTE(review): this reuses the name `url`, which the configuration cell
# bound to the API base URL.
url = download_url['2018']['metadata'][0]

In [29]:
# Display the current contents of `df`.
# NOTE(review): the recorded output shows stop metadata columns, but no
# visible cell reassigns `df` after the 2019 data load, and the execution
# counts jump from 27 to 29 — the cell that read the metadata CSV appears
# to be missing, so this cell will not reproduce on a fresh kernel.
df

Unnamed: 0,halt_id,halt_diva,halt_kurz,halt_lang,halt_ist_aktiv
0,143,2570,BirmSte,"Birmensdorf ZH, Sternen/WSL",True
1,309,3356,WaldBir,"Waldegg, Birmensdorferstrasse",True
2,373,6232,FRAF07,"Zürich Flughafen, Fracht",True
3,539,2655,TBAH01,"Thalwil, Bahnhof",True
4,588,3027,FLUG07,"Zürich Flughafen, Bahnhof",True
...,...,...,...,...,...
725,3198,6999,TWÄR,"Binz bei Maur, Twäracher",True
726,3206,7000,SMEU,"Schlieren, Meuchwis",True
727,3217,7022,BAWO,"Zürich, Bahnhof Wollishofen",True
728,3222,7021,SRAI,"Spreitenbach, Raiacker",True


In [11]:
# For every collected year, print the year followed by its metadata URLs;
# a year with an empty 'metadata' list prints only the year.
for year in download_url:
    print(year)
    for url in download_url[year]['metadata']:
        print(url)
    

2015
2016
https://data.stadt-zuerich.ch/dataset/vbz_fahrzeiten_ogd_2016/download/Haltepunkt.csv
https://data.stadt-zuerich.ch/dataset/vbz_fahrzeiten_ogd_2016/download/Haltestelle.csv
2017
https://data.stadt-zuerich.ch/dataset/vbz_fahrzeiten_ogd_2017/download/Haltepunkt.csv
https://data.stadt-zuerich.ch/dataset/vbz_fahrzeiten_ogd_2017/download/Haltestelle.csv
2018
https://data.stadt-zuerich.ch/dataset/vbz_fahrzeiten_ogd_2018/download/Haltestelle.csv
https://data.stadt-zuerich.ch/dataset/vbz_fahrzeiten_ogd_2018/download/Haltepunkt.csv
2019
https://data.stadt-zuerich.ch/dataset/vbz_fahrzeiten_ogd_2019/download/Haltestelle.csv
https://data.stadt-zuerich.ch/dataset/vbz_fahrzeiten_ogd_2019/download/Haltepunkt.csv
2020
https://data.stadt-zuerich.ch/dataset/vbz_fahrzeiten_ogd_2020/download/Haltestelle.csv
https://data.stadt-zuerich.ch/dataset/vbz_fahrzeiten_ogd_2020/download/Haltepunkt.csv
2021
https://data.stadt-zuerich.ch/dataset/vbz_fahrzeiten_ogd_2021/download/Haltepunkt.csv
https://data.s