# Imports & Env

In [None]:
import requests
import pandas as pd
import os
from dotenv import load_dotenv

# Read the .env file into the process environment, then pick up the two API
# credentials from it. Either value is None when its variable is not set.
load_dotenv()
liri_key, liri_secret = (os.getenv(var) for var in ("liri_key", "liri_secret"))

# Submitting a Query

In [None]:
# Endpoint and credential headers used by the query-submission request.
API_BASE_URL = "https://swissdox.linguistik.uzh.ch/api"
API_URL_QUERY = API_BASE_URL + "/query"
headers = {"X-API-Key": liri_key, "X-API-Secret": liri_secret}

# Metadata sent along with the query; replace the placeholders before submitting.
query_name = "Query name"
query_comment = "Query comment"
expiration_date = "YYYY-MM-DD"

# YAML query definition submitted to the API: date range, languages, the
# OR-combined search terms (German, French, Italian) and the result layout.
# Fixes relative to the previous version:
#   * "Gesundheistprävention" was misspelled and could never match; it is now
#     "Gesundheitsprävention".
#   * "Système de santé" was listed twice; the redundant entry is dropped
#     (a duplicate inside an OR list does not change the result set).
#   * The string has no placeholders, so it is a plain literal, not an f-string.
yaml = """
    query:
        dates:
            - from: 2018-01-01
              to: 2024-08-29
        languages:
            - fr
            - de
            - it
        content:
            OR:
                - Gesundheitskosten
                - Gesundheitssystem
                - Gesundheitswesen
                - Gesundheitspolitik
                - Gesundheitsreform
                - Gesundheitssektor
                - Gesundheitsversorgung
                - Spitalwesen
                - Versicherungspflicht
                - versicherungsmodell
                - Krankenkassen
                - Krankenversicherung
                - Krankenhaus
                - Spital
                - Gesundheitsprävention
                - Gesundheitsförderung
                - Gesundheitsdienste
                - Coûts de la santé
                - Système de santé
                - Politique de santé
                - Secteur de la santé
                - Réforme de la santé
                - Secteur sanitaire
                - Accès aux soins de santé
                - Secteur hospitalier
                - Obligation d'assurance
                - Modèle d'assurance
                - Caisses maladie
                - Assurance maladie
                - Hôpital
                - Centre hospitalier
                - Prévention sanitaire
                - Promotion de la santé
                - Services de santé
                - Costi della salute
                - Sistema sanitario
                - Settore sanitario
                - Politica sanitaria
                - Riforma sanitaria
                - Settore della salute
                - Accesso ai servizi sanitari
                - Settore ospedaliero
                - Obbligo di assicurazione
                - Modello assicurativo
                - Casse malati
                - Assicurazione malattia
                - Ospedale
                - Centro ospedaliero
                - Prevenzione sanitaria
                - Promozione della salute
                - Servizi sanitari

    result:
        format: TSV
        maxResults: 10000000
        columns:
            - id
            - pubtime
            - medium_code
            - medium_name
            - rubric
            - regional
            - doctype
            - doctype_description
            - language
            - char_count
            - dateline
            - head
            - subhead
            - content_id
            - content
    version: 1.2
"""

# Form fields of the submission request: the YAML query plus its metadata.
data = {
    "query": yaml,
    "test": "0",
    "name": query_name,
    "comment": query_comment,
    "expirationDate": expiration_date
}

# Without a timeout `requests` waits indefinitely on an unresponsive server;
# the (connect, read) limits in seconds make the cell fail instead of hanging.
r = requests.post(
    API_URL_QUERY,
    headers=headers,
    data=data,
    timeout=(10, 120)
)

print("Status Code:", r.status_code)
try:
    print(r.json())
except ValueError:
    # The body is not JSON (e.g. an HTML error page); show it raw so the
    # actual server message is visible instead of a decoding traceback.
    print(r.text)

# Checking the status of submitted queries

In [None]:
# Request the status listing from the API. Headers and base URL are repeated
# here so this cell can be run without re-running the submission cell.
headers = {
    "X-API-Key": liri_key,
    "X-API-Secret": liri_secret
}
API_BASE_URL = "https://swissdox.linguistik.uzh.ch/api"
API_URL_STATUS = f"{API_BASE_URL}/status"

# The (connect, read) timeout in seconds keeps the cell from hanging forever
# on an unresponsive server.
r = requests.get(
    API_URL_STATUS,
    headers=headers,
    timeout=(10, 120)
)

# Stop on an HTTP error (bad credentials, server failure, ...) instead of
# handing an error payload to the DataFrame construction further down.
r.raise_for_status()

# Parsing the JSON response
api_response = r.json()

# Extract name, status, and downloadUrl
def parse_api_response(api_response):
    """Build a DataFrame with the name, status and downloadUrl of each entry.

    Args:
        api_response: Decoded JSON payload of the status request
            (presumably a list of per-query records; confirm against the API).

    Returns:
        A pandas DataFrame restricted to the columns ``name``, ``status``
        and ``downloadUrl``, in that order.
    """
    wanted_columns = ['name', 'status', 'downloadUrl']
    return pd.DataFrame(data=api_response, columns=wanted_columns)

# Tabulate the status payload so individual queries can be looked up by name.
df_parsed = parse_api_response(api_response=api_response)

# Downloading the retrieved dataset

In [None]:
# Function to get status and downloadUrl for a given name from the dataframe
def get_status_and_download_url(df, name):
    """Look up the status and download URL recorded for a query name.

    Args:
        df: DataFrame with ``name``, ``status`` and ``downloadUrl`` columns.
        name: Value compared for equality against the ``name`` column.

    Returns:
        A ``(status, download_url)`` tuple taken from the first matching row,
        or ``(None, None)`` when no row carries that name.
    """
    matches = df.loc[df['name'] == name]

    # Guard clause: nothing recorded under this name.
    if matches.empty:
        return None, None

    first_status = matches['status'].values[0]
    first_url = matches['downloadUrl'].values[0]
    return first_status, first_url

# Look up the query by the name it was submitted under and report the result.
name = query_name
status, download_url = get_status_and_download_url(df_parsed, name)

for label, value in (
    ("Name:", name),
    ("Status:", status),
    ("Download URL:", download_url),
):
    print(label, value)

# Saving

In [None]:
# Ensure the directory exists
download_folder = "data/00_downloads"
os.makedirs(download_folder, exist_ok=True)

# Path to the file within the download folder
file_path = os.path.join(download_folder, "dataset.tsv.xz")

# The status lookup yields None (or another non-string value) when the query
# is unknown or has no download URL yet; fail with a clear message instead of
# sending a request to a bogus URL.
if not isinstance(download_url, str) or not download_url:
    raise ValueError(f"No download URL available (got {download_url!r}).")

# API request to download the file. The body is streamed to disk in chunks so
# a large archive is never held in memory as a whole, and the (connect, read)
# timeout in seconds keeps the cell from hanging on a stalled connection.
with requests.get(download_url, headers=headers, stream=True, timeout=(10, 300)) as r:
    if r.status_code == 200:
        # Write to a temporary name first so an interrupted transfer never
        # leaves a truncated file under the final path.
        part_path = file_path + ".part"
        size_bytes = 0
        with open(part_path, "wb") as fp:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                fp.write(chunk)
                size_bytes += len(chunk)
        os.replace(part_path, file_path)

        print("Size of file: %.2f KB" % (size_bytes / 1024))
        print(f"File saved to {file_path}")
    else:
        print(r.text)