# Collect Haute-Savoie data from the ADEME API

Imports

In [None]:
import requests
import time

import pandas as pd

### Utilities and constants

Define the base URL for the ADEME API

In [15]:
BASE_URL = 'https://data.ademe.fr/data-fair/api/v1/datasets'

Safe API call

In [32]:
def requests_API(endpoint, custom_url:str = None, params:dict = None, tic:int = 0) -> dict:
    """Send a GET request to the API and return the decoded JSON body.

    The request is retried, one second apart, on connection errors and on
    any non-200 status code.

    Args:
        endpoint: Path appended to ``BASE_URL``. Ignored (and may be ``None``)
            when ``custom_url`` is given.
        custom_url: Full URL to request as-is; ``params`` is not sent with it.
        params: Query-string parameters, used only with ``endpoint``.
        tic: Number of attempts already made; the first call uses 0.

    Returns:
        The parsed JSON response, or ``None`` once every attempt has failed.
    """
    max_retries = 3  # retries after the first attempt, i.e. 4 attempts in total

    if custom_url:
        url, query = custom_url, None
    else:
        url, query = BASE_URL + endpoint, params

    attempt = tic
    while True:

        try:
            response = requests.get(url, params=query)

        # The builtin ConnectionError is NOT a base class of the exception
        # raised by requests, so the library's own class must be named here.
        except requests.exceptions.ConnectionError:

            print(f'ConnectionError, network failed to request API ({attempt + 1}/{max_retries + 1})')

            # ">=" rather than "==" so a caller-supplied tic above the limit
            # still terminates instead of retrying indefinitely.
            if attempt >= max_retries:
                print('Failed to fetch')
                return None

            print('Trying again in 1 second')

        else:

            if response.status_code == 200:
                return response.json()

            # Printed on every failed attempt, including the last one, so the
            # final error is visible before giving up.
            print(f'Error code {response.status_code} : {response.text}')

            if attempt >= max_retries:
                print('Failed')
                return None

        time.sleep(1)
        attempt += 1

Append a `list[dict]` to a DataFrame

In [3]:
def append_data(df:pd.DataFrame, data:list[dict]) -> pd.DataFrame:
    """Return a new DataFrame made of ``df`` followed by the rows in ``data``.

    Args:
        df: Existing rows; it is not modified.
        data: Records to append, one dict per row.

    Returns:
        The concatenation of ``df`` and ``data`` with a fresh 0..n-1 index.
    """
    data_df = pd.DataFrame(data)
    return pd.concat([df, data_df], ignore_index=True)

### Preview results

Check number of results

In [34]:
# Request zero rows: only the `total` field of the response is read below.
params = dict(size=0, qs='code_departement_ban:74')

content = requests_API(endpoint='/dpe03existant/lines', params=params)
content['total']

170770

### Get all data from Haute-Savoie

Existing

In [None]:
# Download every `dpe03existant` row matching department code 74 by following
# the `next` link of each response, then save the result as CSV.
params = {
    'size': 200,
    'qs': 'code_departement_ban:74'
}

content = requests_API('/dpe03existant/lines', params=params)
if content is None:
    raise RuntimeError('Request to /dpe03existant/lines failed after all retries')

# Rows are accumulated in a plain list and turned into a DataFrame once at the
# end: concatenating a DataFrame per page re-copies all previous rows each time.
existant_rows = list(content['results'])

while content.get('next'):

    next_url = content['next']
    after_token = next_url.split('=')[-1]  # shown in the progress line only

    content = requests_API(None, custom_url=next_url)
    if content is None:
        # Rows fetched so far remain available in `existant_rows`.
        raise RuntimeError(f'Request to {next_url} failed after all retries')
    existant_rows.extend(content['results'])

    progress = round(len(existant_rows) / content['total'] * 100, 2)
    print(f"\rLoading: {progress}% [{after_token}]", end='', flush=True)

existants = pd.DataFrame(existant_rows)
existants.to_csv('data/existant74.csv', index=False)

Loading: 100.0% [22578092448716%2C307914]

New

In [None]:
# Download every `dpe02neuf` row matching department code 74 by following
# the `next` link of each response, then save the result as CSV.
params = {
    'size': 200,
    'qs': 'code_departement_ban:74'
}

content = requests_API('/dpe02neuf/lines', params=params)
if content is None:
    raise RuntimeError('Request to /dpe02neuf/lines failed after all retries')

# Rows are accumulated in a plain list and turned into a DataFrame once at the
# end: concatenating a DataFrame per page re-copies all previous rows each time.
new_rows = list(content['results'])

while content.get('next'):

    next_url = content['next']
    after_token = next_url.split('=')[-1]  # shown in the progress line only

    content = requests_API(None, custom_url=next_url)
    if content is None:
        # Rows fetched so far remain available in `new_rows`.
        raise RuntimeError(f'Request to {next_url} failed after all retries')
    new_rows.extend(content['results'])

    progress = round(len(new_rows) / content['total'] * 100, 2)
    print(f"\rLoading: {progress}% [{after_token}]", end='', flush=True)

new = pd.DataFrame(new_rows)
new.to_csv('data/neuf74.csv', index=False)

Loading: 100.0% [30582116851000%2C330320]