In [6]:
import requests
from tqdm import tqdm

url_table_logement = "https://www.data.gouv.fr/fr/datasets/r/f314175a-6d33-4ee4-b5eb-2cb6c29df2c2"
url_table_individu = "https://www.data.gouv.fr/fr/datasets/r/c8e1b241-75fe-43e9-a266-830fc30ec61d"
url_doc_logement = "https://www.data.gouv.fr/fr/datasets/r/c274705f-98db-4d9b-9674-578e04f03198"
url_doc_individu = "https://www.data.gouv.fr/fr/datasets/r/1c6c6ab2-b766-41a4-90f0-043173d5e9d1"

def download_file(url: str, filename: str) -> None:
    try:
        # Send a GET request to the URL
        response = requests.get(url, stream=True)
        # Raise an exception for HTTP errors
        response.raise_for_status()
        
        # Get the total file size from the headers (if available)
        total_size = int(response.headers.get('content-length', 0))
        block_size = 1024  # 1 Kilobyte

        # Progress bar setup
        progress_bar = tqdm(total=total_size, unit='iB', unit_scale=True, desc=filename)
        
        # Write the content to the file
        with open(filename, 'wb') as file:
            for chunk in response.iter_content(chunk_size=block_size):
                progress_bar.update(len(chunk))
                file.write(chunk)
        
        progress_bar.close()
        print(f"File downloaded successfully: {filename}")
    
    except requests.exceptions.RequestException as e:
        print(f"Failed to download the file: {e}")


download_file(url_table_logement, "RPlogement.parquet")
download_file(url_table_individu, "RPindividus.parquet")
download_file(url_doc_logement, "RPlogement_doc.csv")
download_file(url_doc_individu, "RPindividus_doc.csv")

RPlogement.parquet: 100%|██████████| 504M/504M [00:07<00:00, 64.0MiB/s] 


File downloaded successfully: RPlogement.parquet


RPindividus.parquet: 100%|██████████| 533M/533M [00:18<00:00, 29.0MiB/s]    


File downloaded successfully: RPindividus.parquet


RPlogement_doc.csv: 36.4kiB [00:00, 20.6MiB/s]


File downloaded successfully: RPlogement_doc.csv


RPindividus_doc.csv: 48.6kiB [00:00, 4.51MiB/s]

File downloaded successfully: RPindividus_doc.csv





In [10]:
import duckdb

duckdb.sql("SELECT * FROM read_parquet('RPlogement.parquet') WHERE CATL == 3 LIMIT 5 ")

┌─────────┬─────────┬───────────┬─────────┬───────┬───┬─────────┬─────────┬─────────┬─────────┬─────────┬─────────┐
│ COMMUNE │   ARM   │   IRIS    │  ACHL   │ AEMM  │ … │ TRANSM  │ TRIRIS  │  TYPC   │  TYPL   │  VOIT   │   WC    │
│ varchar │ varchar │  varchar  │ varchar │ int32 │   │ varchar │ varchar │ varchar │ varchar │ varchar │ varchar │
├─────────┼─────────┼───────────┼─────────┼───────┼───┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────┤
│ 01001   │ ZZZZZ   │ ZZZZZZZZZ │ B12     │     0 │ … │ Y       │ ZZZZZZ  │ Y       │ 1       │ X       │ Z       │
│ 01001   │ ZZZZZ   │ ZZZZZZZZZ │ B12     │     0 │ … │ Y       │ ZZZZZZ  │ Y       │ 1       │ X       │ Z       │
│ 01001   │ ZZZZZ   │ ZZZZZZZZZ │ B12     │     0 │ … │ Y       │ ZZZZZZ  │ Y       │ 1       │ X       │ Z       │
│ 01001   │ ZZZZZ   │ ZZZZZZZZZ │ A12     │     0 │ … │ Y       │ ZZZZZZ  │ Y       │ 1       │ X       │ Z       │
│ 01001   │ ZZZZZ   │ ZZZZZZZZZ │ A12     │     0 │ … │ Y       │ ZZZZZZ