In [2]:
import os
import pandas as pd
import json
import requests
import re

# Paramètres - Parameters

In [3]:
# --- Working directory ---
# Downloaded files are written to this folder and read back from it.
dir_path = r'D:\DATA_ANALYSE\Meteo_France\MONTH'  # YOUR_DIR_PATH

# --- What to download ---
# Dataset frequency, one of: '6MIN', 'HOUR', 'DAY', 'MONTH', 'DECA', 'DECAGRO'
data_freq = 'MONTH'
# Department numbers, given as strings
dep = ['56']
# File option for the daily datasets: 'RR-T-Vent' or 'autres-parametres'; None keeps every file
option = None

# --- Which station to extract ---
# The station extraction filters on station_id whenever it is not None,
# and only falls back to the exact station_name when station_id is None.
station_id = 56057001
station_name = 'LE FAOUET'


# Téléchargement - Download

In [4]:
# Dataset identifier used in the data.gouv.fr API URL, one per data frequency.
# Each identifier carries a trailing '/' and is appended as-is to the API base URL.
data_type = dict([
    ('6MIN', '6569ad61106d1679c93cdf77/'),
    ('HOUR', '6569b4473bedf2e7abad3b72/'),
    ('DAY', '6569b51ae64326786e4e8e1a/'),
    ('MONTH', '6569b3d7d193b4daf2b43edc/'),
    ('DECA', '6569b4a48a4161faec6b2779/'),
    ('DECAGRO', '6569af36ba0c3d2f9d4bf98c/'),
])

In [5]:
# API endpoint of the dataset that matches the selected frequency
dataset_id = data_type[data_freq]
url = 'https://www.data.gouv.fr/api/2/datasets/' + dataset_id

In [6]:
# Query the dataset endpoint.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of letting a later cell
# fail while parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
response

<Response [200]>

In [7]:
# Read the 'total' field of the 'resources' entry of the dataset description.
# The value is used afterwards as the page size of the resource listing request.
dataset_info = json.loads(response.text)
nb_pages = dataset_info['resources']['total']
nb_pages

315

In [8]:
# Resource listing of the dataset, requested as one single page holding every resource.
# `url` already ends with '/', so it is stripped first to avoid a '//resources/' path.
url_dep = url.rstrip('/') + f'/resources/?page=1&page_size={nb_pages}&type=main'

In [9]:
# Fetch the resource listing and decode it.
# The timeout avoids an endless wait; raise_for_status() reports an HTTP error
# explicitly rather than failing inside the JSON decoding.
response = requests.get(url_dep, timeout=60)
response.raise_for_status()
data = response.json()

In [11]:
# Go through every resource of the dataset and download the files of the
# requested department(s), restricted to `option` when one is set.
dep_title_pattern = re.compile('departement_([0-9]{2,3})_')

# Make sure the target folder exists before writing into it
os.makedirs(dir_path, exist_ok=True)

compte = 0
for resource in data['data']:
    titre = resource['title']
    m = dep_title_pattern.search(titre)
    # A title without a department number gives m = None: skip it instead of
    # crashing on m.group(1)
    if m is None or m.group(1) not in dep:
        continue
    if option is not None and option not in titre:
        continue

    try:
        resp = requests.get(resource['url'], timeout=120)
    except requests.RequestException as err:
        # Network failure or timeout: report it and carry on with the other files
        print('problem with ', titre, ':', err)
        continue

    if resp.status_code == 200:
        # The file extension comes from the resource 'format' field
        # (kept out of a variable named `format`, which would shadow the builtin)
        target = os.path.join(dir_path, titre + '.' + resource['format'])
        with open(target, 'wb') as file:
            file.write(resp.content)
        compte += 1
    else:
        print('problem with ', titre)
print(compte, ' file(s) downloaded')
            

3  file(s) downloaded


# Extraction des données de la station - Station data extraction

In [12]:
# List the downloaded files of the requested department(s).
dep_file_pattern = re.compile('departement_([0-9]{2,3})_')

# Keep regular files only. Sorted because os.listdir() returns names in
# arbitrary order, which would otherwise decide the order of the merged rows.
liste_file = sorted(
    entry for entry in os.listdir(dir_path)
    if os.path.isfile(os.path.join(dir_path, entry))
)

data_files = []
for file_n in liste_file:
    m = dep_file_pattern.search(file_n)
    # Files without a department number in their name (for instance the exports
    # 'my_dep.csv' / 'my_station.csv' saved in this same folder) give m = None:
    # skip them instead of crashing on m.group(1)
    if m is None:
        continue
    if m.group(1) in dep and (option is None or option in file_n):
        data_files.append(file_n)

print(data_files)

['MENS_departement_56_periode_1871-1949.csv.gz', 'MENS_departement_56_periode_1950-2022.csv.gz', 'MENS_departement_56_periode_2023-2024.csv.gz']


## OPTION 1 : dataset de toutes les stations (et de tous les départements téléchargés) - dataset of all the stations (and of all the downloaded departments)

In [13]:
# Read every selected file, then merge them with a single concat.
# Concatenating inside the loop would copy the accumulated data again at each file.
all_frames = [
    pd.read_csv(os.path.join(dir_path, file_name), sep=';')
    for file_name in data_files
]
# pd.concat() raises on an empty list, so fall back to an empty DataFrame
df_all = pd.concat(all_frames, ignore_index=True) if all_frames else pd.DataFrame()
df_all.head()

Unnamed: 0,NUM_POSTE,NOM_USUEL,LAT,LON,ALTI,AAAAMM,RR,QRR,NBRR,RR_ME,...,QNEIGETOTM,NEIGETOTAB,QNEIGETOTAB,NEIGETOTABDAT,NBJNEIGETOT1,NBJNEIGETOT10,NBJNEIGETOT30,NBJGREL,NBJORAG,NBJBROU
0,56005001,ARZON PORT NAVALO,47.55,-2.893333,16,187601,,,,10.0,...,,,,,,,,,,
1,56005001,ARZON PORT NAVALO,47.55,-2.893333,16,187602,,,,46.0,...,,,,,,,,,,
2,56005001,ARZON PORT NAVALO,47.55,-2.893333,16,187603,,,,90.0,...,,,,,,,,,,
3,56005001,ARZON PORT NAVALO,47.55,-2.893333,16,187604,,,,27.0,...,,,,,,,,,,
4,56005001,ARZON PORT NAVALO,47.55,-2.893333,16,187605,,,,6.0,...,,,,,,,,,,


**<span style="color:red">df_all</span> is the dataset of all the stations**

### SAVE your dataset in a file

In [14]:
# Export the merged dataset into the working directory
file_path = os.path.join(dir_path, 'my_dep.csv')
# Semicolon-separated, without the DataFrame index column
df_all.to_csv(path_or_buf=file_path, index=False, sep=';')

## OPTION 2 : dataset de la station uniquement - dataset of the selected station only

In [15]:
# Extract the rows of the selected station from every selected file.
# station_id is used when it is set; otherwise the exact station_name is used.
if station_id is None and station_name is None:
    # Without any criterion the result would silently be an empty dataset
    raise ValueError('set station_id or station_name to select a station')

station_frames = []
for file_name in data_files:
    df0 = pd.read_csv(os.path.join(dir_path, file_name), sep=';')
    if station_id is not None:
        selected = df0['NUM_POSTE'] == station_id
    else:
        selected = df0['NOM_USUEL'] == station_name
    station_frames.append(df0[selected])

# One concat at the end instead of growing the DataFrame at each file;
# pd.concat() raises on an empty list, so fall back to an empty DataFrame
df_station = pd.concat(station_frames, ignore_index=True) if station_frames else pd.DataFrame()
df_station.head()

Unnamed: 0,NUM_POSTE,NOM_USUEL,LAT,LON,ALTI,AAAAMM,RR,QRR,NBRR,RR_ME,...,QNEIGETOTM,NEIGETOTAB,QNEIGETOTAB,NEIGETOTABDAT,NBJNEIGETOT1,NBJNEIGETOT10,NBJNEIGETOT30,NBJGREL,NBJORAG,NBJBROU
0,56057001,LE FAOUET,48.033333,-3.5,155,189201,26.0,9.0,31.0,,...,,,,,,,,,,
1,56057001,LE FAOUET,48.033333,-3.5,155,189202,55.0,9.0,29.0,,...,,,,,,,,,,
2,56057001,LE FAOUET,48.033333,-3.5,155,189203,17.0,9.0,31.0,,...,,,,,,,,,,
3,56057001,LE FAOUET,48.033333,-3.5,155,189204,34.0,9.0,30.0,,...,,,,,,,,,,
4,56057001,LE FAOUET,48.033333,-3.5,155,189205,28.0,9.0,31.0,,...,,,,,,,,,,


**<span style="color:red">df_station</span> is the dataset of your selected station, enjoy !**

### SAVE your dataset in a file

In [16]:
# Export the station dataset into the working directory
file_path = os.path.join(dir_path, 'my_station.csv')
# Semicolon-separated, without the DataFrame index column
df_station.to_csv(path_or_buf=file_path, index=False, sep=';')