In [None]:
# Installer les bibliothèques nécessaires
!pip install geopandas shapely pandas --upgrade --quiet

# Imports
import geopandas as gpd
import shapely
import pandas as pd
import os
from functools import partial
import multiprocessing
import gc
import time
import traceback

# Report the versions of the geospatial libraries so the run can be checked.
print(
    f"GeoPandas version: {gpd.__version__}",
    f"Shapely version: {shapely.__version__}",
    sep="\n",
)


In [None]:
# Mount Google Drive at /content/drive so the project folders below are reachable.
from google.colab import drive
drive.mount('/content/drive')

# Paths
# Folder holding the SAFRAN CSV inputs; the filtered output CSV is written into this same folder.
base_safran_folder = "/content/drive/MyDrive/Stage_CEFREM_Salma_2025/Data/Meteo/SAFRAN/Safran_origine"
# Folder holding the GIS layers.
base_gis_folder = "/content/drive/MyDrive/Stage_CEFREM_Salma_2025/Data/GIS"
# Shapefile used as the spatial filter.
chemin_fichier_pyrenees_shp = os.path.join(base_gis_folder, "polygon_PYRENEES.shp")
# Single output CSV that receives the concatenated, filtered rows of every input file.
chemin_fichier_sortie_global = os.path.join(base_safran_folder, "Données_SAFRAN_Pyrenees_filtré_Final.csv")

# SAFRAN input files, processed (and concatenated into the output) in the order listed here.
# NOTE(review): "latest-..." is listed before "previous-..."; if the file names reflect the
# periods they cover, the output will not be in chronological order - confirm whether that matters.
safran_files = [
    "QUOT_SIM2_1970-1979.csv",
    "QUOT_SIM2_1980-1989.csv",
    "QUOT_SIM2_1990-1999.csv",
    "QUOT_SIM2_2000-2009.csv",
    "QUOT_SIM2_2010-2019.csv",
    "QUOT_SIM2_latest-20250601-20250714.csv",
    "QUOT_SIM2_previous-2020-202506.csv"
]

# CRS & columns
# CRS assigned to the SAFRAN grid points built from LAMBX/LAMBY.
crs_safran_initial = 'EPSG:27572'
# CRS in which the polygon is kept and in which the point-in-polygon test is performed.
crs_cible_wgs84 = 'EPSG:4326'
# Names of the coordinate columns in the SAFRAN files.
coord_col_x, coord_col_y = 'LAMBX', 'LAMBY'
# Columns read from each SAFRAN file (passed to read_csv as `usecols`); all others are ignored.
columns_to_keep = ['LAMBX', 'LAMBY', 'DATE', 'PRENEI_Q', 'PRELIQ_Q', 'T_Q']
# Tokens that read_csv must interpret as missing values.
na_values_list = ['NA', 'N/A', 'NULL', '', ' ', '*****', '-9999']

# Despite its name this is not a dtype specification: it is the mapping
# {source column name -> output column name} applied with DataFrame.rename.
# The coordinate and date columns map to themselves, i.e. keep their names.
dtype_safran = {
    'LAMBX': 'LAMBX', 'LAMBY': 'LAMBY', 'DATE': 'DATE',
    'PRENEI_Q': 'SAFRAN_Snowfall_daily_mm',
    'PRELIQ_Q': 'SAFRAN_Precipitation_liquid_daily_mm',
    'T_Q': 'SAFRAN_Temperature_daily_C'
}

# Intended per-column dtypes for reading the SAFRAN files.
# NOTE(review): this mapping is handed to process_safran_file, but the visible reader call
# does not pass it to pd.read_csv, so it currently has no effect - confirm the intent.
read_dtypes_optimized = {
    'LAMBX': 'float32', 'LAMBY': 'float32', 'DATE': str,
    'PRENEI_Q': 'float32', 'PRELIQ_Q': 'float32', 'T_Q': 'float32'
}

In [None]:
# Read the Pyrenees shapefile and make sure it is expressed in the target CRS (WGS84).
gdf_pyrenees_polygon = gpd.read_file(chemin_fichier_pyrenees_shp)
if gdf_pyrenees_polygon.crs != crs_cible_wgs84:
    gdf_pyrenees_polygon = gdf_pyrenees_polygon.to_crs(crs_cible_wgs84)

# Bounding box of the polygon in the SAFRAN CRS: used as a cheap rectangular
# pre-filter on the raw coordinates before the exact point-in-polygon test.
gdf_pyrenees_in_safran_crs = gdf_pyrenees_polygon.to_crs(crs_safran_initial)
pyrenees_bounds_in_safran_crs = gdf_pyrenees_in_safran_crs.total_bounds

# Single geometry (union of all features, in WGS84) used for the exact test.
# `unary_union` is deprecated since GeoPandas 1.0 in favour of `union_all()`;
# the fallback keeps the cell working on older GeoPandas releases.
if hasattr(gdf_pyrenees_polygon, "union_all"):
    pyrenees_union_geometry = gdf_pyrenees_polygon.union_all()
else:
    pyrenees_union_geometry = gdf_pyrenees_polygon.unary_union


In [None]:
def process_safran_file(filename, pyrenees_union_geometry, pyrenees_bounds_in_safran_crs, base_safran_folder,
                        crs_safran_initial, crs_cible_wgs84,
                        coord_col_x, coord_col_y, columns_to_keep,
                        na_values_list, dtype_safran, read_dtypes_optimized):
    """Extract from one SAFRAN CSV file the rows whose grid point lies inside a polygon.

    The file is read in chunks of 200000 rows (';' separator, ',' decimal mark).
    Each chunk goes through two filters:

    1. a rectangular pre-filter on the raw coordinates, using the polygon's
       bounding box expressed in the SAFRAN CRS;
    2. an exact ``within`` test against ``pyrenees_union_geometry`` after the
       points have been reprojected to ``crs_cible_wgs84``.

    The coordinate columns are multiplied by 100 before being interpreted in
    ``crs_safran_initial`` (presumably a hectometre-to-metre conversion -
    confirm against the SAFRAN file specification).

    Args:
        filename: Name of the CSV file inside ``base_safran_folder``.
        pyrenees_union_geometry: Geometry, in ``crs_cible_wgs84``, that points
            must lie within to be kept.
        pyrenees_bounds_in_safran_crs: ``(minx, miny, maxx, maxy)`` of the
            polygon in ``crs_safran_initial``.
        base_safran_folder: Folder containing ``filename``.
        crs_safran_initial: CRS assigned to the scaled file coordinates.
        crs_cible_wgs84: CRS the points are reprojected to for the exact test.
        coord_col_x: Name of the X coordinate column in the file.
        coord_col_y: Name of the Y coordinate column in the file.
        columns_to_keep: Columns read from the file (``usecols``).
        na_values_list: Extra tokens read as missing values.
        dtype_safran: Mapping {source column -> output column} used to rename
            the columns of the result (not a dtype specification).
        read_dtypes_optimized: Accepted for call compatibility; it is not
            applied when reading the file.

    Returns:
        A ``pandas.DataFrame`` holding the kept rows with renamed columns.
        An empty frame with the renamed column labels is returned when no row
        matches, and also when any error occurs while processing the file (the
        error and its traceback are printed; rows already collected from that
        file are discarded).
    """
    path = os.path.join(base_safran_folder, filename)
    output_columns = list(dtype_safran.values())
    all_chunks = []
    chunksize = 200000
    try:
        # The bounding box is the same for every chunk: unpack it once.
        minx, miny, maxx, maxy = pyrenees_bounds_in_safran_crs

        # Using the reader as a context manager closes the file handle even
        # when processing a chunk raises.
        with pd.read_csv(path, sep=';', decimal=',', encoding='utf-8', na_values=na_values_list,
                         usecols=columns_to_keep, chunksize=chunksize) as reader:
            for chunk in reader:
                # Rows without coordinates cannot be located: drop them.
                chunk = chunk.dropna(subset=[coord_col_x, coord_col_y])
                chunk['LAMBX_m'] = chunk[coord_col_x] * 100
                chunk['LAMBY_m'] = chunk[coord_col_y] * 100

                # Cheap rectangular pre-filter (bounds are inclusive).
                in_bbox = ((chunk['LAMBX_m'] >= minx) & (chunk['LAMBY_m'] >= miny) &
                           (chunk['LAMBX_m'] <= maxx) & (chunk['LAMBY_m'] <= maxy))
                chunk = chunk[in_bbox]
                if chunk.empty:
                    continue

                # Exact test, performed in the polygon's CRS.
                gdf = gpd.GeoDataFrame(chunk, geometry=gpd.points_from_xy(chunk['LAMBX_m'], chunk['LAMBY_m']),
                                       crs=crs_safran_initial).to_crs(crs_cible_wgs84)
                gdf = gdf[gdf.geometry.within(pyrenees_union_geometry)]
                if gdf.empty:
                    # Inside the bounding box but outside the polygon:
                    # nothing to keep, so do not accumulate an empty frame.
                    continue

                # Drop the helper columns and expose the output column names.
                df = gdf.drop(columns=['geometry', 'LAMBX_m', 'LAMBY_m'])
                all_chunks.append(df.rename(columns=dtype_safran))

        if all_chunks:
            return pd.concat(all_chunks, ignore_index=True)
        return pd.DataFrame(columns=output_columns)

    except Exception as e:
        # Best effort: report the failure and let the caller carry on with
        # the other files.
        print(f" Erreur : {e}")
        traceback.print_exc()
        return pd.DataFrame(columns=output_columns)

In [None]:
# Simple sequential processing to start with: one SAFRAN file after another.
# Every per-file setting is bound once, so the loop only supplies the file name.
process_one_file = partial(
    process_safran_file,
    pyrenees_union_geometry=pyrenees_union_geometry,
    pyrenees_bounds_in_safran_crs=pyrenees_bounds_in_safran_crs,
    base_safran_folder=base_safran_folder,
    crs_safran_initial=crs_safran_initial,
    crs_cible_wgs84=crs_cible_wgs84,
    coord_col_x=coord_col_x,
    coord_col_y=coord_col_y,
    columns_to_keep=columns_to_keep,
    na_values_list=na_values_list,
    dtype_safran=dtype_safran,
    read_dtypes_optimized=read_dtypes_optimized,
)

all_results = []
for f in safran_files:
    print(f"Traitement de {f}...")
    df = process_one_file(f)
    if df.empty:
        # Files that yield no rows are left out of the final concatenation.
        continue
    all_results.append(df)

In [None]:
# Merge the per-file results and export them as a single CSV;
# nothing is written when no file produced any row.
if not all_results:
    print(" Aucun résultat à sauvegarder.")
else:
    final_df = pd.concat(all_results, ignore_index=True)
    final_df.to_csv(chemin_fichier_sortie_global, index=False, encoding='utf-8')
    print(f"\n Données sauvegardées dans : {chemin_fichier_sortie_global}")


In [None]:
# Sanity check: reload the exported CSV and inspect its first rows and schema.
try:
    # The export is written by DataFrame.to_csv with its default settings
    # (',' separator, '.' decimal mark), so it must be read back with the
    # defaults too: decimal=',' would leave every fractional column as text.
    df_final = pd.read_csv(chemin_fichier_sortie_global, encoding='utf-8')
    print(df_final.head())
    # info() prints its report itself and returns None; wrapping it in
    # print() only added a stray "None" line.
    df_final.info()
except Exception as e:
    print(f" Lecture échouée : {e}")
