# Étape 2 : Création de données météorologiques pour la prédiction des incendies

"""
## Contexte
Dans cette partie du projet, nous utilisons les données géographiques générées dans la première partie (dates et coordonnées des incendies et non-incendies) pour enrichir notre dataset avec des informations météorologiques pertinentes. Ces données seront essentielles pour analyser l'impact des conditions météo sur la probabilité d'incendies.

## Objectif
L'objectif ici est de générer des données météorologiques précises pour les dates et lieux spécifiés dans notre dataset initial. Nous allons utiliser une API météorologique pour récupérer des informations telles que la température, l'humidité, la vitesse du vent et les précipitations, ainsi que la température et l'humidité du sol. Ces variables sont cruciales pour notre modèle de prédiction d'incendies.

## Utilisation de l'API
Pour accomplir cela, nous ferons appel à une API météorologique qui permet d'accéder à des données historiques et en temps réel. Ce processus inclut la préparation des requêtes API pour chaque enregistrement de notre dataset, la gestion des réponses de l'API, et l'intégration des données météorologiques dans notre dataframe.

## Stockage des Données
Une fois les données météorologiques collectées et traitées, elles seront ajoutées à notre ensemble de données existant. Le dataframe final, enrichi des informations météorologiques, sera sauvegardé pour des analyses ultérieures dans le processus de prédiction des incendies.

Ce notebook détaille chaque étape de ce processus pour garantir une compréhension claire.
"""


In [1]:
from geopy.geocoders import Nominatim
import requests
from datetime import datetime
import openmeteo_requests


import pandas as pd
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import requests_cache
import missingno as msno 
import matplotlib.pyplot as plt
from datetime import timedelta
import pandas as pd
import numpy as np
from openmeteo_requests import Client
import requests_cache
from retry_requests import retry


In [5]:
# Load the Portugal fire / non-fire points (dates and coordinates) into a DataFrame.
df = pd.read_csv("data_Portugal_.csv")

In [6]:
# Step 1: zero-pad acq_time to four digits (HHMM), then insert the colon -> "HH:MM".
df['acq_time'] = (
    df['acq_time']
    .map('{:04}'.format)
    .map(lambda hhmm: hhmm[:2] + ':' + hhmm[2:])
)

# Steps 2 and 3: combine acq_date and acq_time into a datetime, then render it
# as text with an explicit "+00:00" offset (the format fetch_weather_data parses).
df['date'] = (
    pd.to_datetime(df['acq_date'] + ' ' + df['acq_time'])
    .dt.strftime('%Y-%m-%d %H:%M:%S+00:00')
)

In [7]:
# Notebook display: preview the DataFrame with its new 'date' column.
df

Unnamed: 0,latitude,longitude,acq_date,acq_time,class,date
0,39.8053,-8.6446,2023-08-03,14:08,not fire,2023-08-03 14:08:00+00:00
1,40.9737,-7.9931,2023-02-13,13:54,not fire,2023-02-13 13:54:00+00:00
2,37.4558,-8.6925,2023-08-07,14:25,fire,2023-08-07 14:25:00+00:00
3,39.7427,-7.7687,2023-08-05,10:35,fire,2023-08-05 10:35:00+00:00
4,39.7274,-7.7657,2023-08-05,13:03,fire,2023-08-05 13:03:00+00:00
...,...,...,...,...,...,...
1071,37.3930,-8.6284,2023-08-07,21:22,fire,2023-08-07 21:22:00+00:00
1072,39.7776,-7.7632,2023-08-05,10:35,fire,2023-08-05 10:35:00+00:00
1073,37.4010,-8.7115,2023-08-06,22:19,fire,2023-08-06 22:19:00+00:00
1074,37.4002,-8.6646,2023-08-27,14:25,not fire,2023-08-27 14:25:00+00:00


# Fonction de Récupération des Données Météorologiques

"""
## Objectif de la Fonction
Cette fonction est conçue pour récupérer les données météorologiques associées à un point géographique spécifique et à une date d'acquisition donnée. Il est crucial pour notre analyse de récupérer les données couvrant les 24 heures qui précèdent l'heure de début minimale supposée de l'incendie.

## Justification du Choix Temporel
La fenêtre de 24 heures retenue se termine 12 heures avant l'heure d'acquisition : elle couvre donc l'intervalle allant de 36 heures à 12 heures avant l'acquisition. Ce décalage permet de compenser les éventuels délais dans l'enregistrement des incendies ou les inexactitudes dans la détermination de l'heure exacte de début de l'incendie. Cela est particulièrement important car l'heure d'acquisition (`acq_date` et `acq_time`) peut être enregistrée jusqu'à 12 heures après le début effectif de l'incendie.

"""


In [8]:
# Hourly variables requested from the archive API. The order matters: the
# series are read back from the response by their position in this tuple.
_HOURLY_VARIABLES = (
    "temperature_2m",
    "relative_humidity_2m",
    "precipitation",
    "rain",
    "wind_speed_10m",
    "soil_temperature_0_to_7cm",
    "soil_moisture_0_to_7cm",
)

# Shared Open-Meteo client, created on first use by _get_openmeteo_client().
_openmeteo_client = None


def _get_openmeteo_client():
    """Return the shared Open-Meteo client, building it on the first call.

    Building the cached session once (instead of once per row) avoids
    creating a new cache session and retry wrapper for every lookup.
    """
    global _openmeteo_client
    if _openmeteo_client is None:
        # Cached session (expire_after=-1) wrapped with retry on error.
        cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
        retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
        _openmeteo_client = Client(session=retry_session)
    return _openmeteo_client


def fetch_weather_data(longitude, latitude, date):
    """Summarise the weather at a point over a 24-hour window before an acquisition.

    The window ends 12 hours before ``date`` and starts 24 hours before that,
    i.e. it covers [date - 36h, date - 12h).

    Args:
        longitude: Longitude of the point, passed to the API as-is.
        latitude: Latitude of the point, passed to the API as-is.
        date: Acquisition timestamp as a string in the format
            ``"%Y-%m-%d %H:%M:%S%z"`` (e.g. ``"2023-08-03 14:08:00+00:00"``).

    Returns:
        dict mapping metric names (max/min/mean temperature and humidity,
        total precipitation and rain, soil temperature and moisture
        statistics, max wind speed) to values aggregated over the window.

    Raises:
        ValueError: if ``date`` does not match the expected format.
        Any error raised by the Open-Meteo client is propagated unchanged.
    """
    acquisition_time = datetime.strptime(date, "%Y-%m-%d %H:%M:%S%z")
    window_end = acquisition_time - timedelta(hours=12)
    window_start = window_end - timedelta(hours=24)

    # The API is queried by whole days; these two days cover the whole window,
    # which is then cut to the exact hours below.
    end_date = window_end.date()
    start_date = end_date - timedelta(days=1)
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date.isoformat(),
        "end_date": end_date.isoformat(),
        "hourly": list(_HOURLY_VARIABLES),
    }

    responses = _get_openmeteo_client().weather_api(
        "https://archive-api.open-meteo.com/v1/archive", params=params
    )

    # A single location is requested, so only the first response is used.
    hourly = responses[0].Hourly()

    # Rebuild the hourly time axis from the response's start, end and interval.
    hourly_data = {
        "date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left",
        )
    }
    for position, variable in enumerate(_HOURLY_VARIABLES):
        hourly_data[variable] = hourly.Variables(position).ValuesAsNumpy()

    frame = pd.DataFrame(hourly_data)
    in_window = (frame['date'] >= window_start) & (frame['date'] < window_end)
    window = frame[in_window]

    # NOTE: the key "mean_temperatur" (sic) is kept as-is because these keys
    # become column names in the datasets saved by this notebook.
    return {
        "max_temperature": np.max(window['temperature_2m']),
        "min_temperature": np.min(window['temperature_2m']),
        "mean_temperatur": np.mean(window['temperature_2m']),
        "max_humidity": np.max(window['relative_humidity_2m']),
        "min_humidity": np.min(window['relative_humidity_2m']),
        "mean_humidity": np.mean(window['relative_humidity_2m']),
        "total_precipitation": np.sum(window['precipitation']),
        "max_soil_temperature": np.max(window['soil_temperature_0_to_7cm']),
        "min_soil_temperature": np.min(window['soil_temperature_0_to_7cm']),
        "mean_soil_temperature": np.mean(window['soil_temperature_0_to_7cm']),
        "min_soil_moisture": np.min(window['soil_moisture_0_to_7cm']),
        "mean_soil_moisture": np.mean(window['soil_moisture_0_to_7cm']),
        "max_wind_speed": np.max(window['wind_speed_10m']),
        "total_rain": np.sum(window['rain']),
    }

In [9]:
# Metric names returned by fetch_weather_data, in order. Used to build an
# all-NaN row when a lookup fails ("mean_temperatur" is spelled as in the data).
_WEATHER_METRIC_KEYS = (
    "max_temperature",
    "min_temperature",
    "mean_temperatur",
    "max_humidity",
    "min_humidity",
    "mean_humidity",
    "total_precipitation",
    "max_soil_temperature",
    "min_soil_temperature",
    "mean_soil_temperature",
    "min_soil_moisture",
    "mean_soil_moisture",
    "max_wind_speed",
    "total_rain",
)


def apply_weather_data(row):
    """Fetch the weather metrics for one DataFrame row.

    Args:
        row: Mapping-like row providing 'longitude', 'latitude' and 'date'.

    Returns:
        pd.Series of weather metrics indexed by metric name. If the lookup
        fails for any reason, the error is printed and a Series of NaN with
        the same metric names is returned so the remaining rows are still
        processed.
    """
    try:
        results = fetch_weather_data(row['longitude'], row['latitude'], row["date"])
    except Exception as e:
        # Best-effort per row. The placeholder must not depend on `results`,
        # which is unbound when the fetch itself raised.
        print(f"Erreur lors de la récupération des données météo: {e}")
        return pd.Series(np.nan, index=list(_WEATHER_METRIC_KEYS))
    return pd.Series(results)


In [10]:
# Compute the weather metrics row by row; each returned Series becomes one row
# of the resulting metrics DataFrame.
weather_metrics = df.apply(apply_weather_data, axis="columns")

# Append the metric columns to the right of the original DataFrame.
data = pd.concat((df, weather_metrics), axis="columns")

In [11]:
# Notebook display: preview the DataFrame enriched with the weather metrics.
data

Unnamed: 0,latitude,longitude,acq_date,acq_time,class,date,max_temperature,min_temperature,mean_temperatur,max_humidity,min_humidity,mean_humidity,total_precipitation,max_soil_temperature,min_soil_temperature,mean_soil_temperature,min_soil_moisture,mean_soil_moisture,max_wind_speed,total_rain
0,39.8053,-8.6446,2023-08-03,14:08,not fire,2023-08-03 14:08:00+00:00,26.064001,15.613999,20.176500,98.444725,59.126102,82.937073,1.8,26.714001,18.864000,22.547333,0.102,0.113292,23.863411,1.8
1,40.9737,-7.9931,2023-02-13,13:54,not fire,2023-02-13 13:54:00+00:00,6.576500,-1.323500,1.855667,85.143608,57.491592,72.135567,0.0,6.026500,-0.273500,2.432750,0.326,0.330333,15.077082,0.0
2,37.4558,-8.6925,2023-08-07,14:25,fire,2023-08-07 14:25:00+00:00,34.023003,22.872999,27.779245,51.871464,18.076952,33.507095,0.0,40.523003,22.022999,30.775085,0.074,0.075208,17.673029,0.0
3,39.7427,-7.7687,2023-08-05,10:35,fire,2023-08-05 10:35:00+00:00,33.468502,17.968500,25.524750,57.478100,13.125694,31.453638,0.0,38.968502,21.018501,29.072668,0.092,0.092000,25.202570,0.0
4,39.7274,-7.7657,2023-08-05,13:03,fire,2023-08-05 13:03:00+00:00,33.468502,17.968500,26.095581,53.350155,13.125694,27.505592,0.0,38.968502,21.018501,29.360168,0.092,0.092000,25.202570,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1071,37.3930,-8.6284,2023-08-07,21:22,fire,2023-08-07 21:22:00+00:00,38.552998,22.703001,30.071753,44.678986,13.425508,28.836119,0.0,40.653000,24.552999,31.548834,0.108,0.109333,24.442913,0.0
1072,39.7776,-7.7632,2023-08-05,10:35,fire,2023-08-05 10:35:00+00:00,32.181499,17.031500,24.581497,54.152946,13.233459,31.745611,0.0,36.631500,19.681499,27.423164,0.095,0.095000,25.924999,0.0
1073,37.4010,-8.7115,2023-08-06,22:19,fire,2023-08-06 22:19:00+00:00,34.699001,23.549000,28.138580,49.370903,16.209232,32.658298,0.0,39.648998,22.698999,30.163584,0.074,0.076083,23.893261,0.0
1074,37.4002,-8.6646,2023-08-27,14:25,not fire,2023-08-27 14:25:00+00:00,23.991999,17.741999,20.887835,81.278290,56.523777,65.929955,0.0,31.691999,20.642000,25.889915,0.072,0.073750,28.802248,0.0


In [12]:
# Print the column dtypes and non-null counts of the enriched DataFrame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1076 entries, 0 to 1075
Data columns (total 20 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   latitude               1076 non-null   float64
 1   longitude              1076 non-null   float64
 2   acq_date               1076 non-null   object 
 3   acq_time               1076 non-null   object 
 4   class                  1076 non-null   object 
 5   date                   1076 non-null   object 
 6   max_temperature        1076 non-null   float64
 7   min_temperature        1076 non-null   float64
 8   mean_temperatur        1076 non-null   float64
 9   max_humidity           1076 non-null   float64
 10  min_humidity           1076 non-null   float64
 11  mean_humidity          1076 non-null   float64
 12  total_precipitation    1076 non-null   float64
 13  max_soil_temperature   1076 non-null   float64
 14  min_soil_temperature   1076 non-null   float64
 15  mean

In [14]:
# The raw acquisition columns are redundant now that 'date' combines them.
data.drop(columns=["acq_date", "acq_time"], inplace=True)

In [15]:
# Notebook display: preview the DataFrame after dropping acq_date / acq_time.
data

Unnamed: 0,latitude,longitude,class,date,max_temperature,min_temperature,mean_temperatur,max_humidity,min_humidity,mean_humidity,total_precipitation,max_soil_temperature,min_soil_temperature,mean_soil_temperature,min_soil_moisture,mean_soil_moisture,max_wind_speed,total_rain
0,39.8053,-8.6446,not fire,2023-08-03 14:08:00+00:00,26.064001,15.613999,20.176500,98.444725,59.126102,82.937073,1.8,26.714001,18.864000,22.547333,0.102,0.113292,23.863411,1.8
1,40.9737,-7.9931,not fire,2023-02-13 13:54:00+00:00,6.576500,-1.323500,1.855667,85.143608,57.491592,72.135567,0.0,6.026500,-0.273500,2.432750,0.326,0.330333,15.077082,0.0
2,37.4558,-8.6925,fire,2023-08-07 14:25:00+00:00,34.023003,22.872999,27.779245,51.871464,18.076952,33.507095,0.0,40.523003,22.022999,30.775085,0.074,0.075208,17.673029,0.0
3,39.7427,-7.7687,fire,2023-08-05 10:35:00+00:00,33.468502,17.968500,25.524750,57.478100,13.125694,31.453638,0.0,38.968502,21.018501,29.072668,0.092,0.092000,25.202570,0.0
4,39.7274,-7.7657,fire,2023-08-05 13:03:00+00:00,33.468502,17.968500,26.095581,53.350155,13.125694,27.505592,0.0,38.968502,21.018501,29.360168,0.092,0.092000,25.202570,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1071,37.3930,-8.6284,fire,2023-08-07 21:22:00+00:00,38.552998,22.703001,30.071753,44.678986,13.425508,28.836119,0.0,40.653000,24.552999,31.548834,0.108,0.109333,24.442913,0.0
1072,39.7776,-7.7632,fire,2023-08-05 10:35:00+00:00,32.181499,17.031500,24.581497,54.152946,13.233459,31.745611,0.0,36.631500,19.681499,27.423164,0.095,0.095000,25.924999,0.0
1073,37.4010,-8.7115,fire,2023-08-06 22:19:00+00:00,34.699001,23.549000,28.138580,49.370903,16.209232,32.658298,0.0,39.648998,22.698999,30.163584,0.074,0.076083,23.893261,0.0
1074,37.4002,-8.6646,not fire,2023-08-27 14:25:00+00:00,23.991999,17.741999,20.887835,81.278290,56.523777,65.929955,0.0,31.691999,20.642000,25.889915,0.072,0.073750,28.802248,0.0


In [16]:
# Save the enriched Portugal dataset; index=False keeps the row index out of the file.
data.to_csv("data_Portugal__.csv", index = False)

In [17]:
# Load the Greece fire / non-fire points; this rebinds `df`, replacing the Portugal data.
df = pd.read_csv("data_Greece_.csv")

In [18]:
# Step 1: zero-pad acq_time to four digits (HHMM), then insert the colon -> "HH:MM".
df['acq_time'] = (
    df['acq_time']
    .map('{:04}'.format)
    .map(lambda hhmm: hhmm[:2] + ':' + hhmm[2:])
)

# Steps 2 and 3: combine acq_date and acq_time into a datetime, then render it
# as text with an explicit "+00:00" offset (the format fetch_weather_data parses).
df['date'] = (
    pd.to_datetime(df['acq_date'] + ' ' + df['acq_time'])
    .dt.strftime('%Y-%m-%d %H:%M:%S+00:00')
)

In [19]:
# Compute the weather metrics row by row; each returned Series becomes one row
# of the resulting metrics DataFrame.
weather_metrics = df.apply(apply_weather_data, axis="columns")

# Append the metric columns to the right of the original DataFrame.
data = pd.concat((df, weather_metrics), axis="columns")

In [20]:
# Notebook display: preview the enriched Greece DataFrame.
data

Unnamed: 0,latitude,longitude,acq_date,acq_time,class,date,max_temperature,min_temperature,mean_temperatur,max_humidity,min_humidity,mean_humidity,total_precipitation,max_soil_temperature,min_soil_temperature,mean_soil_temperature,min_soil_moisture,mean_soil_moisture,max_wind_speed,total_rain
0,40.8753,25.7921,2023-08-22,00:43,fire,2023-08-22 00:43:00+00:00,37.216999,26.617001,30.902418,39.357601,17.723669,30.505224,0.0,35.267002,26.867001,30.550341,0.139,0.141958,27.887802,0.0
1,41.0431,26.0011,2023-08-02,09:02,not fire,2023-08-02 09:02:00+00:00,28.542500,18.292500,23.475836,88.970711,43.260834,64.741280,1.0,32.292503,20.892500,26.186251,0.134,0.136042,14.336861,1.0
2,36.1087,27.9427,2023-07-24,00:08,fire,2023-07-24 00:08:00+00:00,42.440998,29.140999,34.430580,44.080235,12.869796,30.432993,0.0,43.241001,30.890999,36.230587,0.106,0.106000,20.873791,0.0
3,40.8936,25.7387,2023-08-02,11:48,not fire,2023-08-02 11:48:00+00:00,26.299501,19.999500,23.532837,81.271935,54.133595,68.430412,0.9,29.199501,21.699501,25.234915,0.240,0.240750,10.383987,0.9
4,36.2543,27.9132,2023-07-30,00:37,not fire,2023-07-30 00:37:00+00:00,33.522499,21.922499,26.220411,77.584366,20.738951,54.043488,0.0,38.822498,25.272499,30.801664,0.104,0.104000,21.578989,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7162,36.2037,28.0475,2023-07-14,11:11,not fire,2023-07-14 11:11:00+00:00,37.451000,27.650999,32.717670,48.621288,21.819862,33.412159,0.0,44.501003,29.051001,36.519749,0.095,0.095000,14.904173,0.0
7163,36.0895,27.9149,2023-07-07,09:21,not fire,2023-07-07 09:21:00+00:00,35.335503,21.685499,27.285500,94.921204,33.652458,68.450455,0.0,40.035500,25.235498,31.420919,0.100,0.105583,17.477621,0.0
7164,40.3457,23.4106,2023-08-02,11:48,not fire,2023-08-02 11:48:00+00:00,27.727999,21.577999,24.617584,82.922882,56.562672,68.786919,0.6,30.028000,22.878000,26.036333,0.222,0.224208,10.829958,0.6
7165,36.0612,27.8905,2023-07-25,11:52,fire,2023-07-25 11:52:00+00:00,38.234501,28.834499,32.582417,50.019516,17.424505,35.326164,0.0,40.934498,31.184500,35.580334,0.107,0.107000,32.399998,0.0


In [21]:
# The raw acquisition columns are redundant now that 'date' combines them.
data.drop(columns=["acq_date", "acq_time"], inplace=True)

In [22]:
# Save the enriched Greece dataset; index=False keeps the row index out of the file.
data.to_csv("data_Greece__.csv", index = False)