# Les imports et configuration du document

In [1]:
import pandas as pd 
import seaborn as sns
import warnings 
import os
import time
from datetime import datetime
import glob
from matplotlib import pyplot as plt

warnings.filterwarnings(action="ignore")

%matplotlib inline

if int(str(sns.__version__).split('.')[1]) > 8 : 
    plt.style.use('seaborn-v0_8-darkgrid')
else:
    plt.style.use('seaborn-darkgrid')
    
sns.set(font_scale=3)

In [2]:
!dir ..\donnees\meteo\2025

 Le volume dans le lecteur C s'appelle Windows
 Le num‚ro de s‚rie du volume est 7E56-F105

 R‚pertoire de C:\dev\AnalyseTraitementDonnees\donnees\meteo\2025

02/02/2026  18:35    <DIR>          .
02/02/2026  18:35    <DIR>          ..
31/01/2025  22:10         4ÿ143ÿ578 synop.202501.csv
28/02/2025  22:10         3ÿ684ÿ501 synop.202502.csv
31/03/2025  22:10         4ÿ073ÿ763 synop.202503.csv
30/04/2025  22:10         3ÿ915ÿ948 synop.202504.csv
31/05/2025  22:10         4ÿ126ÿ452 synop.202505.csv
30/06/2025  22:10         3ÿ932ÿ512 synop.202506.csv
31/07/2025  22:10         4ÿ111ÿ781 synop.202507.csv
31/08/2025  22:10         4ÿ082ÿ704 synop.202508.csv
30/09/2025  22:10         4ÿ005ÿ894 synop.202509.csv
10/10/2025  07:10         1ÿ261ÿ619 synop.202510.csv
30/11/2025  22:10         4ÿ069ÿ074 synop.202511.csv
31/12/2025  22:10         4ÿ238ÿ763 synop.202512.csv
              12 fichier(s)       45ÿ646ÿ589 octets
               2 R‚p(s)  373ÿ317ÿ238ÿ784 octets libres


In [3]:
def lectureFichiersMeteo(repertoire='../donnees/meteo'):
    listeFichiers = glob.glob(os.path.join(repertoire, '**', 'synop*.csv'), recursive=True)
        
    meteo = pd.concat([ pd.read_csv(fichier,
                           sep=';',
                           usecols=['numer_sta','date','dd','ff','t','u','vv',
                                    'pres','rr1','rr3','rr6','rr12','rr24'],
                           na_values='mq',
                           dtype={'numer_sta':str,'date':str},
                          ).rename( columns={'numer_sta':'Station',
                                              'date':'DateHeure',
                                              'dd':'DirectionVent',
                                              'ff':'VitesseVent',
                                              't':'Temperature',
                                              'u':'Humidite',
                                              'vv':'Visibilite',
                                              'pres':'Pression',
                                              'rr1' :'Precipitation01',
                                              'rr3' :'Precipitation03',
                                              'rr6' :'Precipitation06',
                                              'rr12':'Precipitation12',
                                              'rr24':'Precipitation24'}) 
                         for fichier in listeFichiers],
                         ignore_index=True)
    
    
    meteo["DateHeure"]     = pd.to_datetime(meteo["DateHeure"], format='%Y%m%d%H%M%S')
    meteo.Temperature      = meteo.Temperature - 273.15
    meteo.Pression         = meteo.Pression / 100
    meteo.Visibilite       = meteo.Visibilite / 1000

    meteo['Precipitation'] =  meteo['Precipitation03'].combine_first(meteo['Precipitation06']/2)\
                                                      .combine_first(meteo['Precipitation12']/4)\
                                                      .combine_first(meteo['Precipitation24']/8)\
                                                      .combine_first(meteo['Precipitation01']*3)
    #coalesce 
    
    meteo.drop(columns=['Precipitation06',
                        'Precipitation12',
                        'Precipitation24',
                        'Precipitation01',
                        'Precipitation03'], inplace=True)

    meteo['Jour']          = meteo.DateHeure.dt.dayofyear
    meteo['Semaine']       = meteo.DateHeure.dt.isocalendar().week
    meteo['Mois']          = meteo.DateHeure.dt.month
    meteo['Annee']         = meteo.DateHeure.dt.year
    meteo['MoisJour']      = meteo.DateHeure.dt.month*100 + meteo.DateHeure.dt.day
    meteo['AnneeMois']     = meteo.DateHeure.dt.year*100 + meteo.DateHeure.dt.month
    meteo['AnneeSemaine']  = meteo.DateHeure.dt.year*100 + meteo.DateHeure.dt.isocalendar().week
    meteo['AnneeJour']     = meteo.DateHeure.dt.year*1000 + meteo.DateHeure.dt.day
    meteo['Heure']         = meteo.DateHeure.dt.hour
    meteo['JourNuit']      = meteo.DateHeure.dt.hour.apply(lambda x: 'jour' if ( x >=9 ) & ( x<=18) else 'nuit')

    return meteo    

## La lecture des fichiers d'un répértoire 

In [4]:
%%time
donnees = lectureFichiersMeteo(repertoire='../donnees/meteo')

CPU times: total: 953 ms
Wall time: 976 ms


In [5]:
donnees.tail()

Unnamed: 0,Station,DateHeure,DirectionVent,VitesseVent,Temperature,Humidite,Visibilite,Pression,Precipitation,Jour,Semaine,Mois,Annee,MoisJour,AnneeMois,AnneeSemaine,AnneeJour,Heure,JourNuit
339158,81401,2025-12-31 21:00:00,120.0,1.6,24.6,,,1012.4,12.1,365,1,12,2025,1231,202512,202501,2025031,21,nuit
339159,81405,2025-12-31 21:00:00,60.0,5.2,28.2,77.0,47.75,1012.3,0.0,365,1,12,2025,1231,202512,202501,2025031,21,nuit
339160,81408,2025-12-31 21:00:00,70.0,2.3,29.0,,,1011.5,0.0,365,1,12,2025,1231,202512,202501,2025031,21,nuit
339161,81415,2025-12-31 21:00:00,110.0,1.3,32.2,,,999.5,0.0,365,1,12,2025,1231,202512,202501,2025031,21,nuit
339162,89642,2025-12-31 21:00:00,100.0,8.2,0.1,98.0,1.5,984.6,,365,1,12,2025,1231,202512,202501,2025031,21,nuit


In [6]:
donnees.shape

(339163, 19)

In [7]:
postes = pd.read_csv('../donnees/postesSynop.csv',sep=';',dtype={'ID':str})

In [8]:
postes = pd.read_csv('../donnees/postesSynop.csv',sep=';',dtype={'ID':str})
postes.Nom =postes.Nom.apply(lambda x : x if x in ['CLERMONT-FD','MONT-DE-MARSAN',
                                       'ST-PIERRE','ST-BARTHELEMY METEO'] 
                               else x[0:x.find('-')] 
                                    if x.find('-') != -1 else x).apply(lambda x : str(x).title())
postes.Altitude = postes.Altitude.astype('int16')
postes = postes[postes.ID < '08000']

In [9]:
postes.loc[postes.Latitude  < postes.Latitude.mean(),'Zone'] = 'S'
postes.loc[postes.Latitude  > postes.Latitude.mean(),'Zone'] = 'N'
postes.loc[postes.Longitude < postes.Longitude.mean(),'Zone'] += 'O'
postes.loc[postes.Longitude > postes.Longitude.mean(),'Zone'] += 'E'

In [10]:
postes.head()

Unnamed: 0,ID,Nom,Latitude,Longitude,Altitude,Zone
0,7005,Abbeville,50.136,1.834,69,NO
1,7015,Lille,50.57,3.0975,47,NE
2,7020,Pte De La Hague,49.725167,-1.939833,6,NO
3,7027,Caen,49.18,-0.456167,67,NO
4,7037,Rouen,49.383,1.181667,151,NO


In [11]:
donnees = postes.merge(donnees, how = "inner", 
                     left_on = "ID", right_on = "Station").drop(["ID","Station"], axis = "columns")

In [12]:
donnees.set_index(['Nom', 'Latitude', 'Longitude', 'Altitude', 'Zone'],inplace=True)

In [13]:
donnees.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,DateHeure,DirectionVent,VitesseVent,Temperature,Humidite,Visibilite,Pression,Precipitation,Jour,Semaine,Mois,Annee,MoisJour,AnneeMois,AnneeSemaine,AnneeJour,Heure,JourNuit
Nom,Latitude,Longitude,Altitude,Zone,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Abbeville,50.136,1.834,69,NO,2024-01-01 00:00:00,240.0,6.5,7.4,81.0,15.46,993.2,1.0,1,1,1,2024,101,202401,202401,2024001,0,nuit
Abbeville,50.136,1.834,69,NO,2024-01-01 03:00:00,230.0,4.2,6.9,84.0,15.58,993.8,1.0,1,1,1,2024,101,202401,202401,2024001,3,nuit
Abbeville,50.136,1.834,69,NO,2024-01-01 06:00:00,240.0,4.0,7.0,87.0,19.6,995.7,3.3,1,1,1,2024,101,202401,202401,2024001,6,nuit
Abbeville,50.136,1.834,69,NO,2024-01-01 09:00:00,230.0,3.3,7.1,85.0,17.54,998.9,-0.1,1,1,1,2024,101,202401,202401,2024001,9,jour
Abbeville,50.136,1.834,69,NO,2024-01-01 12:00:00,240.0,4.2,8.1,78.0,20.0,999.8,0.2,1,1,1,2024,101,202401,202401,2024001,12,jour


In [14]:
donnees.reset_index(inplace=True)

In [15]:
donnees.head()

Unnamed: 0,Nom,Latitude,Longitude,Altitude,Zone,DateHeure,DirectionVent,VitesseVent,Temperature,Humidite,...,Jour,Semaine,Mois,Annee,MoisJour,AnneeMois,AnneeSemaine,AnneeJour,Heure,JourNuit
0,Abbeville,50.136,1.834,69,NO,2024-01-01 00:00:00,240.0,6.5,7.4,81.0,...,1,1,1,2024,101,202401,202401,2024001,0,nuit
1,Abbeville,50.136,1.834,69,NO,2024-01-01 03:00:00,230.0,4.2,6.9,84.0,...,1,1,1,2024,101,202401,202401,2024001,3,nuit
2,Abbeville,50.136,1.834,69,NO,2024-01-01 06:00:00,240.0,4.0,7.0,87.0,...,1,1,1,2024,101,202401,202401,2024001,6,nuit
3,Abbeville,50.136,1.834,69,NO,2024-01-01 09:00:00,230.0,3.3,7.1,85.0,...,1,1,1,2024,101,202401,202401,2024001,9,jour
4,Abbeville,50.136,1.834,69,NO,2024-01-01 12:00:00,240.0,4.2,8.1,78.0,...,1,1,1,2024,101,202401,202401,2024001,12,jour


In [16]:
donnees.to_parquet('../donnees/donnees_meteo.gzip',compression='gzip', engine='pyarrow')

In [17]:
!dir ..\donnees\donnees_meteo.gzip

 Le volume dans le lecteur C s'appelle Windows
 Le num�ro de s�rie du volume est 7E56-F105

 R�pertoire de C:\dev\AnalyseTraitementDonnees\donnees

02/02/2026  19:19         1�838�350 donnees_meteo.gzip
               1 fichier(s)        1�838�350 octets
               0 R�p(s)  366�556�721�152 octets libres


In [18]:
meteo = pd.read_parquet('../donnees/donnees_meteo.gzip', engine='pyarrow')

In [19]:
meteo.head()

Unnamed: 0,Nom,Latitude,Longitude,Altitude,Zone,DateHeure,DirectionVent,VitesseVent,Temperature,Humidite,...,Jour,Semaine,Mois,Annee,MoisJour,AnneeMois,AnneeSemaine,AnneeJour,Heure,JourNuit
0,Abbeville,50.136,1.834,69,NO,2024-01-01 00:00:00,240.0,6.5,7.4,81.0,...,1,1,1,2024,101,202401,202401,2024001,0,nuit
1,Abbeville,50.136,1.834,69,NO,2024-01-01 03:00:00,230.0,4.2,6.9,84.0,...,1,1,1,2024,101,202401,202401,2024001,3,nuit
2,Abbeville,50.136,1.834,69,NO,2024-01-01 06:00:00,240.0,4.0,7.0,87.0,...,1,1,1,2024,101,202401,202401,2024001,6,nuit
3,Abbeville,50.136,1.834,69,NO,2024-01-01 09:00:00,230.0,3.3,7.1,85.0,...,1,1,1,2024,101,202401,202401,2024001,9,jour
4,Abbeville,50.136,1.834,69,NO,2024-01-01 12:00:00,240.0,4.2,8.1,78.0,...,1,1,1,2024,101,202401,202401,2024001,12,jour


In [20]:
meteo.reset_index(inplace=True)
meteo.columns

Index(['index', 'Nom', 'Latitude', 'Longitude', 'Altitude', 'Zone',
       'DateHeure', 'DirectionVent', 'VitesseVent', 'Temperature', 'Humidite',
       'Visibilite', 'Pression', 'Precipitation', 'Jour', 'Semaine', 'Mois',
       'Annee', 'MoisJour', 'AnneeMois', 'AnneeSemaine', 'AnneeJour', 'Heure',
       'JourNuit'],
      dtype='object')

In [21]:
meteo = meteo[['Nom', 'Latitude', 'Longitude', 'Altitude', 'Zone', 'DateHeure', 'Jour', 'Semaine', 'Mois', 'Annee',
       'MoisJour', 'AnneeMois', 'AnneeSemaine', 'AnneeJour', 'Heure', 'JourNuit',
       'DirectionVent', 'VitesseVent', 'Temperature', 'Humidite', 'Visibilite',
       'Pression', 'Precipitation']]

In [22]:
meteo.to_parquet('../donnees/donnees_meteo.gzip',compression='gzip', engine='pyarrow')

In [23]:
meteo.to_parquet('../donnees/donnees_meteo.parquet',compression=None, engine='pyarrow')

In [24]:
meteo.head()

Unnamed: 0,Nom,Latitude,Longitude,Altitude,Zone,DateHeure,Jour,Semaine,Mois,Annee,...,AnneeJour,Heure,JourNuit,DirectionVent,VitesseVent,Temperature,Humidite,Visibilite,Pression,Precipitation
0,Abbeville,50.136,1.834,69,NO,2024-01-01 00:00:00,1,1,1,2024,...,2024001,0,nuit,240.0,6.5,7.4,81.0,15.46,993.2,1.0
1,Abbeville,50.136,1.834,69,NO,2024-01-01 03:00:00,1,1,1,2024,...,2024001,3,nuit,230.0,4.2,6.9,84.0,15.58,993.8,1.0
2,Abbeville,50.136,1.834,69,NO,2024-01-01 06:00:00,1,1,1,2024,...,2024001,6,nuit,240.0,4.0,7.0,87.0,19.6,995.7,3.3
3,Abbeville,50.136,1.834,69,NO,2024-01-01 09:00:00,1,1,1,2024,...,2024001,9,jour,230.0,3.3,7.1,85.0,17.54,998.9,-0.1
4,Abbeville,50.136,1.834,69,NO,2024-01-01 12:00:00,1,1,1,2024,...,2024001,12,jour,240.0,4.2,8.1,78.0,20.0,999.8,0.2


In [25]:
%%time
meteo = pd.read_parquet('../donnees/donnees_meteo.parquet', engine='pyarrow')
meteo.dtypes

CPU times: total: 141 ms
Wall time: 20.2 ms


Nom                      object
Latitude                float64
Longitude               float64
Altitude                  int16
Zone                     object
DateHeure        datetime64[ns]
Jour                      int32
Semaine                  UInt32
Mois                      int32
Annee                     int32
MoisJour                  int32
AnneeMois                 int32
AnneeSemaine              Int64
AnneeJour                 int32
Heure                     int32
JourNuit                 object
DirectionVent           float64
VitesseVent             float64
Temperature             float64
Humidite                float64
Visibilite              float64
Pression                float64
Precipitation           float64
dtype: object

In [26]:
meteo.head()

Unnamed: 0,Nom,Latitude,Longitude,Altitude,Zone,DateHeure,Jour,Semaine,Mois,Annee,...,AnneeJour,Heure,JourNuit,DirectionVent,VitesseVent,Temperature,Humidite,Visibilite,Pression,Precipitation
0,Abbeville,50.136,1.834,69,NO,2024-01-01 00:00:00,1,1,1,2024,...,2024001,0,nuit,240.0,6.5,7.4,81.0,15.46,993.2,1.0
1,Abbeville,50.136,1.834,69,NO,2024-01-01 03:00:00,1,1,1,2024,...,2024001,3,nuit,230.0,4.2,6.9,84.0,15.58,993.8,1.0
2,Abbeville,50.136,1.834,69,NO,2024-01-01 06:00:00,1,1,1,2024,...,2024001,6,nuit,240.0,4.0,7.0,87.0,19.6,995.7,3.3
3,Abbeville,50.136,1.834,69,NO,2024-01-01 09:00:00,1,1,1,2024,...,2024001,9,jour,230.0,3.3,7.1,85.0,17.54,998.9,-0.1
4,Abbeville,50.136,1.834,69,NO,2024-01-01 12:00:00,1,1,1,2024,...,2024001,12,jour,240.0,4.2,8.1,78.0,20.0,999.8,0.2


In [27]:
meteo.shape

(235780, 23)

In [28]:
donnees = meteo.reset_index().groupby( ['Nom','Annee']).agg({'Temperature':['mean','median'], 
                                                             'Humidite':['mean','median'], 
                                                             'Visibilite':['mean','median']})
donnees.head(28)

Unnamed: 0_level_0,Unnamed: 1_level_0,Temperature,Temperature,Humidite,Humidite,Visibilite,Visibilite
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,median,mean,median,mean,median
Nom,Annee,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Abbeville,2024,11.741866,11.6,83.143882,86.0,16.049878,20.0
Abbeville,2025,11.340824,11.8,80.444105,85.0,16.404647,20.0
Ajaccio,2024,17.089589,16.5,73.951027,75.0,38.969286,40.87
Ajaccio,2025,17.291867,16.5,73.404085,74.0,41.265839,44.95
Alencon,2024,11.772087,11.6,82.836693,87.0,16.297936,20.0
Alencon,2025,12.074827,11.9,78.315213,83.0,16.213427,20.0
Bale,2024,12.254868,11.8,80.155449,85.0,26.147151,23.91
Bale,2025,11.875255,11.5,77.449344,83.0,26.949202,24.085
Bastia,2024,17.161119,16.6,73.074242,75.0,30.067798,28.46
Bastia,2025,17.22521,16.1,72.56266,74.0,43.721346,60.0


In [29]:
donnees = meteo.groupby( ['Annee']).agg({'Temperature':['mean','median'], 
                                                             'Humidite':['mean','median'], 
                                                             'Visibilite':['mean','median']})
donnees.head(28)

Unnamed: 0_level_0,Temperature,Temperature,Humidite,Humidite,Visibilite,Visibilite
Unnamed: 0_level_1,mean,median,mean,median,mean,median
Annee,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2024,13.311918,13.0,78.363618,82.0,24.83964,20.0
2025,13.486221,13.2,75.155951,79.0,26.876944,20.0


In [31]:
donnees = meteo.groupby( ['Jour']).agg({'Temperature':'mean',
                                         'Humidite':'mean', 
                                         'Visibilite':'mean'})
donnees.head(28)

Unnamed: 0_level_0,Temperature,Humidite,Visibilite
Jour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,6.842813,82.837461,23.808874
2,7.917378,83.930556,23.345057
3,7.458079,82.103395,23.962622
4,6.741069,83.695518,22.817873
5,8.816183,86.636785,23.806748
6,7.872672,81.017002,28.615367
7,5.710398,80.012384,30.084109
8,5.178834,80.319876,24.301475
9,4.097235,79.92846,22.203466
10,3.350998,83.169518,18.976036


In [32]:
donnees = meteo.groupby( ['Nom','Jour']).agg({'Temperature':['mean','min','max','std'],
                                         'Humidite':'mean', 
                                         'Visibilite':'mean'}).reset_index()

In [33]:
donnees.head()

Unnamed: 0_level_0,Nom,Jour,Temperature,Temperature,Temperature,Temperature,Humidite,Visibilite
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,min,max,std,mean,mean
0,Abbeville,1,8.1125,5.6,11.5,1.495716,83.5,17.614375
1,Abbeville,2,7.43125,-1.0,13.1,4.59111,92.6875,16.1475
2,Abbeville,3,5.55625,-1.4,11.5,5.080285,82.8125,19.085625
3,Abbeville,4,4.78125,-1.0,10.6,4.774686,84.9375,18.53625
4,Abbeville,5,8.26875,0.5,12.2,3.360896,85.6875,17.453125


In [34]:
donnees[donnees.Nom == 'Strasbourg'].head(28)

Unnamed: 0_level_0,Nom,Jour,Temperature,Temperature,Temperature,Temperature,Humidite,Visibilite
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,min,max,std,mean,mean
13542,Strasbourg,1,4.76875,-2.7,9.6,3.812911,79.1875,28.368125
13543,Strasbourg,2,7.3625,2.1,11.1,2.869117,79.4375,33.433125
13544,Strasbourg,3,5.98125,-2.6,12.3,5.198489,78.75,33.571875
13545,Strasbourg,4,3.9375,-3.9,10.9,5.489976,80.5625,29.360625
13546,Strasbourg,5,6.8375,-0.7,11.7,3.660032,86.4375,33.48625
13547,Strasbourg,6,7.95,3.4,15.2,3.590914,76.4375,45.13
13548,Strasbourg,7,4.775,-0.5,8.6,2.271417,79.0,34.33375
13549,Strasbourg,8,1.19375,-2.6,4.3,2.292151,82.1875,27.10875
13550,Strasbourg,9,2.41875,-5.3,11.9,6.65254,73.5625,28.313125
13551,Strasbourg,10,0.18125,-4.4,5.9,3.217498,74.9375,25.23125


In [35]:
donnees.isna().sum()

Nom                    0
Jour                   0
Temperature  mean    366
             min     366
             max     366
             std     366
Humidite     mean    419
Visibilite   mean    376
dtype: int64

In [37]:
donnees.columns

MultiIndex([(      'index',     ''),
            (        'Nom',     ''),
            (       'Jour',     ''),
            ('Temperature', 'mean'),
            ('Temperature',  'min'),
            ('Temperature',  'max'),
            ('Temperature',  'std'),
            (   'Humidite', 'mean'),
            ( 'Visibilite', 'mean')],
           )