<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Les-imports-et-configuration-du-document" data-toc-modified-id="Les-imports-et-configuration-du-document-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Les imports et configuration du document</a></span><ul class="toc-item"><li><span><a href="#La-lecture-des-fichiers-d'un-répértoire" data-toc-modified-id="La-lecture-des-fichiers-d'un-répértoire-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>La lecture des fichiers d'un répértoire</a></span></li></ul></li><li><span><a href="#La-temperature-mensuelle" data-toc-modified-id="La-temperature-mensuelle-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>La temperature mensuelle</a></span></li><li><span><a href="#La-météo-mensuelle" data-toc-modified-id="La-météo-mensuelle-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>La météo mensuelle</a></span></li><li><span><a href="#La-météo-----hebdomadaire" data-toc-modified-id="La-météo-----hebdomadaire-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>La météo     hebdomadaire</a></span></li><li><span><a href="#La-météo-----journalière" data-toc-modified-id="La-météo-----journalière-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>La météo     journalière</a></span></li></ul></div>

# Les imports et configuration du document

In [1]:
import pandas as pd 
import seaborn as sns
import warnings 
import os
import time
from datetime import datetime
from matplotlib import pyplot as plt

warnings.filterwarnings(action="ignore")

%matplotlib inline
if int(str(sns.__version__).split('.')[1]) > 8 : 
    plt.style.use('seaborn-v0_8-darkgrid')
else:
    plt.style.use('seaborn-darkgrid')
    
sns.set(font_scale=3)

# os.chdir("donnees")

In [2]:
def tempsExecution(func):
    def chronometrerExécution(*_args, **_kwargs): 
        t0 = time.perf_counter()
        result = func(*_args, **_kwargs) 
        elapsed = time.perf_counter() - t0
        name = func.__name__
        args = ', '.join(repr(arg) for arg in _args) \
               if len(_args) == 1 \
               else ', '.join(arg+'='+repr(_kwargs[arg]) for arg in _kwargs)
        print(f'exécution {elapsed:0.8f}s\nfonction {name}({args})')
        return result
    return chronometrerExécution

In [3]:
@tempsExecution
def lectureFichiersMeteo(repertoire='../donnees/meteo_test'):
    meteo = pd.DataFrame()

    for dirname, _, filenames in os.walk(repertoire):
        for filename in filenames:
            # if filename.rfind('synop') >= 0 :
            meteo = pd.concat([meteo,
                               pd.read_csv(os.path.join(dirname, filename),
                                           sep=';',
                                           usecols=['numer_sta','date','dd','ff','t','u','vv',
                                                    'pres','rr1','rr3','rr6','rr12','rr24'],
                                           na_values='mq',
                                           dtype={'numer_sta':str,'date':str},
                                          ).rename( columns={'numer_sta':'Station',
                                                              'date':'DateHeure',
                                                              'dd':'DirectionVent',
                                                              'ff':'VitesseVent',
                                                              't':'Temperature',
                                                              'u':'Humidite',
                                                              'vv':'Visibilite',
                                                              'pres':'Pression',
                                                              'rr1' :'Precipitation01',
                                                              'rr3' :'Precipitation03',
                                                              'rr6' :'Precipitation06',
                                                              'rr12':'Precipitation12',
                                                              'rr24':'Precipitation24'})],axis=0)
    meteo.reset_index().drop(columns='index',inplace=True)
    meteo["DateHeure"] = pd.to_datetime(meteo["DateHeure"], format='%Y%m%d%H%M%S')
    meteo.Temperature  = meteo.Temperature - 273.15
    meteo.Pression     = meteo.Pression / 100
    meteo.Visibilite   = meteo.Visibilite / 1000
    meteo['Mois']      = meteo.DateHeure.dt.month
    meteo['Annee']     = meteo.DateHeure.dt.year
    meteo['AnneeMois'] = meteo.DateHeure.dt.year*100 + meteo.DateHeure.dt.month
    meteo['Semaine']   = meteo.DateHeure.dt.isocalendar().week
    meteo['MoisJour']  = meteo.DateHeure.dt.month*100 + meteo.DateHeure.dt.day
    meteo['JourA']     = meteo.DateHeure.dt.dayofyear
    return meteo

In [4]:
_ = lectureFichiersMeteo(repertoire='../donnees/meteo')

exécution 19.39969148s
fonction lectureFichiersMeteo(repertoire='../donnees/meteo')


In [5]:
@tempsExecution
def lectureFichiersMeteo(repertoire='../donnees/meteo_test'):
    listeFichiers = []
    for dirname, dirnames, filenames in os.walk(repertoire):
        for filename in filenames:
            listeFichiers.append(os.path.join(dirname, filename))
        
    meteo = pd.concat([ pd.read_csv(fichier,
                           sep=';',
                           usecols=['numer_sta','date','dd','ff','t','u','vv',
                                    'pres','rr1','rr3','rr6','rr12','rr24'],
                           na_values='mq',
                           dtype={'numer_sta':str,'date':str},
                          ).rename( columns={'numer_sta':'Station',
                                              'date':'DateHeure',
                                              'dd':'DirectionVent',
                                              'ff':'VitesseVent',
                                              't':'Temperature',
                                              'u':'Humidite',
                                              'vv':'Visibilite',
                                              'pres':'Pression',
                                              'rr1' :'Precipitation01',
                                              'rr3' :'Precipitation03',
                                              'rr6' :'Precipitation06',
                                              'rr12':'Precipitation12',
                                              'rr24':'Precipitation24'}) 
                         for fichier in listeFichiers])
    meteo.reset_index().drop(columns='index',inplace=True)
    meteo["DateHeure"] = pd.to_datetime(meteo["DateHeure"], format='%Y%m%d%H%M%S')
    meteo.Temperature  = meteo.Temperature - 273.15
    meteo.Pression     = meteo.Pression / 100
    meteo.Visibilite   = meteo.Visibilite / 1000
    meteo['Mois']      = meteo.DateHeure.dt.month
    meteo['Annee']     = meteo.DateHeure.dt.year
    meteo['AnneeMois'] = meteo.DateHeure.dt.year*100 + meteo.DateHeure.dt.month
    meteo['Semaine']   = meteo.DateHeure.dt.isocalendar().week
    meteo['MoisJour']  = meteo.DateHeure.dt.month*100 + meteo.DateHeure.dt.day
    meteo['JourA']     = meteo.DateHeure.dt.dayofyear
    return meteo
    

In [6]:
_ = lectureFichiersMeteo(repertoire='../donnees/meteo')

exécution 5.59486289s
fonction lectureFichiersMeteo(repertoire='../donnees/meteo')


In [7]:
@tempsExecution
def lectureFichiersMeteo(repertoire='../donnees/meteo_test'):
    meteo = pd.concat([ pd.read_csv(fichier,
                           sep=';',
                           usecols=['numer_sta','date','dd','ff','t','u','vv',
                                    'pres','rr1','rr3','rr6','rr12','rr24'],
                           na_values='mq',
                           dtype={'numer_sta':str,'date':str},
                          ).rename( columns={'numer_sta':'Station',
                                              'date':'DateHeure',
                                              'dd':'DirectionVent',
                                              'ff':'VitesseVent',
                                              't':'Temperature',
                                              'u':'Humidite',
                                              'vv':'Visibilite',
                                              'pres':'Pression',
                                              'rr1' :'Precipitation01',
                                              'rr3' :'Precipitation03',
                                              'rr6' :'Precipitation06',
                                              'rr12':'Precipitation12',
                                              'rr24':'Precipitation24'}) 
                         for fichier in [ f'{dirname}{os.sep}{filename}' for dirname, dirnames, filenames in os.walk(repertoire) 
                                                                         for filename in filenames]])
    meteo.reset_index().drop(columns='index',inplace=True)
    meteo["DateHeure"] = pd.to_datetime(meteo["DateHeure"], format='%Y%m%d%H%M%S')
    meteo.Temperature  = meteo.Temperature - 273.15
    meteo.Pression     = meteo.Pression / 100
    meteo.Visibilite   = meteo.Visibilite / 1000
    meteo['Mois']      = meteo.DateHeure.dt.month
    meteo['Annee']     = meteo.DateHeure.dt.year
    meteo['AnneeMois'] = meteo.DateHeure.dt.year*100 + meteo.DateHeure.dt.month
    meteo['Semaine']   = meteo.DateHeure.dt.isocalendar().week
    meteo['MoisJour']  = meteo.DateHeure.dt.month*100 + meteo.DateHeure.dt.day
    meteo['JourA']     = meteo.DateHeure.dt.dayofyear
    return meteo
    

In [8]:
_ = lectureFichiersMeteo(repertoire='../donnees/meteo')

exécution 5.55175639s
fonction lectureFichiersMeteo(repertoire='../donnees/meteo')


## La lecture des fichiers d'un répértoire 

In [9]:
%%time
meteo = lectureFichiersMeteo(repertoire='../donnees/meteo')

exécution 5.53652898s
fonction lectureFichiersMeteo(repertoire='../donnees/meteo')
CPU times: user 5.23 s, sys: 404 ms, total: 5.64 s
Wall time: 5.54 s


In [10]:
meteo.tail()

Unnamed: 0,Station,DateHeure,DirectionVent,VitesseVent,Temperature,Humidite,Visibilite,Pression,Precipitation01,Precipitation03,Precipitation06,Precipitation12,Precipitation24,Mois,Annee,AnneeMois,Semaine,MoisJour,JourA
14589,78925,2023-09-30 21:00:00,60.0,2.4,30.3,78.0,60.0,1007.2,0.0,1.7,21.4,21.4,21.4,9,2023,202309,39,930,273
14590,81401,2023-09-30 21:00:00,60.0,1.8,31.1,65.0,,1007.2,0.0,0.0,0.0,0.0,0.0,9,2023,202309,39,930,273
14591,81405,2023-09-30 21:00:00,100.0,3.7,30.1,73.0,50.43,1007.9,0.0,0.0,0.0,0.0,0.0,9,2023,202309,39,930,273
14592,81408,2023-09-30 21:00:00,90.0,3.6,32.0,57.0,,1007.6,0.0,0.0,0.0,0.2,0.2,9,2023,202309,39,930,273
14593,89642,2023-09-30 21:00:00,140.0,13.9,-21.3,49.0,,981.2,,,,,,9,2023,202309,39,930,273


In [11]:
meteo.shape

(4703265, 19)

In [12]:
postes = pd.read_csv('../donnees/postesSynop.csv',sep=';',dtype={'ID':str})
postes.Nom =postes.Nom.apply(lambda x : x if x in ['CLERMONT-FD','MONT-DE-MARSAN',
                                       'ST-PIERRE','ST-BARTHELEMY METEO'] 
                               else x[0:x.find('-')] 
                                    if x.find('-') != -1 else x).apply(lambda x : str(x).title())
postes.Altitude = postes.Altitude.astype('int16')
postes = postes[postes.ID < '08000']

In [13]:
postes.loc[postes.Latitude  < postes.Latitude.mean(),'Zone'] = 'S'
postes.loc[postes.Latitude  > postes.Latitude.mean(),'Zone'] = 'N'
postes.loc[postes.Longitude < postes.Longitude.mean(),'Zone'] += 'O'
postes.loc[postes.Longitude > postes.Longitude.mean(),'Zone'] += 'E'

In [14]:
postes.head()

Unnamed: 0,ID,Nom,Latitude,Longitude,Altitude,Zone
0,7005,Abbeville,50.136,1.834,69,NO
1,7015,Lille,50.57,3.0975,47,NE
2,7020,Pte De La Hague,49.725167,-1.939833,6,NO
3,7027,Caen,49.18,-0.456167,67,NO
4,7037,Rouen,49.383,1.181667,151,NO


In [15]:
meteo = postes.merge(meteo, how = "inner", 
                     left_on = "ID", right_on = "Station").drop(["ID","Station"], axis = "columns")

In [16]:
meteo['Precipitation'] =  meteo['Precipitation03'].combine_first(meteo['Precipitation06']/2)\
                                                  .combine_first(meteo['Precipitation12']/4)\
                                                  .combine_first(meteo['Precipitation24']/8)\
                                                  .combine_first(meteo['Precipitation01']*3)

In [17]:
meteo.drop(columns=['Precipitation06',
                    'Precipitation12',
                    'Precipitation24',
                    'Precipitation01',
                    'Precipitation03'], inplace=True)

In [18]:
meteo.to_parquet('../donnees/meteo.gzip',compression='gzip', engine='pyarrow')

In [19]:
!ls -al ../donnees/meteo.gzip

-rw-rw-r-- 1 razvan razvan 27219313 déc.  20 13:35 ../donnees/meteo.gzip


In [20]:
meteo.to_csv('../donnees/meteo.csv')

In [21]:
%%time
meteo = pd.read_parquet('../donnees/meteo.gzip', engine='pyarrow')
meteo.dtypes

CPU times: user 468 ms, sys: 352 ms, total: 820 ms
Wall time: 165 ms


Nom                      object
Latitude                float64
Longitude               float64
Altitude                  int16
Zone                     object
DateHeure        datetime64[ns]
DirectionVent           float64
VitesseVent             float64
Temperature             float64
Humidite                float64
Visibilite              float64
Pression                float64
Mois                      int32
Annee                     int32
AnneeMois                 int64
Semaine                  UInt32
MoisJour                  int64
JourA                     int32
Precipitation           float64
dtype: object

In [22]:
%%time
pd.read_csv('../donnees/meteo.csv').dtypes

CPU times: user 1.94 s, sys: 391 ms, total: 2.34 s
Wall time: 2.31 s


Unnamed: 0         int64
Nom               object
Latitude         float64
Longitude        float64
Altitude           int64
Zone              object
DateHeure         object
DirectionVent    float64
VitesseVent      float64
Temperature      float64
Humidite         float64
Visibilite       float64
Pression         float64
Mois               int64
Annee              int64
AnneeMois          int64
Semaine            int64
MoisJour           int64
JourA              int64
Precipitation    float64
dtype: object

In [23]:
meteo.isna().sum()

Nom                   0
Latitude              0
Longitude             0
Altitude              0
Zone                  0
DateHeure             0
DirectionVent      8224
VitesseVent        7773
Temperature       14342
Humidite          18041
Visibilite       442045
Pression          20811
Mois                  0
Annee                 0
AnneeMois             0
Semaine               0
MoisJour              0
JourA                 0
Precipitation     29308
dtype: int64

In [24]:
meteo.head()

Unnamed: 0,Nom,Latitude,Longitude,Altitude,Zone,DateHeure,DirectionVent,VitesseVent,Temperature,Humidite,Visibilite,Pression,Mois,Annee,AnneeMois,Semaine,MoisJour,JourA,Precipitation
0,Abbeville,50.136,1.834,69,NO,2012-01-01 00:00:00,240.0,7.2,12.0,90.0,20.0,1004.2,1,2012,201201,52,101,1,0.0
1,Abbeville,50.136,1.834,69,NO,2012-01-01 03:00:00,220.0,6.2,11.9,94.0,9.0,1003.6,1,2012,201201,52,101,1,0.0
2,Abbeville,50.136,1.834,69,NO,2012-01-01 06:00:00,210.0,5.7,11.8,96.0,6.0,1003.1,1,2012,201201,52,101,1,0.8
3,Abbeville,50.136,1.834,69,NO,2012-01-01 09:00:00,200.0,5.1,12.0,96.0,4.0,1003.2,1,2012,201201,52,101,1,0.8
4,Abbeville,50.136,1.834,69,NO,2012-01-01 12:00:00,200.0,6.2,13.0,95.0,20.0,1001.1,1,2012,201201,52,101,1,0.4


In [25]:
meteo.columns

Index(['Nom', 'Latitude', 'Longitude', 'Altitude', 'Zone', 'DateHeure',
       'DirectionVent', 'VitesseVent', 'Temperature', 'Humidite', 'Visibilite',
       'Pression', 'Mois', 'Annee', 'AnneeMois', 'Semaine', 'MoisJour',
       'JourA', 'Precipitation'],
      dtype='object')

# La temperature mensuelle

In [26]:
temperatures = meteo[['Nom', 'Latitude', 'Longitude', 'Altitude','Zone', 
                'Temperature','Mois']].pivot_table(index=['Nom', 'Latitude', 'Longitude', 'Altitude','Zone'],
                           columns='Mois')
temperatures.columns = ['janv.','févr.','mars','avr.','mai','juin','juill.','août','sept.','oct.','nov.','déc.']
temperatures.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,janv.,févr.,mars,avr.,mai,juin,juill.,août,sept.,oct.,nov.,déc.
Nom,Latitude,Longitude,Altitude,Zone,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Abbeville,50.136,1.834,69,NO,4.506655,5.177926,7.187374,9.897527,12.944717,15.927296,17.647393,17.950155,15.357589,12.012794,7.906183,5.095252
Ajaccio,41.918,8.792667,5,SE,9.013655,9.098936,10.988942,13.637566,17.484245,21.511102,23.863274,23.911528,20.787859,17.452011,13.180196,10.003745
Alencon,48.4455,0.110167,143,NO,4.579525,5.122644,7.419778,10.159313,13.537345,16.898559,18.620894,18.550157,15.467162,11.902327,7.772221,4.985914
Bale,47.614333,7.51,263,NE,2.157652,3.353058,6.649048,10.686277,14.948834,18.782333,20.190336,19.770608,15.43266,11.191408,6.100353,2.917112
Bastia,42.540667,9.485167,10,SE,9.188277,9.326488,11.185525,13.803358,17.862716,22.039493,24.835508,24.852453,21.187113,17.405394,13.241846,10.128839


In [27]:
temperatures.to_parquet('../donnees/temperatures.gzip',compression='gzip', engine='pyarrow')

In [28]:
temperatures.to_csv('../donnees/temperatures.csv')

In [29]:
%%time
temperatures = pd.read_parquet('../donnees/temperatures.gzip', engine='pyarrow')
temperatures.dtypes

CPU times: user 4.56 ms, sys: 140 µs, total: 4.7 ms
Wall time: 2.98 ms


janv.     float64
févr.     float64
mars      float64
avr.      float64
mai       float64
juin      float64
juill.    float64
août      float64
sept.     float64
oct.      float64
nov.      float64
déc.      float64
dtype: object

In [30]:
temperatures.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,janv.,févr.,mars,avr.,mai,juin,juill.,août,sept.,oct.,nov.,déc.
Nom,Latitude,Longitude,Altitude,Zone,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Abbeville,50.136,1.834,69,NO,4.506655,5.177926,7.187374,9.897527,12.944717,15.927296,17.647393,17.950155,15.357589,12.012794,7.906183,5.095252
Ajaccio,41.918,8.792667,5,SE,9.013655,9.098936,10.988942,13.637566,17.484245,21.511102,23.863274,23.911528,20.787859,17.452011,13.180196,10.003745
Alencon,48.4455,0.110167,143,NO,4.579525,5.122644,7.419778,10.159313,13.537345,16.898559,18.620894,18.550157,15.467162,11.902327,7.772221,4.985914
Bale,47.614333,7.51,263,NE,2.157652,3.353058,6.649048,10.686277,14.948834,18.782333,20.190336,19.770608,15.43266,11.191408,6.100353,2.917112
Bastia,42.540667,9.485167,10,SE,9.188277,9.326488,11.185525,13.803358,17.862716,22.039493,24.835508,24.852453,21.187113,17.405394,13.241846,10.128839


# La météo mensuelle

In [31]:
meteoM = meteo[['Nom', 'Latitude', 'Longitude', 'Altitude','Zone', 
                'VitesseVent', 'Temperature', 'Humidite', 
                'Visibilite','Pression','Precipitation', 
                'Mois']].pivot_table(index=['Nom', 'Latitude', 'Longitude', 'Altitude','Zone'],
                           columns='Mois')
meteoM.columns = [f'{x[1]:02d}{x[0]}' for x in meteoM.columns]
meteoM.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,01Humidite,02Humidite,03Humidite,04Humidite,05Humidite,06Humidite,07Humidite,08Humidite,09Humidite,10Humidite,...,03VitesseVent,04VitesseVent,05VitesseVent,06VitesseVent,07VitesseVent,08VitesseVent,09VitesseVent,10VitesseVent,11VitesseVent,12VitesseVent
Nom,Latitude,Longitude,Altitude,Zone,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Abbeville,50.136,1.834,69,NO,86.820309,83.386775,80.342539,75.544287,77.929048,78.19847,77.074354,77.662511,80.211285,84.563378,...,4.766778,4.259925,4.192671,3.85158,3.928874,3.710001,3.701641,4.141725,4.28057,4.627865
Ajaccio,41.918,8.792667,5,SE,77.460203,75.654736,74.375018,74.375616,73.385373,70.521941,67.584996,67.696418,70.5,74.7479,...,3.236731,3.300194,3.293887,3.437895,3.602082,3.513099,3.389571,3.087275,3.195869,3.201756
Alencon,48.4455,0.110167,143,NO,88.790933,84.026838,79.322511,74.508081,75.291911,73.488845,70.721321,72.583467,76.561302,84.752133,...,3.709711,3.399098,3.417587,3.215792,3.271606,2.97094,2.887146,3.103692,3.344426,3.615989
Bale,47.614333,7.51,263,NE,84.569286,78.805793,72.944742,69.282028,71.963199,70.75843,69.7011,73.679607,78.735722,83.722832,...,2.897102,2.703953,2.666662,2.465817,2.423812,2.166816,2.178307,2.337874,2.55246,2.861031
Bastia,42.540667,9.485167,10,SE,73.379765,71.032745,71.78486,73.320911,72.418883,70.409662,66.36551,66.179326,68.685714,76.246392,...,3.166822,2.898373,2.658234,2.618459,2.7423,2.714049,2.821584,2.786304,3.034028,3.183148


In [32]:
meteoM.to_parquet('../donnees/meteoM.gzip',compression='gzip', engine='pyarrow')

In [33]:
%%time
meteoM = pd.read_parquet('../donnees/meteoM.gzip', engine='pyarrow')
meteoM.dtypes

CPU times: user 7.18 ms, sys: 794 µs, total: 7.97 ms
Wall time: 4.9 ms


01Humidite       float64
02Humidite       float64
03Humidite       float64
04Humidite       float64
05Humidite       float64
                  ...   
08VitesseVent    float64
09VitesseVent    float64
10VitesseVent    float64
11VitesseVent    float64
12VitesseVent    float64
Length: 72, dtype: object

In [34]:
meteoM.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,01Humidite,02Humidite,03Humidite,04Humidite,05Humidite,06Humidite,07Humidite,08Humidite,09Humidite,10Humidite,...,03VitesseVent,04VitesseVent,05VitesseVent,06VitesseVent,07VitesseVent,08VitesseVent,09VitesseVent,10VitesseVent,11VitesseVent,12VitesseVent
Nom,Latitude,Longitude,Altitude,Zone,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Abbeville,50.136,1.834,69,NO,86.820309,83.386775,80.342539,75.544287,77.929048,78.19847,77.074354,77.662511,80.211285,84.563378,...,4.766778,4.259925,4.192671,3.85158,3.928874,3.710001,3.701641,4.141725,4.28057,4.627865
Ajaccio,41.918,8.792667,5,SE,77.460203,75.654736,74.375018,74.375616,73.385373,70.521941,67.584996,67.696418,70.5,74.7479,...,3.236731,3.300194,3.293887,3.437895,3.602082,3.513099,3.389571,3.087275,3.195869,3.201756
Alencon,48.4455,0.110167,143,NO,88.790933,84.026838,79.322511,74.508081,75.291911,73.488845,70.721321,72.583467,76.561302,84.752133,...,3.709711,3.399098,3.417587,3.215792,3.271606,2.97094,2.887146,3.103692,3.344426,3.615989
Bale,47.614333,7.51,263,NE,84.569286,78.805793,72.944742,69.282028,71.963199,70.75843,69.7011,73.679607,78.735722,83.722832,...,2.897102,2.703953,2.666662,2.465817,2.423812,2.166816,2.178307,2.337874,2.55246,2.861031
Bastia,42.540667,9.485167,10,SE,73.379765,71.032745,71.78486,73.320911,72.418883,70.409662,66.36551,66.179326,68.685714,76.246392,...,3.166822,2.898373,2.658234,2.618459,2.7423,2.714049,2.821584,2.786304,3.034028,3.183148


# La météo 	hebdomadaire

In [35]:
meteoH = meteo[['Nom', 'Latitude', 'Longitude', 'Altitude','Zone', 
                'VitesseVent', 'Temperature', 'Humidite', 
                'Visibilite','Pression','Precipitation', 
                'Semaine']].pivot_table(index=['Nom', 'Latitude', 'Longitude', 'Altitude','Zone'],
                           columns='Semaine')
meteoH.columns = [f'{x[1]:02d}{x[0]}' for x in meteoH.columns]
meteoH.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,01Humidite,02Humidite,03Humidite,04Humidite,05Humidite,06Humidite,07Humidite,08Humidite,09Humidite,10Humidite,...,44VitesseVent,45VitesseVent,46VitesseVent,47VitesseVent,48VitesseVent,49VitesseVent,50VitesseVent,51VitesseVent,52VitesseVent,53VitesseVent
Nom,Latitude,Longitude,Altitude,Zone,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Abbeville,50.136,1.834,69,NO,86.905039,86.897698,86.642401,86.892169,85.540385,83.389103,84.261568,82.876677,81.756705,82.874679,...,4.253393,4.564601,4.012084,4.465771,4.309527,4.590546,4.53024,4.5284,4.842071,4.694444
Ajaccio,41.918,8.792667,5,SE,78.014304,78.533675,78.137931,75.203846,77.491352,74.198976,76.203716,76.042784,75.120051,74.042199,...,3.227928,3.251412,3.07635,3.255734,3.15771,3.309182,3.196618,2.96996,3.273092,3.594097
Alencon,48.4455,0.110167,143,NO,89.453196,89.396552,89.097701,87.467391,86.753047,84.565385,84.438585,83.333119,81.811697,81.900319,...,3.393145,3.590756,3.193135,3.22114,3.369381,3.674104,3.460027,3.406383,3.95767,3.680634
Bale,47.614333,7.51,263,NE,85.635074,85.608557,83.734355,84.022379,82.19361,79.889955,80.05364,76.996154,75.183791,73.624761,...,2.496218,2.673529,2.395391,2.649841,2.567683,2.840797,2.999071,2.789987,2.935785,2.873868
Bastia,42.540667,9.485167,10,SE,73.54822,73.615583,72.969231,73.888746,72.830006,69.425546,69.637588,73.292761,70.884467,69.98315,...,2.883408,2.78,2.926129,3.537011,2.987516,3.277297,3.197931,3.072081,3.208744,3.042361


In [36]:
meteoH.isna().sum()[meteoH.isna().sum() > 0]

Series([], dtype: int64)

In [37]:
meteoH.to_parquet('../donnees/meteoH.gzip',compression='gzip', engine='pyarrow')

In [38]:
%%time
meteoH = pd.read_parquet('../donnees/meteoH.gzip', engine='pyarrow')
meteoH.dtypes

CPU times: user 18 ms, sys: 3.49 ms, total: 21.5 ms
Wall time: 12.4 ms


01Humidite       float64
02Humidite       float64
03Humidite       float64
04Humidite       float64
05Humidite       float64
                  ...   
49VitesseVent    float64
50VitesseVent    float64
51VitesseVent    float64
52VitesseVent    float64
53VitesseVent    float64
Length: 318, dtype: object

In [39]:
meteoH.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,01Humidite,02Humidite,03Humidite,04Humidite,05Humidite,06Humidite,07Humidite,08Humidite,09Humidite,10Humidite,...,44VitesseVent,45VitesseVent,46VitesseVent,47VitesseVent,48VitesseVent,49VitesseVent,50VitesseVent,51VitesseVent,52VitesseVent,53VitesseVent
Nom,Latitude,Longitude,Altitude,Zone,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Abbeville,50.136,1.834,69,NO,86.905039,86.897698,86.642401,86.892169,85.540385,83.389103,84.261568,82.876677,81.756705,82.874679,...,4.253393,4.564601,4.012084,4.465771,4.309527,4.590546,4.53024,4.5284,4.842071,4.694444
Ajaccio,41.918,8.792667,5,SE,78.014304,78.533675,78.137931,75.203846,77.491352,74.198976,76.203716,76.042784,75.120051,74.042199,...,3.227928,3.251412,3.07635,3.255734,3.15771,3.309182,3.196618,2.96996,3.273092,3.594097
Alencon,48.4455,0.110167,143,NO,89.453196,89.396552,89.097701,87.467391,86.753047,84.565385,84.438585,83.333119,81.811697,81.900319,...,3.393145,3.590756,3.193135,3.22114,3.369381,3.674104,3.460027,3.406383,3.95767,3.680634
Bale,47.614333,7.51,263,NE,85.635074,85.608557,83.734355,84.022379,82.19361,79.889955,80.05364,76.996154,75.183791,73.624761,...,2.496218,2.673529,2.395391,2.649841,2.567683,2.840797,2.999071,2.789987,2.935785,2.873868
Bastia,42.540667,9.485167,10,SE,73.54822,73.615583,72.969231,73.888746,72.830006,69.425546,69.637588,73.292761,70.884467,69.98315,...,2.883408,2.78,2.926129,3.537011,2.987516,3.277297,3.197931,3.072081,3.208744,3.042361


# La météo 	journalière

In [40]:
meteoJ = meteo[['Nom', 'Latitude', 'Longitude', 'Altitude','Zone', 
                'VitesseVent', 'Temperature', 'Humidite', 
                'Visibilite','Pression','Precipitation', 
                'MoisJour']].pivot_table(index=['Nom', 'Latitude', 'Longitude', 'Altitude','Zone'],
                           columns='MoisJour')
meteoJ.columns = [f'{x[1]:04d}{x[0]}' for x in meteoJ.columns]
meteoJ.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,0101Humidite,0102Humidite,0103Humidite,0104Humidite,0105Humidite,0106Humidite,0107Humidite,0108Humidite,0109Humidite,0110Humidite,...,1222VitesseVent,1223VitesseVent,1224VitesseVent,1225VitesseVent,1226VitesseVent,1227VitesseVent,1228VitesseVent,1229VitesseVent,1230VitesseVent,1231VitesseVent
Nom,Latitude,Longitude,Altitude,Zone,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Abbeville,50.136,1.834,69,NO,88.888393,87.147321,87.040724,85.946429,86.986607,86.589286,85.861607,86.84375,86.459821,88.120536,...,5.232407,5.119444,5.384259,4.9875,4.882407,4.933178,4.550463,4.356481,4.648148,4.187907
Ajaccio,41.918,8.792667,5,SE,77.473214,78.704036,78.472973,79.636364,78.834821,77.833333,77.981651,79.395455,77.964126,77.838565,...,2.907212,3.016588,2.898148,3.379167,3.146729,3.426389,4.361395,3.154167,2.85814,3.199074
Alencon,48.4455,0.110167,143,NO,91.162162,88.331839,89.138393,89.1875,89.160714,90.617117,89.669643,89.584821,88.522321,89.526786,...,3.989352,3.950235,4.15514,4.248837,3.849537,4.061972,3.685648,3.302315,3.485116,3.516744
Bale,47.614333,7.51,263,NE,87.825893,85.986607,83.861607,84.660714,84.227679,86.65625,86.659193,86.486607,85.565022,87.151786,...,3.197222,2.50186,2.897674,3.091667,3.117209,3.185047,3.499074,2.945116,2.651389,2.319535
Bastia,42.540667,9.485167,10,SE,74.080357,72.793722,69.552036,71.607143,71.566964,75.743243,77.309091,77.678571,74.236364,74.193694,...,3.29213,3.02,2.726168,3.088626,3.713679,3.415349,3.578241,2.970833,3.042593,3.071163


In [41]:
meteoJ.to_parquet('../donnees/meteoJ.gzip',compression='gzip', engine='pyarrow')

In [42]:
%%time
meteoJ = pd.read_parquet('../donnees/meteoJ.gzip', engine='pyarrow')
meteoJ.dtypes

CPU times: user 110 ms, sys: 25.5 ms, total: 136 ms
Wall time: 62 ms


0101Humidite       float64
0102Humidite       float64
0103Humidite       float64
0104Humidite       float64
0105Humidite       float64
                    ...   
1227VitesseVent    float64
1228VitesseVent    float64
1229VitesseVent    float64
1230VitesseVent    float64
1231VitesseVent    float64
Length: 2196, dtype: object

In [43]:
meteoJ.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,0101Humidite,0102Humidite,0103Humidite,0104Humidite,0105Humidite,0106Humidite,0107Humidite,0108Humidite,0109Humidite,0110Humidite,...,1222VitesseVent,1223VitesseVent,1224VitesseVent,1225VitesseVent,1226VitesseVent,1227VitesseVent,1228VitesseVent,1229VitesseVent,1230VitesseVent,1231VitesseVent
Nom,Latitude,Longitude,Altitude,Zone,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Abbeville,50.136,1.834,69,NO,88.888393,87.147321,87.040724,85.946429,86.986607,86.589286,85.861607,86.84375,86.459821,88.120536,...,5.232407,5.119444,5.384259,4.9875,4.882407,4.933178,4.550463,4.356481,4.648148,4.187907
Ajaccio,41.918,8.792667,5,SE,77.473214,78.704036,78.472973,79.636364,78.834821,77.833333,77.981651,79.395455,77.964126,77.838565,...,2.907212,3.016588,2.898148,3.379167,3.146729,3.426389,4.361395,3.154167,2.85814,3.199074
Alencon,48.4455,0.110167,143,NO,91.162162,88.331839,89.138393,89.1875,89.160714,90.617117,89.669643,89.584821,88.522321,89.526786,...,3.989352,3.950235,4.15514,4.248837,3.849537,4.061972,3.685648,3.302315,3.485116,3.516744
Bale,47.614333,7.51,263,NE,87.825893,85.986607,83.861607,84.660714,84.227679,86.65625,86.659193,86.486607,85.565022,87.151786,...,3.197222,2.50186,2.897674,3.091667,3.117209,3.185047,3.499074,2.945116,2.651389,2.319535
Bastia,42.540667,9.485167,10,SE,74.080357,72.793722,69.552036,71.607143,71.566964,75.743243,77.309091,77.678571,74.236364,74.193694,...,3.29213,3.02,2.726168,3.088626,3.713679,3.415349,3.578241,2.970833,3.042593,3.071163


In [44]:
meteoJ.isna().sum()[meteoJ.isna().sum() > 0]

Series([], dtype: int64)