<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Les-imports-et-configuration-du-document" data-toc-modified-id="Les-imports-et-configuration-du-document-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Les imports et configuration du document</a></span><ul class="toc-item"><li><span><a href="#La-lecture-des-fichiers-d'un-répértoire" data-toc-modified-id="La-lecture-des-fichiers-d'un-répértoire-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>La lecture des fichiers d'un répértoire</a></span></li></ul></li><li><span><a href="#La-temperature-mensuelle" data-toc-modified-id="La-temperature-mensuelle-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>La temperature mensuelle</a></span></li><li><span><a href="#La-météo-mensuelle" data-toc-modified-id="La-météo-mensuelle-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>La météo mensuelle</a></span></li><li><span><a href="#La-météo-----hebdomadaire" data-toc-modified-id="La-météo-----hebdomadaire-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>La météo     hebdomadaire</a></span></li><li><span><a href="#La-météo-----journalière" data-toc-modified-id="La-météo-----journalière-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>La météo     journalière</a></span></li></ul></div>

# Les imports et configuration du document

In [1]:
import pandas as pd 
import seaborn as sns
import warnings 
import os
import time
from datetime import datetime
from matplotlib import pyplot as plt

warnings.filterwarnings(action="ignore")

%matplotlib inline
# Select the dark-grid style sheet under whichever name this matplotlib
# installation ships.  The style-sheet name depends on the matplotlib release,
# not on the seaborn version, so probe the list of available styles instead of
# parsing sns.__version__.
if 'seaborn-v0_8-darkgrid' in plt.style.available:
    plt.style.use('seaborn-v0_8-darkgrid')
else:
    plt.style.use('seaborn-darkgrid')
    
sns.set(font_scale=3)

# os.chdir("donnees")

In [2]:

def tempsExecution(func):
    """Decorator that prints how long each call to ``func`` takes.

    After the wrapped function returns, a two-line report is printed: the
    elapsed time in seconds (difference of two ``time.perf_counter`` readings)
    and the call as it was made, i.e. the function name followed by every
    positional argument and every keyword argument.

    Args:
        func: the callable to time.

    Returns:
        A wrapper with the same signature as ``func`` that returns whatever
        ``func`` returns.  If ``func`` raises, the exception propagates and no
        report is printed.
    """
    from functools import wraps  # local import keeps this cell self-contained

    @wraps(func)  # keep __name__ / __doc__ of the decorated function
    def chronometrerExécution(*_args, **_kwargs):
        t0 = time.perf_counter()
        result = func(*_args, **_kwargs)
        elapsed = time.perf_counter() - t0
        name = func.__name__
        # Show positional arguments first, then keyword arguments, so that the
        # printed call always matches the real one (mixed calls included).
        args = ', '.join([repr(arg) for arg in _args]
                         + [f'{cle}={valeur!r}' for cle, valeur in _kwargs.items()])
        print(f'exécution {elapsed:0.8f}s\nfonction {name}({args})')
        return result
    return chronometrerExécution

In [3]:
@tempsExecution
def lectureFichiersMeteo(repertoire='../donnees/meteo_test'):
    """Load every file found under ``repertoire`` into one weather DataFrame.

    The directory tree is walked recursively; each file is parsed as a
    ';'-separated CSV in which the token 'mq' marks a missing value.  Only the
    station id, the timestamp and the wind / temperature / humidity /
    visibility / pressure / precipitation columns are read, and they are
    renamed to the French column names used in the rest of the notebook.

    NOTE(review): the frame is grown with one ``pd.concat`` per file, which
    re-copies the accumulated rows each time.  This looks intentional: the
    notebook times this version against the list-based one defined in the
    following cell.

    Args:
        repertoire: root directory containing the files to read.

    Returns:
        A DataFrame with a fresh 0..n-1 index, ``DateHeure`` parsed as
        datetime, rescaled ``Temperature`` / ``Pression`` / ``Visibilite``
        and the derived calendar columns ``Mois``, ``Annee``, ``AnneeMois``,
        ``Semaine``, ``MoisJour`` and ``JourA``.
    """
    # Source column -> notebook column; the keys double as the `usecols` list.
    colonnes = {'numer_sta': 'Station',
                'date': 'DateHeure',
                'dd': 'DirectionVent',
                'ff': 'VitesseVent',
                't': 'Temperature',
                'u': 'Humidite',
                'vv': 'Visibilite',
                'pres': 'Pression',
                'rr1': 'Precipitation01',
                'rr3': 'Precipitation03',
                'rr6': 'Precipitation06',
                'rr12': 'Precipitation12',
                'rr24': 'Precipitation24'}

    meteo = pd.DataFrame()
    for dirname, _, filenames in os.walk(repertoire):
        for filename in filenames:
            releve = pd.read_csv(os.path.join(dirname, filename),
                                 sep=';',
                                 usecols=list(colonnes),
                                 na_values='mq',
                                 # keep ids and timestamps as text
                                 dtype={'numer_sta': str, 'date': str},
                                 ).rename(columns=colonnes)
            meteo = pd.concat([meteo, releve], axis=0)

    # Every file restarts its row labels at 0.  The previous
    # `meteo.reset_index().drop(..., inplace=True)` modified a temporary copy,
    # so duplicated labels survived; rebuild the index for real.
    meteo = meteo.reset_index(drop=True)

    meteo["DateHeure"] = pd.to_datetime(meteo["DateHeure"], format='%Y%m%d%H%M%S')
    # Unit rescaling -- presumably K -> degC, Pa -> hPa and m -> km; confirm
    # against the data provider's documentation.
    meteo['Temperature'] = meteo['Temperature'] - 273.15
    meteo['Pression']    = meteo['Pression'] / 100
    meteo['Visibilite']  = meteo['Visibilite'] / 1000

    # Calendar keys used by the monthly / weekly / daily pivots.
    horodatage = meteo['DateHeure'].dt
    meteo['Mois']      = horodatage.month
    meteo['Annee']     = horodatage.year
    meteo['AnneeMois'] = horodatage.year*100 + horodatage.month   # e.g. 201112
    meteo['Semaine']   = horodatage.isocalendar().week            # ISO week number
    meteo['MoisJour']  = horodatage.month*100 + horodatage.day    # e.g. 1231
    meteo['JourA']     = horodatage.dayofyear
    return meteo

In [4]:
_ = lectureFichiersMeteo(repertoire='../donnees/meteo')

exécution 51.33682090s
fonction lectureFichiersMeteo(repertoire='../donnees/meteo')


In [5]:
@tempsExecution
def lectureFichiersMeteo(repertoire='../donnees/meteo_test'):
    listeFichiers = []
    for dirname, dirnames, filenames in os.walk(repertoire):
        for filename in filenames:
            listeFichiers.append(os.path.join(dirname, filename))
        
    meteo = pd.concat([ pd.read_csv(fichier,
                           sep=';',
                           usecols=['numer_sta','date','dd','ff','t','u','vv',
                                    'pres','rr1','rr3','rr6','rr12','rr24'],
                           na_values='mq',
                           dtype={'numer_sta':str,'date':str},
                          ).rename( columns={'numer_sta':'Station',
                                              'date':'DateHeure',
                                              'dd':'DirectionVent',
                                              'ff':'VitesseVent',
                                              't':'Temperature',
                                              'u':'Humidite',
                                              'vv':'Visibilite',
                                              'pres':'Pression',
                                              'rr1' :'Precipitation01',
                                              'rr3' :'Precipitation03',
                                              'rr6' :'Precipitation06',
                                              'rr12':'Precipitation12',
                                              'rr24':'Precipitation24'}) 
                         for fichier in listeFichiers])
    meteo.reset_index().drop(columns='index',inplace=True)
    meteo["DateHeure"] = pd.to_datetime(meteo["DateHeure"], format='%Y%m%d%H%M%S')
    meteo.Temperature  = meteo.Temperature - 273.15
    meteo.Pression     = meteo.Pression / 100
    meteo.Visibilite   = meteo.Visibilite / 1000
    meteo['Mois']      = meteo.DateHeure.dt.month
    meteo['Annee']     = meteo.DateHeure.dt.year
    meteo['AnneeMois'] = meteo.DateHeure.dt.year*100 + meteo.DateHeure.dt.month
    meteo['Semaine']   = meteo.DateHeure.dt.isocalendar().week
    meteo['MoisJour']  = meteo.DateHeure.dt.month*100 + meteo.DateHeure.dt.day
    meteo['JourA']     = meteo.DateHeure.dt.dayofyear
    return meteo
    

In [6]:
_ = lectureFichiersMeteo(repertoire='../donnees/meteo')

exécution 21.74414310s
fonction lectureFichiersMeteo(repertoire='../donnees/meteo')


## La lecture des fichiers d'un répertoire

In [4]:
%%time
meteo = lectureFichiersMeteo()

CPU times: user 22.4 s, sys: 8.34 s, total: 30.7 s
Wall time: 30 s


In [5]:
meteo.tail()

Unnamed: 0,Station,DateHeure,DirectionVent,VitesseVent,Temperature,Humidite,Visibilite,Pression,Precipitation01,Precipitation03,Precipitation06,Precipitation12,Precipitation24,Mois,Annee,AnneeMois,Semaine,MoisJour,JourA
13957,78925,2011-12-31 21:00:00,80.0,6.7,25.7,82.0,25.0,,,,,,,12,2011,201112,52,1231,365
13958,81401,2011-12-31 21:00:00,40.0,2.6,25.1,89.0,,1010.9,,0.0,,,,12,2011,201112,52,1231,365
13959,81405,2011-12-31 21:00:00,140.0,2.6,23.7,93.0,3.0,1011.0,,11.0,,,,12,2011,201112,52,1231,365
13960,81408,2011-12-31 21:00:00,30.0,3.1,24.0,88.0,,1011.1,,0.2,,,,12,2011,201112,52,1231,365
13961,89642,2011-12-31 21:00:00,200.0,3.1,1.4,49.0,50.0,981.6,,,,,,12,2011,201112,52,1231,365


In [6]:
meteo.shape

(4687732, 19)

In [7]:
# Station catalogue; ID is read as text so it keeps its exact written form.
postes = pd.read_csv('postesSynop.csv', sep=';', dtype={'ID': str})

# Shorten each station name to the part before its first '-', except for the
# few compound names listed here, then switch to title case.
noms_composes = ['CLERMONT-FD', 'MONT-DE-MARSAN', 'ST-PIERRE', 'ST-BARTHELEMY METEO']
nom_court = postes['Nom'].str.split('-', n=1).str[0]
postes['Nom'] = postes['Nom'].where(postes['Nom'].isin(noms_composes), nom_court).str.title()

postes['Altitude'] = postes['Altitude'].astype('int16')

# Lexicographic comparison on the text ids -- presumably this keeps only the
# stations whose id starts with '07' (TODO confirm what that prefix denotes).
# .copy() makes the result an independent frame: the next cell adds a 'Zone'
# column to it, which would otherwise be a write on a slice of the full table.
postes = postes[postes['ID'] < '08000'].copy()

In [8]:
# Tag each station with a two-letter quadrant (N/S then E/O) relative to the
# mean latitude and mean longitude of the stations.  The N and E sides use >=
# so that a station lying exactly on a mean still receives a zone instead of
# being left as NaN.
latitude_moyenne  = postes.Latitude.mean()
longitude_moyenne = postes.Longitude.mean()
postes.loc[postes.Latitude  <  latitude_moyenne,  'Zone'] = 'S'
postes.loc[postes.Latitude  >= latitude_moyenne,  'Zone'] = 'N'
postes.loc[postes.Longitude <  longitude_moyenne, 'Zone'] += 'O'
postes.loc[postes.Longitude >= longitude_moyenne, 'Zone'] += 'E'

In [9]:
postes.head()

Unnamed: 0,ID,Nom,Latitude,Longitude,Altitude,Zone
0,7005,Abbeville,50.136,1.834,69,NO
1,7015,Lille,50.57,3.0975,47,NE
2,7020,Pte De La Hague,49.725167,-1.939833,6,NO
3,7027,Caen,49.18,-0.456167,67,NO
4,7037,Rouen,49.383,1.181667,151,NO


In [10]:
# Attach the station attributes to every observation.  The inner join keeps
# only observations whose Station matches a retained station ID; both join
# keys are dropped afterwards.
meteo = (
    postes
    .merge(meteo, how="inner", left_on="ID", right_on="Station")
    .drop(["ID", "Station"], axis="columns")
)

In [11]:
# Build one precipitation column scaled to a 3-hour amount: take the 3 h value
# when present, otherwise fall back, in this order, on the 6 h, 12 h and 24 h
# totals divided down and finally on the 1 h total multiplied up.
equivalents_3h = [
    meteo['Precipitation03'],
    meteo['Precipitation06'] / 2,
    meteo['Precipitation12'] / 4,
    meteo['Precipitation24'] / 8,
    meteo['Precipitation01'] * 3,
]
precipitation = equivalents_3h[0]
for candidat in equivalents_3h[1:]:
    # combine_first only fills the positions that are still missing
    precipitation = precipitation.combine_first(candidat)
meteo['Precipitation'] = precipitation

In [12]:
# The per-duration accumulation columns are no longer needed once the single
# 'Precipitation' column exists.
colonnes_cumul = ['Precipitation01',
                  'Precipitation03',
                  'Precipitation06',
                  'Precipitation12',
                  'Precipitation24']
meteo = meteo.drop(columns=colonnes_cumul)

In [13]:
meteo.to_parquet('meteo.gzip',compression='gzip', engine='pyarrow')

In [14]:
!dir meteo.gzip

meteo.gzip


In [15]:
meteo.to_csv('meteo.csv')

In [16]:
%%time
meteo = pd.read_parquet('meteo.gzip', engine='pyarrow')
meteo.dtypes

CPU times: user 887 ms, sys: 357 ms, total: 1.24 s
Wall time: 309 ms


Nom                      object
Latitude                float64
Longitude               float64
Altitude                  int16
Zone                     object
DateHeure        datetime64[ns]
DirectionVent           float64
VitesseVent             float64
Temperature             float64
Humidite                float64
Visibilite              float64
Pression                float64
Mois                      int32
Annee                     int32
AnneeMois                 int64
Semaine                  UInt32
MoisJour                  int64
JourA                     int32
Precipitation           float64
dtype: object

In [17]:
%%time
pd.read_csv('meteo.csv').dtypes

CPU times: user 4.38 s, sys: 637 ms, total: 5.01 s
Wall time: 4.98 s


Unnamed: 0         int64
Nom               object
Latitude         float64
Longitude        float64
Altitude           int64
Zone              object
DateHeure         object
DirectionVent    float64
VitesseVent      float64
Temperature      float64
Humidite         float64
Visibilite       float64
Pression         float64
Mois               int64
Annee              int64
AnneeMois          int64
Semaine            int64
MoisJour           int64
JourA              int64
Precipitation    float64
dtype: object

In [18]:
meteo.isna().sum()

Nom                   0
Latitude              0
Longitude             0
Altitude              0
Zone                  0
DateHeure             0
DirectionVent      8125
VitesseVent        7686
Temperature       14232
Humidite          17905
Visibilite       443500
Pression          20743
Mois                  0
Annee                 0
AnneeMois             0
Semaine               0
MoisJour              0
JourA                 0
Precipitation     29110
dtype: int64

In [19]:
meteo.head()

Unnamed: 0,Nom,Latitude,Longitude,Altitude,Zone,DateHeure,DirectionVent,VitesseVent,Temperature,Humidite,Visibilite,Pression,Mois,Annee,AnneeMois,Semaine,MoisJour,JourA,Precipitation
0,Abbeville,50.136,1.834,69,NO,2018-08-01 00:00:00,320.0,0.6,15.1,84.0,20.0,1012.5,8,2018,201808,31,801,213,0.0
1,Abbeville,50.136,1.834,69,NO,2018-08-01 03:00:00,0.0,0.0,13.9,88.0,20.0,1012.8,8,2018,201808,31,801,213,0.0
2,Abbeville,50.136,1.834,69,NO,2018-08-01 06:00:00,0.0,0.0,15.7,89.0,20.0,1013.2,8,2018,201808,31,801,213,0.0
3,Abbeville,50.136,1.834,69,NO,2018-08-01 09:00:00,310.0,3.6,21.7,54.0,20.0,1013.8,8,2018,201808,31,801,213,0.0
4,Abbeville,50.136,1.834,69,NO,2018-08-01 12:00:00,280.0,6.3,24.1,53.0,20.0,1013.4,8,2018,201808,31,801,213,0.0


In [20]:
meteo.columns

Index(['Nom', 'Latitude', 'Longitude', 'Altitude', 'Zone', 'DateHeure',
       'DirectionVent', 'VitesseVent', 'Temperature', 'Humidite', 'Visibilite',
       'Pression', 'Mois', 'Annee', 'AnneeMois', 'Semaine', 'MoisJour',
       'JourA', 'Precipitation'],
      dtype='object')

# La température mensuelle

In [21]:
# Average temperature per station (rows) and calendar month (columns);
# pivot_table aggregates with its default function, the mean.
temperatures = meteo[['Nom', 'Latitude', 'Longitude', 'Altitude','Zone', 
                'Temperature','Mois']].pivot_table(index=['Nom', 'Latitude', 'Longitude', 'Altitude','Zone'],
                           columns='Mois')
# Label each column from its month number rather than by position, so the
# labels stay correct (and the assignment does not fail) when the data does
# not cover all twelve months.
noms_mois = {1: 'janv.', 2: 'févr.', 3: 'mars', 4: 'avr.', 5: 'mai', 6: 'juin',
             7: 'juill.', 8: 'août', 9: 'sept.', 10: 'oct.', 11: 'nov.', 12: 'déc.'}
temperatures.columns = [noms_mois[mois] for _, mois in temperatures.columns]
temperatures.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,janv.,févr.,mars,avr.,mai,juin,juill.,août,sept.,oct.,nov.,déc.
Nom,Latitude,Longitude,Altitude,Zone,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Abbeville,50.136,1.834,69,NO,4.416015,5.088346,7.184815,9.752299,12.884036,15.93366,17.664069,17.818991,15.381905,12.019232,7.801386,5.118958
Ajaccio,41.918,8.792667,5,SE,8.971346,9.086835,10.95172,13.586948,17.399222,21.596028,23.869793,23.890734,20.840689,17.513784,13.140287,10.003385
Alencon,48.4455,0.110167,143,NO,4.502626,5.042609,7.396901,9.982821,13.431216,16.939596,18.613705,18.444194,15.455331,11.899666,7.60989,4.985297
Bale,47.614333,7.51,263,NE,2.102671,3.250384,6.62827,10.519056,14.8776,18.859165,20.112717,19.659928,15.428438,11.174471,5.962064,2.901689
Bastia,42.540667,9.485167,10,SE,9.132503,9.277054,11.186677,13.764725,17.795831,22.107909,24.846507,24.814675,21.202917,17.452476,13.203654,10.125015


In [22]:
temperatures.to_parquet('temperatures.gzip',compression='gzip', engine='pyarrow')

In [23]:
temperatures.to_csv('temperatures.csv')

In [24]:
%%time
temperatures = pd.read_parquet('temperatures.gzip', engine='pyarrow')
temperatures.dtypes

CPU times: user 4.9 ms, sys: 173 µs, total: 5.08 ms
Wall time: 3.96 ms


janv.     float64
févr.     float64
mars      float64
avr.      float64
mai       float64
juin      float64
juill.    float64
août      float64
sept.     float64
oct.      float64
nov.      float64
déc.      float64
dtype: object

In [25]:
temperatures.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,janv.,févr.,mars,avr.,mai,juin,juill.,août,sept.,oct.,nov.,déc.
Nom,Latitude,Longitude,Altitude,Zone,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Abbeville,50.136,1.834,69,NO,4.416015,5.088346,7.184815,9.752299,12.884036,15.93366,17.664069,17.818991,15.381905,12.019232,7.801386,5.118958
Ajaccio,41.918,8.792667,5,SE,8.971346,9.086835,10.95172,13.586948,17.399222,21.596028,23.869793,23.890734,20.840689,17.513784,13.140287,10.003385
Alencon,48.4455,0.110167,143,NO,4.502626,5.042609,7.396901,9.982821,13.431216,16.939596,18.613705,18.444194,15.455331,11.899666,7.60989,4.985297
Bale,47.614333,7.51,263,NE,2.102671,3.250384,6.62827,10.519056,14.8776,18.859165,20.112717,19.659928,15.428438,11.174471,5.962064,2.901689
Bastia,42.540667,9.485167,10,SE,9.132503,9.277054,11.186677,13.764725,17.795831,22.107909,24.846507,24.814675,21.202917,17.452476,13.203654,10.125015


# La météo mensuelle

In [26]:
# Monthly table: one row per station, one column per (measure, month) pair,
# each cell aggregated by pivot_table's default function (the mean).
cles_station = ['Nom', 'Latitude', 'Longitude', 'Altitude', 'Zone']
mesures = ['VitesseVent', 'Temperature', 'Humidite',
           'Visibilite', 'Pression', 'Precipitation']
meteoM = meteo[cles_station + mesures + ['Mois']].pivot_table(index=cles_station,
                                                              columns='Mois')
# Flatten the (measure, month) column pairs into names such as '01Humidite'.
meteoM.columns = ['{:02d}{}'.format(mois, mesure) for mesure, mois in meteoM.columns]
meteoM.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,01Humidite,02Humidite,03Humidite,04Humidite,05Humidite,06Humidite,07Humidite,08Humidite,09Humidite,10Humidite,...,03VitesseVent,04VitesseVent,05VitesseVent,06VitesseVent,07VitesseVent,08VitesseVent,09VitesseVent,10VitesseVent,11VitesseVent,12VitesseVent
Nom,Latitude,Longitude,Altitude,Zone,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Abbeville,50.136,1.834,69,NO,86.975155,83.325977,80.486119,75.751961,78.202026,78.624456,77.420686,78.120302,80.311983,84.552384,...,4.736764,4.276614,4.196525,3.849655,3.901988,3.712431,3.681765,4.127008,4.302552,4.648167
Ajaccio,41.918,8.792667,5,SE,77.529378,75.806692,74.334296,74.341649,73.363951,70.478437,67.49364,67.645003,70.284067,74.452795,...,3.232746,3.303168,3.290537,3.425614,3.601272,3.506932,3.382304,3.089899,3.179686,3.203963
Alencon,48.4455,0.110167,143,NO,88.834681,83.972492,79.3883,74.503517,75.666423,73.701914,71.170101,72.901735,76.83521,84.782143,...,3.684932,3.42503,3.44228,3.176978,3.267627,2.977263,2.872489,3.068773,3.303538,3.592988
Bale,47.614333,7.51,263,NE,84.607354,78.997464,72.996386,69.473009,72.066166,70.769323,70.453125,74.063934,78.932384,83.696965,...,2.877001,2.72045,2.691697,2.456689,2.412413,2.1565,2.161537,2.323689,2.498577,2.86092
Bastia,42.540667,9.485167,10,SE,73.245688,71.438186,71.561406,73.276361,72.507856,70.439276,66.36427,66.231745,68.743308,75.969851,...,3.161443,2.914787,2.652006,2.611589,2.75305,2.703995,2.816942,2.792344,3.007096,3.181684


In [27]:
meteoM.to_parquet('meteoM.gzip',compression='gzip', engine='pyarrow')

In [28]:
%%time
meteoM = pd.read_parquet('meteoM.gzip', engine='pyarrow')
meteoM.dtypes

CPU times: user 9.31 ms, sys: 253 µs, total: 9.56 ms
Wall time: 7.33 ms


01Humidite       float64
02Humidite       float64
03Humidite       float64
04Humidite       float64
05Humidite       float64
                  ...   
08VitesseVent    float64
09VitesseVent    float64
10VitesseVent    float64
11VitesseVent    float64
12VitesseVent    float64
Length: 72, dtype: object

In [29]:
meteoM.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,01Humidite,02Humidite,03Humidite,04Humidite,05Humidite,06Humidite,07Humidite,08Humidite,09Humidite,10Humidite,...,03VitesseVent,04VitesseVent,05VitesseVent,06VitesseVent,07VitesseVent,08VitesseVent,09VitesseVent,10VitesseVent,11VitesseVent,12VitesseVent
Nom,Latitude,Longitude,Altitude,Zone,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Abbeville,50.136,1.834,69,NO,86.975155,83.325977,80.486119,75.751961,78.202026,78.624456,77.420686,78.120302,80.311983,84.552384,...,4.736764,4.276614,4.196525,3.849655,3.901988,3.712431,3.681765,4.127008,4.302552,4.648167
Ajaccio,41.918,8.792667,5,SE,77.529378,75.806692,74.334296,74.341649,73.363951,70.478437,67.49364,67.645003,70.284067,74.452795,...,3.232746,3.303168,3.290537,3.425614,3.601272,3.506932,3.382304,3.089899,3.179686,3.203963
Alencon,48.4455,0.110167,143,NO,88.834681,83.972492,79.3883,74.503517,75.666423,73.701914,71.170101,72.901735,76.83521,84.782143,...,3.684932,3.42503,3.44228,3.176978,3.267627,2.977263,2.872489,3.068773,3.303538,3.592988
Bale,47.614333,7.51,263,NE,84.607354,78.997464,72.996386,69.473009,72.066166,70.769323,70.453125,74.063934,78.932384,83.696965,...,2.877001,2.72045,2.691697,2.456689,2.412413,2.1565,2.161537,2.323689,2.498577,2.86092
Bastia,42.540667,9.485167,10,SE,73.245688,71.438186,71.561406,73.276361,72.507856,70.439276,66.36427,66.231745,68.743308,75.969851,...,3.161443,2.914787,2.652006,2.611589,2.75305,2.703995,2.816942,2.792344,3.007096,3.181684


# La météo 	hebdomadaire

In [30]:
# Weekly table: one row per station, one column per (measure, week number)
# pair, each cell aggregated by pivot_table's default function (the mean).
cles_station = ['Nom', 'Latitude', 'Longitude', 'Altitude', 'Zone']
mesures = ['VitesseVent', 'Temperature', 'Humidite',
           'Visibilite', 'Pression', 'Precipitation']
selection = meteo[cles_station + mesures + ['Semaine']]
meteoH = selection.pivot_table(index=cles_station, columns='Semaine')
# Flatten the (measure, week) column pairs into names such as '01Humidite'.
noms_colonnes = []
for mesure, semaine in meteoH.columns:
    noms_colonnes.append(f'{semaine:02d}{mesure}')
meteoH.columns = noms_colonnes
meteoH.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,01Humidite,02Humidite,03Humidite,04Humidite,05Humidite,06Humidite,07Humidite,08Humidite,09Humidite,10Humidite,...,44VitesseVent,45VitesseVent,46VitesseVent,47VitesseVent,48VitesseVent,49VitesseVent,50VitesseVent,51VitesseVent,52VitesseVent,53VitesseVent
Nom,Latitude,Longitude,Altitude,Zone,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Abbeville,50.136,1.834,69,NO,87.046036,87.223785,86.772031,86.837612,85.790385,82.854487,84.239075,82.814696,81.699872,82.627249,...,4.240999,4.607886,3.95935,4.49668,4.441391,4.605792,4.509546,4.5248,4.888742,4.716071
Ajaccio,41.918,8.792667,5,SE,78.14157,78.581142,78.161558,75.316026,77.378603,74.603967,76.081999,76.220945,75.159642,74.222506,...,3.151057,3.24474,3.088793,3.271011,3.139561,3.347771,3.214589,2.965087,3.257048,3.530714
Alencon,48.4455,0.110167,143,NO,89.467732,89.353129,89.228608,87.52046,86.807569,84.208333,84.493891,83.197689,81.468509,81.68754,...,3.245891,3.507579,3.071213,3.290691,3.46225,3.681673,3.458961,3.343285,3.897405,3.497101
Bale,47.614333,7.51,263,NE,85.532227,85.766922,83.641124,84.032609,82.401278,80.06142,80.107918,77.05641,75.317167,73.388003,...,2.442566,2.607361,2.312948,2.605886,2.608554,2.885316,2.987724,2.770756,2.924552,2.729032
Bastia,42.540667,9.485167,10,SE,73.626521,73.588538,73.014744,73.084244,73.209272,69.658537,70.112893,73.539398,71.365385,70.272197,...,2.890231,2.781848,2.924431,3.427563,2.981897,3.321172,3.179105,3.077852,3.175681,3.001071


In [31]:
meteoH.isna().sum()[meteoH.isna().sum() > 0]

Series([], dtype: int64)

In [32]:
meteoH.to_parquet('meteoH.gzip',compression='gzip', engine='pyarrow')

In [33]:
%%time
meteoH = pd.read_parquet('meteoH.gzip', engine='pyarrow')
meteoH.dtypes

CPU times: user 28 ms, sys: 4.77 ms, total: 32.8 ms
Wall time: 22.9 ms


01Humidite       float64
02Humidite       float64
03Humidite       float64
04Humidite       float64
05Humidite       float64
                  ...   
49VitesseVent    float64
50VitesseVent    float64
51VitesseVent    float64
52VitesseVent    float64
53VitesseVent    float64
Length: 318, dtype: object

In [34]:
meteoH.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,01Humidite,02Humidite,03Humidite,04Humidite,05Humidite,06Humidite,07Humidite,08Humidite,09Humidite,10Humidite,...,44VitesseVent,45VitesseVent,46VitesseVent,47VitesseVent,48VitesseVent,49VitesseVent,50VitesseVent,51VitesseVent,52VitesseVent,53VitesseVent
Nom,Latitude,Longitude,Altitude,Zone,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Abbeville,50.136,1.834,69,NO,87.046036,87.223785,86.772031,86.837612,85.790385,82.854487,84.239075,82.814696,81.699872,82.627249,...,4.240999,4.607886,3.95935,4.49668,4.441391,4.605792,4.509546,4.5248,4.888742,4.716071
Ajaccio,41.918,8.792667,5,SE,78.14157,78.581142,78.161558,75.316026,77.378603,74.603967,76.081999,76.220945,75.159642,74.222506,...,3.151057,3.24474,3.088793,3.271011,3.139561,3.347771,3.214589,2.965087,3.257048,3.530714
Alencon,48.4455,0.110167,143,NO,89.467732,89.353129,89.228608,87.52046,86.807569,84.208333,84.493891,83.197689,81.468509,81.68754,...,3.245891,3.507579,3.071213,3.290691,3.46225,3.681673,3.458961,3.343285,3.897405,3.497101
Bale,47.614333,7.51,263,NE,85.532227,85.766922,83.641124,84.032609,82.401278,80.06142,80.107918,77.05641,75.317167,73.388003,...,2.442566,2.607361,2.312948,2.605886,2.608554,2.885316,2.987724,2.770756,2.924552,2.729032
Bastia,42.540667,9.485167,10,SE,73.626521,73.588538,73.014744,73.084244,73.209272,69.658537,70.112893,73.539398,71.365385,70.272197,...,2.890231,2.781848,2.924431,3.427563,2.981897,3.321172,3.179105,3.077852,3.175681,3.001071


# La météo 	journalière

In [35]:
# Daily table: one row per station, one column per (measure, MoisJour) pair,
# MoisJour being the month*100 + day key built when the files are loaded.
# Cells are aggregated by pivot_table's default function (the mean).
cles_station = ['Nom', 'Latitude', 'Longitude', 'Altitude', 'Zone']
mesures = ['VitesseVent', 'Temperature', 'Humidite',
           'Visibilite', 'Pression', 'Precipitation']
colonnes_utiles = cles_station + mesures + ['MoisJour']
meteoJ = meteo[colonnes_utiles].pivot_table(index=cles_station, columns='MoisJour')
# Flatten the (measure, day) column pairs into names such as '0101Humidite'.
meteoJ.columns = ['{:04d}{}'.format(jour, mesure) for mesure, jour in meteoJ.columns]
meteoJ.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,0101Humidite,0102Humidite,0103Humidite,0104Humidite,0105Humidite,0106Humidite,0107Humidite,0108Humidite,0109Humidite,0110Humidite,...,1222VitesseVent,1223VitesseVent,1224VitesseVent,1225VitesseVent,1226VitesseVent,1227VitesseVent,1228VitesseVent,1229VitesseVent,1230VitesseVent,1231VitesseVent
Nom,Latitude,Longitude,Altitude,Zone,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Abbeville,50.136,1.834,69,NO,88.589286,87.223214,87.312217,86.232143,87.040179,87.075893,86.008929,86.691964,86.901786,88.566964,...,5.246759,5.146759,5.230556,4.940278,4.881944,4.893925,4.772222,4.55463,4.74537,4.202326
Ajaccio,41.918,8.792667,5,SE,77.620536,78.524664,78.635135,79.709091,79.022321,78.108108,77.83945,79.318182,78.219731,78.336323,...,2.926442,3.023697,2.908333,3.221759,3.144393,3.422685,4.146047,3.128704,2.86093,3.177778
Alencon,48.4455,0.110167,143,NO,90.846847,88.2287,89.205357,89.34375,89.441964,90.554054,89.504464,89.526786,88.446429,89.8125,...,3.862037,3.7277,3.974766,4.116279,3.803241,3.982629,3.733333,3.386111,3.50093,3.477674
Bale,47.614333,7.51,263,NE,87.696429,86.008929,84.049107,84.794643,83.897321,86.678571,86.421525,86.299107,86.040359,87.102679,...,3.171296,2.528837,2.732093,3.041204,3.124651,3.11729,3.471296,2.913953,2.674074,2.316279
Bastia,42.540667,9.485167,10,SE,74.526786,73.278027,69.375566,71.308036,71.455357,75.004505,77.0,78.147321,75.354545,74.585586,...,3.314815,3.064186,2.711215,2.970616,3.518868,3.355349,3.583333,2.954167,2.977778,3.037674


In [36]:
meteoJ.to_parquet('meteoJ.gzip',compression='gzip', engine='pyarrow')

In [37]:
%%time
meteoJ = pd.read_parquet('meteoJ.gzip', engine='pyarrow')
meteoJ.dtypes

CPU times: user 187 ms, sys: 36.5 ms, total: 223 ms
Wall time: 157 ms


0101Humidite       float64
0102Humidite       float64
0103Humidite       float64
0104Humidite       float64
0105Humidite       float64
                    ...   
1227VitesseVent    float64
1228VitesseVent    float64
1229VitesseVent    float64
1230VitesseVent    float64
1231VitesseVent    float64
Length: 2196, dtype: object

In [38]:
meteoJ.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,0101Humidite,0102Humidite,0103Humidite,0104Humidite,0105Humidite,0106Humidite,0107Humidite,0108Humidite,0109Humidite,0110Humidite,...,1222VitesseVent,1223VitesseVent,1224VitesseVent,1225VitesseVent,1226VitesseVent,1227VitesseVent,1228VitesseVent,1229VitesseVent,1230VitesseVent,1231VitesseVent
Nom,Latitude,Longitude,Altitude,Zone,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Abbeville,50.136,1.834,69,NO,88.589286,87.223214,87.312217,86.232143,87.040179,87.075893,86.008929,86.691964,86.901786,88.566964,...,5.246759,5.146759,5.230556,4.940278,4.881944,4.893925,4.772222,4.55463,4.74537,4.202326
Ajaccio,41.918,8.792667,5,SE,77.620536,78.524664,78.635135,79.709091,79.022321,78.108108,77.83945,79.318182,78.219731,78.336323,...,2.926442,3.023697,2.908333,3.221759,3.144393,3.422685,4.146047,3.128704,2.86093,3.177778
Alencon,48.4455,0.110167,143,NO,90.846847,88.2287,89.205357,89.34375,89.441964,90.554054,89.504464,89.526786,88.446429,89.8125,...,3.862037,3.7277,3.974766,4.116279,3.803241,3.982629,3.733333,3.386111,3.50093,3.477674
Bale,47.614333,7.51,263,NE,87.696429,86.008929,84.049107,84.794643,83.897321,86.678571,86.421525,86.299107,86.040359,87.102679,...,3.171296,2.528837,2.732093,3.041204,3.124651,3.11729,3.471296,2.913953,2.674074,2.316279
Bastia,42.540667,9.485167,10,SE,74.526786,73.278027,69.375566,71.308036,71.455357,75.004505,77.0,78.147321,75.354545,74.585586,...,3.314815,3.064186,2.711215,2.970616,3.518868,3.355349,3.583333,2.954167,2.977778,3.037674


In [39]:
meteoJ.isna().sum()[meteoJ.isna().sum() > 0]

Series([], dtype: int64)