In [5]:
pip install xarray

Note: you may need to restart the kernel to use updated packages.


In [6]:
import boto3
import xarray as xr #to read netcdf
import pandas as pd
import os
import logging
import numpy as np
from math import pi
import datetime
from dateutil.relativedelta import relativedelta
from pathlib import Path

In [7]:
# Logger set-up: send records to a stream handler using a
# "timestamp - level - message" layout.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format=' %(asctime)s -  %(levelname)s -  %(message)s',
)
# The level is set on the root logger in a separate call so that it applies
# even when basicConfig() finds the root logger already configured.
logging.getLogger().setLevel(level=logging.INFO)

In [8]:
# S3 bucket used for the configuration files, the ERA5 netCDF inputs and the CSV outputs
S3_bucket_name = "edfred-edfre-sbx-eu-west-1-solar-radiation-data"
# Key prefix (folder) inside the bucket under which the per-project CSV files are written
S3_CSV_FOLD = "EtudeWindIndex/ERA5"

In [9]:
# read project information and compute associated nodes
S3_project_url = f's3://{S3_bucket_name}/ERA5/config/ERA5_project_list.csv'
projects = pd.read_csv(S3_project_url, index_col='project_code', sep=';')

 2022-01-28 15:23:04,591 -  INFO -  ascii passed initial chaos probing. Mean measured chaos is 0.000000 %
 2022-01-28 15:23:04,593 -  INFO -  ascii should target any language(s) of ['Latin Based']
 2022-01-28 15:23:04,598 -  INFO -  We detected language [('English', 1.0), ('Indonesian', 1.0), ('Simple English', 1.0)] using ascii
 2022-01-28 15:23:04,599 -  INFO -  ascii is most likely the one. Stopping the process.
 2022-01-28 15:23:04,605 -  INFO -  ascii passed initial chaos probing. Mean measured chaos is 0.000000 %
 2022-01-28 15:23:04,606 -  INFO -  ascii should target any language(s) of ['Latin Based']
 2022-01-28 15:23:04,611 -  INFO -  We detected language [('German', 0.8333), ('Hungarian', 0.8333), ('Slovak', 0.8333), ('English', 0.75), ('Dutch', 0.75), ('Italian', 0.75), ('Swedish', 0.75), ('Norwegian', 0.75), ('Czech', 0.75), ('Indonesian', 0.75), ('Danish', 0.75), ('Polish', 0.6667), ('Finnish', 0.6667), ('Slovene', 0.6667), ('Turkish', 0.5833), ('Vietnamese', 0.5), ('Lithu

In [10]:
# Preview the first five rows of the project table
projects.head()

Unnamed: 0_level_0,latitude,longitude
project_code,Unnamed: 1_level_1,Unnamed: 2_level_1
AUQB,43.6,3.622
AMEL,48.83,6.46
BOUS,49.19,6.52
BRIY,49.75,3.4
CLIT,49.66,-1.37


In [11]:
#On choisit la période
start_month = '2000-01'
end_month = '2021-11'

List_projects = ['CDBO','AMEL','ESPS']

In [12]:
#On récupère la courbe de puissance
S3_pc_url = f's3://{S3_bucket_name}/ERA5/config/power_curve_V90-3.0MW.csv'
power_curve = pd.read_csv(S3_pc_url, index_col='windspeed')

In [13]:
# For every selected project: read the monthly ERA5 netCDF files from S3, keep
# the series at the grid point nearest to the project, derive wind speed,
# wind direction, energy, humidity and air-density columns, then save the
# result as a ';'-separated CSV both locally and on S3.
months_range = pd.date_range(start=start_month, end=end_month, freq='MS')

# A single S3 bucket handle is reused for every download instead of opening a
# new boto3 session for each month of each project.
bucket_name = S3_bucket_name
era5_bucket = boto3.Session().resource('s3').Bucket(bucket_name)

# Empty frame placed first in the concatenation: it fixes the column order of
# the result and keeps pd.concat valid even when months_range is empty.
era5_template = pd.DataFrame(
    columns=['time','u100','v100','u10','v10','d2m','t2m','sf','sp','msdwswrf','tp']
).set_index('time')

for position, code in enumerate(projects.loc[List_projects].index, start=1):

    logging.info("project : {}".format(code))

    # Project coordinates do not change from one month to the next.
    project_site = projects.loc[code]
    project_latitude = project_site.latitude
    project_longitude = project_site.longitude

    # Monthly frames are collected in a list and concatenated once, rather
    # than growing a DataFrame with pd.concat at every iteration.
    monthly_frames = [era5_template]

    for date in months_range:
        # Build the S3 key of the monthly file (S3 keys always use '/').
        input_name_nc = 'ERA5_france_' + date.strftime('%Y-%m') + '.nc'
        S3_origin = '/'.join(['ERA5', 'netcdf', 'france', input_name_nc])
        body = era5_bucket.Object(S3_origin).get()['Body'].read()

        # Extract the data at the nearest grid point. The longitude selector
        # now uses the project's longitude (it previously reused the latitude).
        # The dataset is closed as soon as the frame has been materialised.
        with xr.open_dataset(body) as netcdf:
            df_ERA5 = netcdf.sel(
                latitude=project_latitude,
                longitude=project_longitude,
                method="nearest",
            ).to_dataframe()
        df_ERA5 = df_ERA5.drop(columns=['longitude', 'latitude'])

        monthly_frames.append(df_ERA5)

    projects_ERA5 = pd.concat(monthly_frames)

    # Compute wind speeds and wind directions from the u/v components.
    # NOTE(review): arctan2(u, v) + 180 looks like the meteorological "blowing
    # from" convention, assuming u is eastward and v northward - confirm.
    # The rounded direction lies in 0..360, so both 0 and 360 can occur.
    projects_ERA5['ws100'] = (projects_ERA5['u100']**2 + projects_ERA5['v100']**2)**0.5
    projects_ERA5['wd100'] = round(np.arctan2(projects_ERA5['u100'], projects_ERA5['v100'])*180/pi + 180,0)
    projects_ERA5['ws10'] = (projects_ERA5['u10']**2 + projects_ERA5['v10']**2)**0.5
    projects_ERA5['wd10'] = round(np.arctan2(projects_ERA5['u10'], projects_ERA5['v10'])*180/pi + 180,0)
    # Energy (using a power curve): linear interpolation of the 'power' column
    # at the 100 m wind speed.
    projects_ERA5['E100'] = np.interp(projects_ERA5['ws100'], power_curve.index, power_curve['power'])
    # Relative humidity estimated from the gap between t2m and d2m.
    projects_ERA5['rh'] = 100 - 5 * (projects_ERA5['t2m'] - projects_ERA5['d2m'])
    # Air density as sp / (R * t2m); 287.058 is presumably the specific gas
    # constant of dry air and 1.225 the reference density - TODO confirm units.
    projects_ERA5['density'] = projects_ERA5['sp'] /  ( 287.058 * projects_ERA5['t2m'])
    projects_ERA5['E100_cor'] = projects_ERA5['E100']*projects_ERA5['density']/1.225

    # Remove the columns that are no longer needed.
    projects_ERA5 = projects_ERA5.drop(
        columns=['u100', 'v100', 'u10', 'v10', 'sf', 'msdwswrf', 'tp']
    )

    # Save locally.
    ERA5_hourly_path = Path('/home/ec2-user/SageMaker/EtudeWindIndex/Data/ERA5/ERA5_hourly/Clean/ERA5_'+code+'.csv')
    projects_ERA5.to_csv(ERA5_hourly_path, index=True, sep=';')

    # Save on S3.
    outfile = 'ERA5_'+code+'.csv'
    projects_ERA5.to_csv(f's3://{S3_bucket_name}/{S3_CSV_FOLD}/{outfile}', index=True, sep=';')

    # Progress is based on the loop position, which stays correct even if a
    # code appears more than once in List_projects.
    logging.info("Complété à : "+str(position/len(List_projects)*100)+'%')

 2022-01-28 15:24:47,793 -  INFO -  project : CDBO
 2022-01-28 15:27:54,311 -  INFO -  NumExpr defaulting to 2 threads.
 2022-01-28 15:28:05,089 -  INFO -  Complété à : 33.33333333333333%
 2022-01-28 15:28:05,090 -  INFO -  project : AMEL
 2022-01-28 15:31:20,964 -  INFO -  Complété à : 66.66666666666666%
 2022-01-28 15:31:20,964 -  INFO -  project : ESPS
 2022-01-28 15:34:15,892 -  INFO -  Complété à : 100.0%
