# Imports

In [2]:
pip install xarray 

Collecting xarray
  Downloading xarray-0.16.2-py3-none-any.whl (736 kB)
     |████████████████████████████████| 736 kB 29.1 MB/s            
Installing collected packages: xarray
Successfully installed xarray-0.16.2
Note: you may need to restart the kernel to use updated packages.


In [3]:
import boto3
import xarray as xr #to read netcdf
import pandas as pd
import os
import logging
import numpy as np
from math import pi
import datetime
from dateutil.relativedelta import relativedelta
from pathlib import Path

In [4]:
#Logger set-up
logging.basicConfig(format=' %(asctime)s -  %(levelname)s -  %(message)s', 
                    handlers = [logging.StreamHandler()])
logging.getLogger().setLevel(logging.INFO)

In [5]:
S3_bucket_name = 'edfred-edfre-sbx-eu-west-1-solar-radiation-data'
S3_CSV_FOLD = r'EtudeWindIndex/ERA5'

# Récupération des données ERA5 

In [6]:
#On choisit l'année et le mois
year = 2001
month = 2
name = str(year)+'-'+str('0'+str(month) if month<10 else month)

In [7]:
input_name_nc = f'ERA5_france_'+name+'.nc'
bucket_name = 'edfred-edfre-sbx-eu-west-1-solar-radiation-data'
S3_origin = os.path.join('ERA5', 'netcdf', 'france', input_name_nc)  
body = boto3.Session().resource('s3').Bucket(bucket_name).Object(S3_origin).get()['Body'].read()
netcdf = xr.open_dataset(body, )

In [8]:
netcdf

In [9]:
# read project information and compute associated nodes
S3_project_url = f's3://{bucket_name}/ERA5/config/ERA5_project_list.csv'
projects = pd.read_csv(S3_project_url, index_col='project_code', sep=';')

 2022-01-27 16:26:31,322 -  INFO -  ascii passed initial chaos probing. Mean measured chaos is 0.000000 %
 2022-01-27 16:26:31,323 -  INFO -  ascii should target any language(s) of ['Latin Based']
 2022-01-27 16:26:31,328 -  INFO -  We detected language [('English', 0.9), ('Indonesian', 0.9), ('Simple English', 0.9)] using ascii
 2022-01-27 16:26:31,330 -  INFO -  ascii is most likely the one. Stopping the process.
 2022-01-27 16:26:31,337 -  INFO -  ascii passed initial chaos probing. Mean measured chaos is 0.000000 %
 2022-01-27 16:26:31,338 -  INFO -  ascii should target any language(s) of ['Latin Based']
 2022-01-27 16:26:31,341 -  INFO -  We detected language [('German', 0.8333), ('Hungarian', 0.8333), ('Slovak', 0.8333), ('English', 0.75), ('Dutch', 0.75), ('Italian', 0.75), ('Swedish', 0.75), ('Norwegian', 0.75), ('Czech', 0.75), ('Indonesian', 0.75), ('Danish', 0.75), ('Polish', 0.6667), ('Finnish', 0.6667), ('Slovene', 0.6667), ('Turkish', 0.5833), ('Vietnamese', 0.5), ('Lithu

In [11]:
projects.sample(5)

Unnamed: 0_level_0,latitude,longitude
project_code,Unnamed: 1_level_1,Unnamed: 2_level_1
"NAC1, NAC2",46.03,-0.605
MAZU,49.769167,3.702972
SOUR,48.53,5.41
VANA,48.86,4.68
"FRA1, FRA2",43.58,2.828


# Création du dataframe ERA5 horaire

In [11]:
#On choisit le projet
project = 'ESPS'

In [12]:
hourly_ERA5 = netcdf.sel(latitude=projects.loc[project].latitude, longitude=projects.loc[project].latitude, method="nearest").to_dataframe()
hourly_ERA5.drop(columns=['longitude', 'latitude'], inplace=True)

In [13]:
hourly_ERA5.sample(5)

Unnamed: 0_level_0,u100,v100,u10,v10,d2m,t2m,sf,sp,msdwswrf,tp
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2001-02-20 04:00:00,1.207108,0.486913,0.73703,0.485009,271.797119,273.728729,7.8e-05,97782.1875,0.0,7.9e-05
2001-02-08 05:00:00,2.851922,4.458023,-0.423755,2.15691,273.036438,282.72467,0.0,95150.5,0.0,0.0
2001-02-12 08:00:00,0.066251,5.330097,-1.578296,2.707469,272.534546,277.867676,0.0,97863.273438,60.503998,0.0
2001-02-22 03:00:00,5.912141,4.151194,2.851,2.758531,274.866486,275.821808,5e-05,96742.3125,0.0,0.00077
2001-02-22 14:00:00,5.034701,-0.888256,3.041429,-0.43714,276.841736,278.280182,0.0,96239.210938,115.535599,0.000734


# Récupération d'une année entière

In [14]:
#On choisit l'année et le projet
year = 2001
project = 'ESPS'

In [15]:
year_ERA5 = pd.DataFrame(columns=['time','u100','v100','u10','v10','d2m','t2m','sf','sp','msdwswrf','tp'])
year_ERA5.set_index('time',inplace=True)

In [16]:
for month in range(1,13) :
    name = str(year)+'-'+str('0'+str(month) if month<10 else month)
    input_name_nc = f'ERA5_france_'+name+'.nc'
    bucket_name = 'edfred-edfre-sbx-eu-west-1-solar-radiation-data'
    S3_origin = os.path.join('ERA5', 'netcdf', 'france', input_name_nc)  
    body = boto3.Session().resource('s3').Bucket(bucket_name).Object(S3_origin).get()['Body'].read()
    netcdf = xr.open_dataset(body, )
    
    df_ERA5 = netcdf.sel(latitude=projects.loc[project].latitude, longitude=projects.loc[project].latitude, method="nearest").to_dataframe()
    df_ERA5.drop(columns=['longitude', 'latitude'], inplace=True)
    
    year_ERA5 = pd.concat([year_ERA5, df_ERA5])

In [17]:
#Visualisation
year_ERA5.sample(5)

Unnamed: 0_level_0,u100,v100,u10,v10,d2m,t2m,sf,sp,msdwswrf,tp
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2001-05-22 22:00:00,-3.900232,-3.780166,-1.378312,-1.597243,280.340454,286.546478,0.0,96532.664062,0.0,0.0
2001-03-28 18:00:00,2.669271,6.013102,0.90187,3.195924,279.302338,282.333099,0.0,95300.125,10.027313,0.0001573944
2001-01-11 19:00:00,-3.132253,-1.200845,-1.490681,-0.11154,277.122742,277.157654,0.0,96563.265625,0.0,5.755574e-06
2001-11-20 16:00:00,-2.403594,-2.472601,-0.975581,-0.929706,274.790283,277.374512,0.0,97573.890625,11.683899,-2.328306e-10
2001-01-17 17:00:00,0.76666,2.196599,-0.622891,1.446798,269.494049,272.873688,0.0,96543.570312,0.0,0.0


# Récupération d'un projet entier

In [18]:
#On choisit la période et le projet
start_month = '2000-01'
end_month = '2021-11'
project = 'ESPS'

In [19]:
project_ERA5 = pd.DataFrame(columns=['time','u100','v100','u10','v10','d2m','t2m','sf','sp','msdwswrf','tp'])
project_ERA5.set_index('time',inplace=True)

In [20]:
months_range = pd.date_range(start=start_month, end=end_month, freq='MS')
    
for date in months_range :
    input_name_nc = f'ERA5_france_'+str(date)[0:7]+'.nc'
    bucket_name = 'edfred-edfre-sbx-eu-west-1-solar-radiation-data'
    S3_origin = os.path.join('ERA5', 'netcdf', 'france', input_name_nc)  
    body = boto3.Session().resource('s3').Bucket(bucket_name).Object(S3_origin).get()['Body'].read()
    netcdf = xr.open_dataset(body, )
    
    df_ERA5 = netcdf.sel(latitude=projects.loc[project].latitude, longitude=projects.loc[project].latitude, method="nearest").to_dataframe()
    df_ERA5.drop(columns=['longitude', 'latitude'], inplace=True)
    
    project_ERA5 = pd.concat([project_ERA5, df_ERA5])

In [21]:
#Visualisation
project_ERA5.sample(5)

Unnamed: 0_level_0,u100,v100,u10,v10,d2m,t2m,sf,sp,msdwswrf,tp
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2007-06-15 07:00:00,3.840508,5.342216,2.061563,3.270018,288.480652,291.079926,0.0,95534.804688,133.717712,0.0006503654
2015-02-20 12:00:00,1.893699,6.24686,1.231964,4.259797,273.281555,282.976349,0.0,96364.195312,507.369751,2.328306e-10
2013-02-15 02:00:00,1.442065,5.505701,-0.006762,2.756716,272.060883,272.509003,0.0003612218,96137.453125,0.0,0.0003614989
2016-10-19 08:00:00,4.413417,1.831125,2.879035,1.4013,280.253174,281.73407,-2.328306e-10,96827.617188,86.341003,0.0001258519
2011-05-06 12:00:00,1.489488,1.208729,1.26958,0.814349,279.438721,294.841583,0.0,96809.492188,873.68396,-4.656613e-10


# Traitement des données

In [22]:
#Load standart power curve
S3_pc_url = f's3://{bucket_name}/ERA5/config/power_curve_V90-3.0MW.csv'
power_curve = pd.read_csv(S3_pc_url, index_col='windspeed')

#On calcule les vitesses et direction de vent
project_ERA5['ws100'] = (project_ERA5['u100']**2 + project_ERA5['v100']**2)**0.5
project_ERA5['wd100'] = round(np.arctan2(project_ERA5['u100'], project_ERA5['v100'])*180/pi + 180,0)
project_ERA5['ws10'] = (project_ERA5['u10']**2 + project_ERA5['v10']**2)**0.5
project_ERA5['wd10'] = round(np.arctan2(project_ERA5['u10'], project_ERA5['v10'])*180/pi + 180,0)
project_ERA5['E100'] = np.interp(project_ERA5['ws100'], power_curve.index, power_curve['power'])  # Energy (using a power curve)
project_ERA5['rh'] = 100 - 5 * (project_ERA5['t2m'] - project_ERA5['d2m'])
project_ERA5['density'] = project_ERA5['sp'] /  ( 287.058 * project_ERA5['t2m'])
project_ERA5['E100_cor'] = project_ERA5['E100']*project_ERA5['density']/1.225   

project_ERA5.drop(columns=['u100', 'v100'], inplace=True)
project_ERA5.drop(columns=['u10', 'v10'], inplace=True)
project_ERA5.drop(columns=['sf', 'msdwswrf', 'tp'], inplace=True)

 2022-01-25 15:36:31,271 -  INFO -  NumExpr defaulting to 2 threads.


In [23]:
#Visualisation
project_ERA5.sample(5)

Unnamed: 0_level_0,d2m,t2m,sp,ws100,wd100,ws10,wd10,E100,rh,density,E100_cor
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2010-09-15 21:00:00,284.906525,285.461334,96207.0,5.960778,229.0,3.153451,224.0,346.606786,97.225952,1.174058,332.19317
2017-01-13 00:00:00,276.486084,279.223877,94509.171875,13.971225,241.0,8.354449,240.0,2961.316806,86.311035,1.179103,2850.365369
2014-07-11 04:00:00,286.26355,286.717834,96081.585938,2.392532,257.0,1.183901,222.0,0.0,97.728577,1.16739,0.0
2003-11-08 18:00:00,274.76062,277.565216,96637.679688,1.902335,178.0,1.628862,169.0,0.0,85.97702,1.212863,0.0
2013-11-16 00:00:00,273.615936,275.723053,97362.53125,3.289596,78.0,1.152999,71.0,22.298879,89.464417,1.230125,22.392164


In [24]:
ERA5_hourly_path = Path('/home/ec2-user/SageMaker/EtudeWindIndex/Data/ERA5/ERA5_hourly/Clean/ERA5_'+project+'.csv')

In [25]:
#Sauvegarde
project_ERA5.to_csv(ERA5_hourly_path, index=True, sep=';')

In [None]:
#Sauvegarde sur le S3
outfile = 'ERA5_'+project+'.csv'
projects_ERA5.to_csv(f's3://{S3_bucket_name}/{S3_CSV_FOLD}/{outfile}', index=True, sep=';')

# Récupération d'une liste de projets en France