# Imports

In [1]:
pip install xarray 

Note: you may need to restart the kernel to use updated packages.


In [2]:
import boto3
import xarray as xr #to read netcdf
import pandas as pd
import os
import logging
import numpy as np
from math import pi
import datetime
from dateutil.relativedelta import relativedelta
from pathlib import Path

In [3]:
# Logging configuration: timestamped records sent to a stream handler,
# with the root logger set to report INFO and above.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format=' %(asctime)s -  %(levelname)s -  %(message)s',
)
logging.root.setLevel(logging.INFO)

In [4]:
# S3 destination used when exporting CSV files: bucket name and key prefix.
S3_bucket_name = "edfred-edfre-sbx-eu-west-1-solar-radiation-data"
S3_CSV_FOLD = "EtudeWindIndex/ERA5"

# Récupération des données ERA5 

In [5]:
# Pick the year and the month to load; `name` is the zero-padded
# 'YYYY-MM' tag used in the monthly file names.
year = 2021
month = 2
name = f'{year}-{month:02d}'

In [6]:
# Download the monthly ERA5 NetCDF file for the selected 'YYYY-MM' from S3
# and open it with xarray.
input_name_nc = 'ERA5_france_' + name + '.nc'
bucket_name = 'edfred-edfre-sbx-eu-west-1-solar-radiation-data'
# S3 object keys always use '/' as separator, so the key is built explicitly
# rather than with os.path.join, whose separator depends on the platform.
S3_origin = '/'.join(['ERA5', 'netcdf', 'france', input_name_nc])
# The whole object is read into memory as bytes before being handed to xarray.
body = boto3.Session().resource('s3').Bucket(bucket_name).Object(S3_origin).get()['Body'].read()
netcdf = xr.open_dataset(body)

In [7]:
# Display the dataset that was just opened to inspect its contents.
netcdf

In [8]:
# Load the project list from S3: a semicolon-separated CSV indexed by project code.
S3_project_url = f's3://{bucket_name}/ERA5/config/ERA5_project_list.csv'
projects = pd.read_csv(
    S3_project_url,
    sep=';',
    index_col='project_code',
)

 2022-01-27 16:48:44,022 -  INFO -  ascii passed initial chaos probing. Mean measured chaos is 0.000000 %
 2022-01-27 16:48:44,024 -  INFO -  ascii should target any language(s) of ['Latin Based']
 2022-01-27 16:48:44,030 -  INFO -  We detected language [('English', 1.0), ('Indonesian', 1.0), ('Simple English', 1.0)] using ascii
 2022-01-27 16:48:44,035 -  INFO -  ascii is most likely the one. Stopping the process.
 2022-01-27 16:48:44,042 -  INFO -  ascii passed initial chaos probing. Mean measured chaos is 0.000000 %
 2022-01-27 16:48:44,043 -  INFO -  ascii should target any language(s) of ['Latin Based']
 2022-01-27 16:48:44,046 -  INFO -  We detected language [('German', 0.8333), ('Hungarian', 0.8333), ('Slovak', 0.8333), ('English', 0.75), ('Dutch', 0.75), ('Italian', 0.75), ('Swedish', 0.75), ('Norwegian', 0.75), ('Czech', 0.75), ('Indonesian', 0.75), ('Danish', 0.75), ('Polish', 0.6667), ('Finnish', 0.6667), ('Slovene', 0.6667), ('Turkish', 0.5833), ('Vietnamese', 0.5), ('Lithu

Traceback (most recent call last):
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-371669e28083>", line 3, in <module>
    projects = pd.read_csv(S3_project_url, index_col='project_code', sep=';')
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/io/parsers.py", line 688, in read_csv
    return _read(filepath_or_buffer, kwds)
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/io/parsers.py", line 437, in _read
    filepath_or_buffer, encoding, compression
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/io/common.py", line 222, in get_filepath_or_buffer
    filepath_or_buffer, mode=mode or "rb", **(storage_options or {})
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/fsspec/core.py", line 134, in open
    ou

TypeError: object of type 'NoneType' has no len()

In [None]:
# Show 5 randomly sampled rows of the project table as a quick sanity check.
projects.sample(5)

# Création du dataframe ERA5 horaire

In [None]:
# Code of the project to extract (looked up in the index of `projects`).
project = "ESPS"

In [None]:
# Select the ERA5 grid node nearest to the project's coordinates and convert
# it to a dataframe.
# Fix: `longitude` was previously given the project's *latitude* value, which
# selected the wrong grid node.
# NOTE(review): assumes the project table has a `longitude` column next to
# `latitude` - confirm against ERA5_project_list.csv.
project_coords = projects.loc[project]
hourly_ERA5 = netcdf.sel(
    latitude=project_coords.latitude,
    longitude=project_coords.longitude,
    method="nearest",
).to_dataframe()
# The coordinates of the selected node are not needed as columns.
hourly_ERA5.drop(columns=['longitude', 'latitude'], inplace=True)

In [None]:
# Show 5 randomly sampled rows of the extracted dataframe.
hourly_ERA5.sample(5)

# Récupération d'une année entière

In [None]:
# Project and year to extract for the full-year retrieval below.
project = "ESPS"
year = 2001

In [None]:
# Empty dataframe indexed by 'time' with the expected ERA5 variable columns;
# the monthly dataframes are concatenated onto it in the next cell.
year_ERA5 = pd.DataFrame(
    columns=['time', 'u100', 'v100', 'u10', 'v10', 'd2m', 't2m', 'sf', 'sp', 'msdwswrf', 'tp']
).set_index('time')

In [None]:
# Load the 12 monthly ERA5 files of `year` from S3, keep the grid node nearest
# to the project, and append the result to `year_ERA5`.

# Loop invariants hoisted out of the loop: bucket handle and project coordinates.
bucket_name = 'edfred-edfre-sbx-eu-west-1-solar-radiation-data'
s3_bucket = boto3.Session().resource('s3').Bucket(bucket_name)
project_coords = projects.loc[project]

monthly_frames = []
for month in range(1, 13):
    name = f'{year}-{month:02d}'
    input_name_nc = 'ERA5_france_' + name + '.nc'
    # S3 keys always use '/', independently of the local platform.
    S3_origin = '/'.join(['ERA5', 'netcdf', 'france', input_name_nc])
    body = s3_bucket.Object(S3_origin).get()['Body'].read()
    netcdf = xr.open_dataset(body)

    # Fix: `longitude` was previously given the project's latitude value.
    # NOTE(review): assumes the project table has a `longitude` column - confirm.
    df_ERA5 = netcdf.sel(
        latitude=project_coords.latitude,
        longitude=project_coords.longitude,
        method="nearest",
    ).to_dataframe()
    df_ERA5.drop(columns=['longitude', 'latitude'], inplace=True)

    monthly_frames.append(df_ERA5)

# Concatenate once at the end instead of growing the dataframe at every iteration.
year_ERA5 = pd.concat([year_ERA5, *monthly_frames])

In [None]:
# Preview: 5 randomly sampled rows of the yearly dataframe.
year_ERA5.sample(5)

# Récupération d'un projet entier

In [None]:
# Project to extract and the period to cover, given as 'YYYY-MM' bounds.
project = "ESPS"
start_month = "2000-01"
end_month = "2021-11"

In [None]:
# Empty dataframe indexed by 'time' with the expected ERA5 variable columns;
# the monthly dataframes are concatenated onto it in the next cell.
project_ERA5 = pd.DataFrame(
    columns=['time', 'u100', 'v100', 'u10', 'v10', 'd2m', 't2m', 'sf', 'sp', 'msdwswrf', 'tp']
).set_index('time')

In [None]:
# Load every monthly ERA5 file between `start_month` and `end_month` from S3,
# keep the grid node nearest to the project, and append the result to
# `project_ERA5`.
months_range = pd.date_range(start=start_month, end=end_month, freq='MS')

# Loop invariants hoisted out of the loop: bucket handle and project coordinates.
bucket_name = 'edfred-edfre-sbx-eu-west-1-solar-radiation-data'
s3_bucket = boto3.Session().resource('s3').Bucket(bucket_name)
project_coords = projects.loc[project]

monthly_frames = []
for date in months_range:
    # Monthly files are tagged 'YYYY-MM'.
    input_name_nc = 'ERA5_france_' + date.strftime('%Y-%m') + '.nc'
    # S3 keys always use '/', independently of the local platform.
    S3_origin = '/'.join(['ERA5', 'netcdf', 'france', input_name_nc])
    body = s3_bucket.Object(S3_origin).get()['Body'].read()
    netcdf = xr.open_dataset(body)

    # Fix: `longitude` was previously given the project's latitude value.
    # NOTE(review): assumes the project table has a `longitude` column - confirm.
    df_ERA5 = netcdf.sel(
        latitude=project_coords.latitude,
        longitude=project_coords.longitude,
        method="nearest",
    ).to_dataframe()
    df_ERA5.drop(columns=['longitude', 'latitude'], inplace=True)

    monthly_frames.append(df_ERA5)

# Concatenate once at the end instead of growing the dataframe at every iteration.
project_ERA5 = pd.concat([project_ERA5, *monthly_frames])

In [None]:
# Preview: 5 randomly sampled rows of the project dataframe.
project_ERA5.sample(5)

# Traitement des données

In [None]:
# Load the standard power curve (column 'power', indexed by 'windspeed').
S3_pc_url = f's3://{bucket_name}/ERA5/config/power_curve_V90-3.0MW.csv'
power_curve = pd.read_csv(S3_pc_url, index_col='windspeed')

# Wind speed and direction computed from the u/v components.
# The direction is rounded to the nearest degree and wrapped with a modulo so
# it stays in [0, 360): without it, 360 could appear as a duplicate of 0.
project_ERA5['ws100'] = (project_ERA5['u100']**2 + project_ERA5['v100']**2)**0.5
project_ERA5['wd100'] = (np.arctan2(project_ERA5['u100'], project_ERA5['v100'])*180/pi + 180).round(0) % 360
project_ERA5['ws10'] = (project_ERA5['u10']**2 + project_ERA5['v10']**2)**0.5
project_ERA5['wd10'] = (np.arctan2(project_ERA5['u10'], project_ERA5['v10'])*180/pi + 180).round(0) % 360

# Energy: linear interpolation of the power curve at the ws100 wind speed.
project_ERA5['E100'] = np.interp(project_ERA5['ws100'], power_curve.index, power_curve['power'])
# Relative-humidity estimate from the spread between t2m and d2m.
project_ERA5['rh'] = 100 - 5 * (project_ERA5['t2m'] - project_ERA5['d2m'])
# Air density from pressure and temperature.
# NOTE(review): presumably sp is in Pa, t2m in K and 287.058 is the specific
# gas constant of dry air - confirm.
project_ERA5['density'] = project_ERA5['sp'] / (287.058 * project_ERA5['t2m'])
# Energy scaled by the ratio of the computed density to 1.225
# (presumably the reference air density in kg/m3 - confirm).
project_ERA5['E100_cor'] = project_ERA5['E100']*project_ERA5['density']/1.225

# Drop the raw components and the variables that are not exported.
project_ERA5.drop(
    columns=['u100', 'v100', 'u10', 'v10', 'sf', 'msdwswrf', 'tp'],
    inplace=True,
)

In [None]:
# Preview: 5 randomly sampled rows of the processed dataframe.
project_ERA5.sample(5)

In [None]:
# Local destination of the cleaned hourly ERA5 CSV for the selected project.
ERA5_hourly_path = (
    Path('/home/ec2-user/SageMaker/EtudeWindIndex/Data/ERA5/ERA5_hourly/Clean')
    / ('ERA5_' + project + '.csv')
)

In [None]:
# Save the processed dataframe locally as a semicolon-separated CSV.
# The destination folder is created first so that to_csv does not fail when
# it does not exist yet.
ERA5_hourly_path.parent.mkdir(parents=True, exist_ok=True)
project_ERA5.to_csv(ERA5_hourly_path, index=True, sep=';')

In [None]:
# Save the processed dataframe to S3 as a semicolon-separated CSV.
# Fix: the dataframe is named `project_ERA5`; `projects_ERA5` was undefined
# and raised a NameError.
outfile = 'ERA5_' + project + '.csv'
project_ERA5.to_csv(f's3://{S3_bucket_name}/{S3_CSV_FOLD}/{outfile}', index=True, sep=';')

# Récupération d'une liste de projets en France