This notebook uses the non-interactive (API) version of the [European Commission's PVGIS tool](https://ec.europa.eu/jrc/en/pvgis) to download hourly PV performance data for 2016. Because PVGIS can be sampled at any coordinate, the spatial resolution of the combined hourly wind & PV performance data is limited by the MERRA wind data, which has a resolution of 0.5° x 0.625°. The points of interest are therefore taken from the MERRA wind datasets: for each country, the coordinates of the downloaded MERRA points are extracted.

Then, PVGIS API is utilized to extract the following features for each coordinate point (see: [API reference](https://ec.europa.eu/jrc/en/PVGIS/docs/noninteractive) and [output documentation](https://ec.europa.eu/jrc/en/PVGIS/tools/hourly-radiation)):
- <b>Wh [Wh] -</b> hourly energy output of a PV installation per kW of installed capacity, for optimally aligned single-horizontal-axis PV panels with the axis aligned north-south ([datasources and calculation methods](https://ec.europa.eu/jrc/en/PVGIS/docs/methods))
- <b>G(i) [W/m2] -</b> Global in-plane irradiance
- <b>H_sun [°] -</b> Sun height (elevation)
- <b>T2m [°C] -</b> Air temperature
- <b>WS10m [m/s] -</b> Wind speed at 10m

System assumptions:
- PV panels are optimally aligned and mounted on a single horizontal tracking axis aligned north-south
- Sum of system losses = 14% <font color=red>(this should be reviewed?)</font>

The databases used to calculate radiation depend on the location being queried: PVGIS-SARAH for most of Europe and PVGIS-ERA5 for European latitudes above 60° N. For more detail, see the "raddatabase" parameter description in the [API reference](https://ec.europa.eu/jrc/en/PVGIS/docs/noninteractive) and chapter 3 of the [PVGIS users manual](https://ec.europa.eu/jrc/en/PVGIS/docs/usermanual#fig:default_db).

The results are saved as parquet files for each country in:  
*/LAV/EnergySystemsGroup/Research/Aviation/SAFlogistics/data/PVGIS*
___

# Setup

In [3]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
# import multiprocessing as mp
import time
import geopandas as gpd
import aiohttp
import asyncio
import logging
import os

In [4]:
# Notebook logger with three sinks:
#   * a persistent file (default append mode) that keeps INFO and above across runs,
#   * a per-run file (mode='w', truncated when the handler is created) with full DEBUG output,
#   * the notebook output stream, restricted to ERROR and above.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Re-running this cell would otherwise attach a second (third, ...) set of handlers
# to the same logger object and every message would be written multiple times.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

formatter = logging.Formatter('%(asctime)s - %(process)d - %(levelname)s: %(message)s')

file_handler1 = logging.FileHandler('/home/saskia/Documents/Master_Thesis/logs/N03_PVGIS_download_persistent.log')
file_handler1.setLevel(logging.INFO)
file_handler1.setFormatter(formatter)

file_handler2 = logging.FileHandler('/home/saskia/Documents/Master_Thesis/logs/N03_PVGIS_download.log',mode='w')
file_handler2.setLevel(logging.DEBUG)
file_handler2.setFormatter(formatter)

stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.ERROR)
stream_handler.setFormatter(formatter)

logger.addHandler(file_handler1)
logger.addHandler(file_handler2)
logger.addHandler(stream_handler)

# The attribute is spelled `propagate`; the previous `propogate` merely created an
# unused attribute, so records were still handed on to the ancestor loggers.
logger.propagate = False

In [5]:
# Evaluation points for all of Europe, read from a shapefile into a GeoDataFrame.
# Later cells filter this table by its `name` column (compared against country names)
# and read its `lat`, `lon`, `PV_lat` and `PV_lon` columns.
europe_points = gpd.read_file('/home/saskia/Documents/Master_Thesis/data/Countries_WGS84/Europe_Evaluation_Points.shp')

In [6]:
# Countries to process, taken from the `country` column of the CSV
# (its first column is used as the row index). The download loop iterates over
# this list and matches each entry against `europe_points.name`.
EU_EFTA = list(pd.read_csv('/home/saskia/Documents/Master_Thesis/data/EU_EFTA_Countries.csv',index_col=0).country)

# API Requests

In [7]:
# Module-level containers shared between the download coroutine and the
# per-country loop; they are filled while a country is being queried.

# Holds the DataFrames of the extracted data
results_dict = dict()
# These points encountered a problem on their first API request attempt
problem_points = list()
# These points encountered a problem on their second API request attempt
error_points = list()
# These points were found to be over the sea
sea_points = list()

async def download_data(point,PV_eval_loc,sema,session):
    """Query the PVGIS `seriescalc` endpoint for one point and record the outcome.

    The outcome is stored in the module-level containers rather than returned:

    * success (HTTP 200): the hourly records are put in ``results_dict[point]``
      as a DataFrame with ``lat``/``lon`` columns taken from ``point``;
    * the error message mentions 'sea': ``point`` is appended to ``sea_points``;
    * any other failure: on the first failure ``point`` is appended to
      ``problem_points`` (so the caller can retry it); on the second failure it
      is moved to ``error_points`` and the failure is logged.

    Whenever a point reaches a final state (success, sea or error) it is removed
    from ``problem_points``, so that list only ever holds points that still need
    a retry.

    Parameters
    ----------
    point : tuple
        Key identifying the point; ``point[0]`` / ``point[1]`` are written to the
        ``lat`` / ``lon`` columns of the result.
    PV_eval_loc : tuple
        ``(lat, lon)`` actually sent to the API.
    sema : asyncio.Semaphore
        Limits how many requests are in flight at the same time.
    session : aiohttp.ClientSession
        Session used to issue the GET request.
    """
    parameters = {
                    'startyear':2016,
                    'endyear':2016,
                    'pvcalculation':1,
                    'peakpower':1, # Nominal power of the PV system, in kW.
                    'loss':14, # Sum of system losses, in percent
                    'trackingtype':1,
                    'optimalinclination':1,
                    'outputformat':'json',
                    'lat':str(PV_eval_loc[0]),
                    'lon':str(PV_eval_loc[1]),
                    }
    async with sema:
        async with session.get('https://re.jrc.ec.europa.eu/api/seriescalc',params=parameters) as resp:
            status = resp.status
            try:
                response = await resp.json()
            except (aiohttp.client_exceptions.ClientPayloadError,
                    aiohttp.client_exceptions.ContentTypeError) as e:
                # The body could not be read or is not JSON (ContentTypeError is
                # what `resp.json()` raises for a non-JSON content type).
                # Treat it like any other failed attempt: retry once, then give up.
                if point not in problem_points:
                    problem_points.append(point)
                else:
                    problem_points.remove(point)
                    error_points.append(point)
                    logger.error(f'Error with {point}: {type(e)}. {e}. Point not saved.')
                return

            if status == 200: # 200 means the request returned a response correctly. all others indicate an error
                df = pd.json_normalize(response['outputs']['hourly'])
                df['lat'] = point[0]
                df['lon'] = point[1]
                results_dict[point] = df
                # A successful retry resolves the point; leaving it in the list
                # would make the caller retry it forever.
                if point in problem_points:
                    problem_points.remove(point)
                return

            # Error responses are expected to carry a 'message'; fall back to an
            # empty string so an unexpected payload is handled as a normal failure.
            message = response.get('message', '') if isinstance(response, dict) else ''
            if 'sea' in str(message):
                # If the point was found to be over the sea, the message will indicate that
                sea_points.append(point)
                if point in problem_points:
                    problem_points.remove(point)
            elif point not in problem_points:
                # If the point wasn't already in the problem_points list, add it now
                problem_points.append(point)
            else:
                # If the point was already in the problem_points list, this is the second time it has been queried and it is now considered an error point
                problem_points.remove(point)
                error_points.append(point)
                logger.warning(f'Error with {point}. Point not saved')

async def main(country_points):
    """Run `download_data` concurrently for every row of `country_points`.

    `country_points` is indexed by the point key; its `PV_lat` and `PV_lon`
    columns give the coordinates that are sent to the API. All requests share
    one aiohttp session and the coroutine returns once every download task has
    completed.
    """
    # Limits the number of asynchronous calls to the API possible. Try lowering this number if you encounter connection issues
    request_limiter = asyncio.Semaphore(10)
    async with aiohttp.ClientSession() as session:
        pending = [
            asyncio.ensure_future(
                download_data(
                    grid_point,
                    (country_points.loc[grid_point,'PV_lat'],country_points.loc[grid_point,'PV_lon']),
                    request_limiter,
                    session,
                )
            )
            for grid_point in country_points.index
        ]
        await asyncio.gather(*pending)

___
Notice: Running the following cell will query the API and save the results to files
___

In [8]:
for i,country in enumerate(EU_EFTA):
    # --- Setup ---
    if os.path.isfile('/home/saskia/Documents/Master_Thesis/data/PVGIS/'+country+'_PV.parquet.gzip'):
        logger.info(f'{country} file already found.')
        continue
    logger.info(f'Starting {country}...')
    country_points = europe_points.loc[europe_points.name==country].set_index(['lat','lon'])
    logger.info(f'{len(country_points)} points will be queried for {country}')
    stime = time.time()
    results_dict.clear()
    problem_points.clear()
    sea_points.clear()
    error_points.clear()
    if len(results_dict)+ len(problem_points) + len(sea_points) + len(error_points) > 0:
        logger.error(f'Lists or dictionaries not cleared properly during {country} evaluation.')
        break
    
    # --- Run queries ---
    await main(country_points)
    # --- Retry queries for problem points
    while len(problem_points)>0:
        await main(country_points.loc[country_points.index.isin(problem_points)])

    # --- Log results ---
    logger.info(f'Queries finished after {time.time()-stime:.1f} seconds')
    logger.info(f'{len(sea_points)} sea points for {country}')
    logger.info(f'{len(problem_points)} unresolved problem points for {country}')
    logger.info(f'{len(error_points)} error points for {country}')
    logger.info(f'{len(results_dict)} points were succesfully queried for {country} (out of {len(country_points)}).')

    # --- Concatenate the data returned for each point into a single dataframe (results_df) ---
    if len(results_dict)>0:
        results_df = pd.concat(results_dict.values())
        # Rename "P" (power, [Watt per kW installed]) column to 'Wh' (energy produced during the given hour, [Watt-hour per kW installed])
        results_df.rename(columns={'P':'Wh'},inplace=True)
        results_df['time'] = pd.to_datetime(results_df['time'],format='%Y%m%d:%H%M')
        results_df.set_index(['lat','lon','time'],inplace=True)
        results_df.sort_index(inplace=True)
        
        # --- Check for errors ---
        results_points = list(results_df.index.droplevel(2).unique())
        wrong_points = [x for x in results_points if x not in country_points.index]
        if len(wrong_points) > 0:
            logger.error(f'{len(wrong_points)} wrong points were saved for {country}.')
            break
    else: # For some countries, it is possible that the queries return no successful responses. For these, we create an empty dataframe
        results_df = pd.DataFrame({'lat':[],'lon':[],'time':[],'Wh':[],'G(i)':[],'H_sun':[],'T2m':[],'WS10m':[],'Int':[]})
        results_df.set_index(['lat','lon','time'],inplace=True)

    # --- Save the results to a file ---
    results_df.to_parquet('/home/saskia/Documents/Master_Thesis/data/PVGIS/'+country+'_PV.parquet.gzip',compression='gzip')
    logger.info(f'{country} file saved.')
    logger.info(f'{country} finished after {time.time()-stime:.1f} seconds')
logger.info('All queries finished')
print('All queries finished')

All queries finished
