# Functional script
This notebook will be used for the general operation of the application


- [Preparation](#Preparation)<br>

### 1. [Installation data](#Installation-data)

### 2. [Obtaining the data](#Obtaining-the-data)

### 3. [Data cleansing](#Data-cleansing)

### 4. [Data preparation](#Data-preparation)

### 5. [Radiation prediction](#Radiation-prediction)

### 6. [Ambient temperature prediction](#Ambient-temperature-prediction)

### 7. [Obtaining electricity production](#Obtaining-electricity-production)


## Preparation

The different libraries, datasets and functions are loaded

In [1]:
import numpy as np
import pandas as pd
import random
pd.options.display.max_columns = None
pd.options.display.max_rows = None
import matplotlib.pyplot as plt
plt.style.use("seaborn")

In [2]:
import math
import time
from datetime import timezone, datetime, date, timedelta
import os
import requests
import json
import re
import io

In [3]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import pickle as pk
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline

In [4]:
hora_ini = 4
hora_fin = 20

The required functions defined in another dataset are imported

In [5]:
# !pip install ipynb
from ipynb.fs.full.Funciones_solares import *

The working directory is set

In [6]:
%cd /home/dsc/git/TFM/

/home/dsc/git/TFM


In [7]:
directorio = '/home/dsc/git/TFM/'

The list of AEMET weather stations is loaded

In [8]:
df_estaciones = pd.read_csv(directorio + 'data/estaciones.csv')

The list of AEMET radiation stations is loaded

In [9]:
df_estaciones_rad = pd.read_csv(directorio + 'data/estaciones_rad.csv')
df_estaciones_rad.dropna(inplace = True)
df_estaciones_rad.reset_index(drop = True, inplace = True)

### ``get_response_aemet()``

API Aemet function

In [10]:
def get_response_aemet(url_base = "", url = "", api_key = "", ide = ""):
    
    # Se unen las partes de la url final
    call = '/'.join([url_base, url, ide])
    if(ide == ""):
        call = call[:-1]

    headers = {    
        'Accept': 'application/json',  
        'Authorization': 'api_key' + api_key
    }
    response = requests.get(call, headers = headers)
    
    #Se obtienen los datos del body
    body = json.loads(response.text)["datos"]
    
    
    response = requests.get(body, headers = headers)
    if response:
        print('Exito')
    else:
        print('Ha ocurrido un error')

    return response.text


### ``get_response_OW()``

OpenWeather API function

In [11]:
def get_response_OW(url = ""):
    
    response = requests.get(url)

    if response:
        print('Exito')
    else:
        print('Ha ocurrido un error')

    return response.content


### ``openAndSkipLines()``

Function to count the lines to the response data from the CAMS SODA API

In [12]:
def openAndSkipLines(f, symbol):
# open a file, e.g. a CSV file, and skip lines beginning with symbol. Return the total number of lines and number of lines to skip (i.e. not containing data). If <0, file is empty
# The file is ready to be read at the first line of data

    buf = io.StringIO(f)
    
    nbTotalLines = len(buf.read())
    if(nbTotalLines == 0): return -1, -1
    buf.seek(0,0)
    stop = False
    nbLine = 0
    while (not stop) :
        nbLine = nbLine + 1
        l = buf.readline()
        if (l[0] != symbol): stop = True
    buf.seek(buf.tell()-len(l),0)
    nbLinesToSkip = nbLine-1
    return nbTotalLines, nbLinesToSkip 



### ``getCamsData()``

Function to generate dataframe with the response data from CAMS SODA

In [13]:
def getCamsData(camsFile, nbLinesToSkip):

    # Lista de variables CAMS:
    # Observation period;TOA;Clear sky GHI;Clear sky BHI;Clear sky DHI;Clear sky BNI;GHI;BHI;DHI;BNI;Reliability
    camsFile = io.StringIO(camsFile) 
    datacolumns = pd.DataFrame()
    dateBegins = list()
    dateEnds = list()
    toa = list()
    cs_ghi = list()
    cs_bhi = list()
    cs_dhi = list()
    cs_bni = list()
    ghi = list()
    bhi = list()
    dhi = list()
    bni = list()
    reliability = list()
    cont_lines = 0
    
    # Almaceno los datos de cada fila
    for ll in camsFile.readlines():
        cont_lines += 1
        if (cont_lines > nbLinesToSkip):
            ll = ll[0:len(ll)-1]
            #print(ll)
            l = ll.split(';')
            date = l[0].split('/')
            dateBegins.append(date[0].strip())
            dateEnds.append(date[1].strip())
            toa.append(l[1].strip())
            cs_ghi.append(l[2].strip())
            cs_bhi.append(l[3].strip())
            cs_dhi.append(l[4].strip())
            cs_bni.append(l[5].strip())
            ghi.append(l[6].strip())
            bhi.append(l[7].strip())
            dhi.append(l[8].strip())
            bni.append(l[9].strip())
            reliability.append(l[10].strip())

    # Genero el data frame
    dictio = {"dateBegins":dateBegins, "dateEnds":dateEnds, "toa":toa, "cs_ghi":cs_ghi, "cs_bhi":cs_bhi, "cs_dhi":cs_dhi, "cs_bni":cs_bni, "ghi":ghi, "bhi":bhi, "dhi" : dhi, "bni" : bni, "reliability" : reliability}
    datacolumns = pd.DataFrame(dictio)

    return datacolumns

### ``distancia()``

Function that calculates the Euclidean or Manhattan distance between two points

In [14]:
def distancia(lat1, lon1, lat2, lon2, distancia = "euclidea"):
    
    if(distancia == "euclidea"):
        dist = math.sqrt((lat1 - lat2)**2 + (lon1 -lon2)**2)
    
    elif(distancia == "manhattan"):
        dist = abs(lat1 - lat2) + abs(lon1 -lon2)
  
    return dist 

### ``conversor_coordenadas()``

Function that transforms coordinates from GMS to decimal

In [15]:
def conversor_coordenadas(coord):
    # If coord is latitude, north of the equator is always positive
    # If coord is longitude, west of the Meridian 0º are negative
    
    D = int(coord[0:2])
    M = float(coord[2:4])
    S = float(coord[4:6])
    
    #GMS to GD
    DD = float((D) + (M/60) + (S/3600))
        
    if(coord[6] == "S" or coord[6] == "W"):
            DD = -DD
            
    return DD

### ``dividir_train_test()``
This function divides the data into train and test sets:
- Train: prop (Default, 80%)
- Test: 100% - prop

In [16]:
def dividir_train_test(x,y, prop = 0.8):
    
    # Train ratio
    tam_train = prop

    # It is divided into train and test
    x_train, x_test, y_train, y_test = train_test_split(x, y, train_size = tam_train, random_state = 1)

    print('x_train: {}%. Nº de datos: {}'.format((len(x_train)/len(x))*100, len(x_train)))
    print('y_train: {}%. Nº de datos: {}'.format((len(y_train)/len(y))*100, len(y_train)))


    print('x_test: {}%. Nº de datos: {}'.format((len(x_test)/len(x))*100, len(x_test)))
    print('y_test: {}%. Nº de datos: {}'.format((len(y_test)/len(y))*100, len(y_test)))
    
    return x_train, x_test, y_train, y_test

### ``graf_compara()``

This function represents the actual versus predicted values using vertical bars

In [17]:
def graf_compara(nombre_modelo, y_real, y_pred):
    
    # Prediction values
    predic = pd.DataFrame({'Dato': y_pred})
    predic.insert(len(predic.columns),"index",[i for i in range(0,len(predic["Dato"]))],True)
    
    # Real values
    real = pd.DataFrame({'Dato': y_real})
    real.insert(len(real.columns),"index",[i for i in range(0,len(real["Dato"]))],True)
    
    # Comparison
    comparacion = pd.concat([real, predic], keys=["Real", "Prediccion"]).reset_index()
    comparacion.drop(['level_1'], axis=1, inplace = True)
    comparacion.columns = ['Tipo', 'Dato', "Index"]
    #print(comparacion)

    sns.catplot(data = comparacion, kind = "bar", x = "Index", y = "Dato", hue = "Tipo", estimator = np.median, height = 10, aspect = 5)

### ``mape_fun()``

In [18]:
def mape_fun(y_real, y_pred): 
    y_real, y_pred = np.array(y_real), np.array(y_pred)
    return np.mean(np.abs((y_real - y_pred) / y_real)) * 100

### ``metricas()``

This function calculates the different metrics of the prediction produced by the model

Metrics: ``mae``, ``mse``,``rmse``, ``r2``, ``mape``

In [19]:
def metricas(modelo, y_real, y_pred):
    
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
    
    # MAE: the error is calculated as an average of absolute differences between the target values and the predictions. All individual differences are weighted equally in the average.
    mae = mean_absolute_error(y_real, y_pred)

    # MSE: measures the mean squared error of the predictions. For each point, find the square difference between the predictions and target and then averages those values.
    mse = mean_squared_error(y_real, y_pred, squared = False)
    
    # RMSE: is the square root of MSE. Has the scale of the target variable.
    rmse = np.sqrt(mean_squared_error(y_real, y_pred))
    
    # R^2: it is closely related to the MSE, but has the advantage of being scale-free. It is always between -∞ and 1.
    r2 = r2_score(y_real, y_pred)
    
    # MAPE: For each object, the absolute error is divided by the target value, giving a relative error.
    #mape = mape_fun(y_real, y_pred)
    
    
    print('MODEL: ', modelo)
    print('MAE: ', mae)
    print('MSE: ', mse)
    print('RMSE: ', rmse)
    print('R2 : ', r2)
    #print('MAPE : ', mape)
    
    #return modelo, mae, mse, rmse, r2, mape
    return modelo, mae, mse, rmse, r2

### ``compracion_metricas()``

This function represents the metrics of each trained model, to be able to compare them

In [20]:
def compracion_metricas(lista_modelos):
    
    plt.style.use('ggplot')
    
    # Dataframes are created to host the metrics
    df_mae = pd.DataFrame(columns = ['mae', "modelo"])
    df_mse = pd.DataFrame(columns = ['mse', "modelo"])
    df_rmse = pd.DataFrame(columns = ['rmse', "modelo"])
    df_r2 = pd.DataFrame(columns = ['r2', "modelo"])
    #df_mape = pd.DataFrame(columns = ['mape', "modelo"])

    # Dataframes are filled with metrics
    for modelo in lista_modelos:
        
        df_mae = df_mae.append({'mae': modelo[1], "modelo": modelo[0]}, ignore_index=True)
        df_mse = df_mse.append({'mse': modelo[2], "modelo": modelo[0]}, ignore_index=True)
        df_rmse = df_rmse.append({'rmse': modelo[3], "modelo": modelo[0]}, ignore_index=True)
        df_r2 = df_r2.append({'r2': modelo[4], "modelo": modelo[0]}, ignore_index=True)
        #df_mape = df_mape.append({'mape': modelo[5], "modelo": modelo[0]}, ignore_index=True)
    
    # The figure is created and the subplots of each metric are added
    fig = plt.figure(figsize = (15, len(lista_modelos*4)))
    ax1 = fig.add_subplot(5,1,1)
    ax2 = fig.add_subplot(5,1,2)
    ax3 = fig.add_subplot(5,1,3)
    ax4 = fig.add_subplot(5,1,4)
    #ax5 = fig.add_subplot(5,1,5)
    
    #MAE
    fig = sns.barplot(x = "mae", y = "modelo", data = df_mae, ax = ax1, orient = "h", color = 'green').set_title("Comparación de métricas")
    ax1.tick_params(labelbottom = False, bottom = False)
    ax1.set_xlabel("MAE")
    ax1.set_ylabel(" ")
    for pa in ax1.patches:
        ax1.annotate("%.4f" % pa.get_width(), xy = (pa.get_width(), pa.get_y() + pa.get_height()/2),
            xytext = (5, 0), textcoords = 'offset points', ha = "left", va = "center")
    
    #MSE
    fig = sns.barplot(x = "mse", y = "modelo", data = df_mse, ax = ax2, orient = "h", color = 'red')
    ax2.tick_params(labelbottom = False, bottom = False)
    ax2.set_xlabel("MSE")
    ax2.set_ylabel(" ")
    for pa in ax2.patches:
        ax2.annotate("%.4f" % pa.get_width(), xy = (pa.get_width(), pa.get_y() + pa.get_height()/2),
            xytext = (5, 0), textcoords = 'offset points', ha = "left", va = "center")
        
    #RMSE
    fig = sns.barplot(x = "rmse", y = "modelo", data = df_rmse, ax = ax3, orient = "h", color = 'blue')
    ax3.tick_params(labelbottom = False, bottom = False)
    ax3.set_xlabel("RMSE")
    ax3.set_ylabel(" ")
    for pa in ax3.patches:
        ax3.annotate("%.4f" % pa.get_width(), xy = (pa.get_width(), pa.get_y() + pa.get_height()/2),
            xytext = (5, 0), textcoords = 'offset points', ha = "left", va = "center")

    #R2
    fig = sns.barplot(x = "r2", y = "modelo", data = df_r2, ax = ax4, orient = "h", color = 'yellow')
    ax4.set_xlabel("R2")
    ax4.set_ylabel(" ")
    ax4.tick_params(labelbottom = False, bottom = False)
    for pa in ax4.patches:
        ax4.annotate("%.4f" % pa.get_width(), xy = (pa.get_width(), pa.get_y() + pa.get_height()/2),
            xytext = (5, 0), textcoords = 'offset points', ha = "left", va = "center")
        
    #MAPE
    #fig = sns.barplot(x = "mape", y = "modelo", data = df_mape, ax = ax5, orient = "h", color = 'grey')
    #ax5.set_xlabel("Valor de la métrica")
    #ax5.set_ylabel(" ")
    #ax5.tick_params(labelbottom = False, bottom = False)
    #for pa in ax5.patches:
    #    ax5.annotate("%.4f" % pa.get_width(), xy = (pa.get_width(), pa.get_y() + pa.get_height()/2),
    #        xytext = (5, 0), textcoords = 'offset points', ha = "left", va = "center")

# Installation data
<div style = "float:right"><a style="text-decoration:none" href = "#Functional-script">

In [21]:
fecha_a_usar = date.today()
fecha = fecha_a_usar
fecha = "{}-{}-{}".format(fecha.year, str(fecha.month).zfill(2), str(fecha.day).zfill(2))
fecha

fallo = 0

In [22]:
# Installation data

lat = 41.29277777777778 #Latitude
lon = 2.0700000000000003 #Longitude
orient = 10 #Orientation, west
incl = 25 #Inclination
ppico = 4.62 #kW peak

# Obtaining data
<div style = "float:right"><a style="text-decoration:none" href = "#Functional-script">

## Weather data for the previous 5 days

This data is obtained from the OpenWeather portal (thanks to a student license that allows making a large number of calls per day) (https://openweathermap.org/api/one-call-api#history). **Data in UTC.** The hourly weather data for the 5 days prior to the call is accessed. The fields obtained are:

- ``dt``: Time of historical data, Unix, UTC
- ``temp``: Temperature. Units: kelvin
- ``feels_like``:  Temperature. This accounts for the human perception of weather. Units: kelvin
- ``pressure``: Atmospheric pressure on the sea level, hPa
- ``humidity``: Humidity, %
- ``dew_point``: Atmospheric temperature below which water droplets begin to condense and dew can form. Units: kelvin
- ``clouds``: Cloudiness, %
- ``visibility``: Average visibility, metres
- ``wind_speed``: Wind speed. Wind speed. Units: m/s
- ``wind_gust``: Wind gust. Units: m/s
- ``wind_deg``: Wind direction, degrees (meteorological)
- ``rain``: Precipitation volume, mm
- ``snow``: Snow volume, mm
- ``weather``: Includes an id and other parameters

In [23]:
try:
    
    # API password
    api_key = "f21448c171f8f0584b48b3c51c9b6cd6"
        
    df_clima_ow_total = pd.DataFrame()
    
    # For each historical day (previous 5 days)
    for retardo in range(0,5):


        dia = date.today() + timedelta(days = -retardo)
        dia = "{}-{}-{}".format(dia.year, str(dia.month).zfill(2), str(dia.day).zfill(2))
        print("fecha: {}".format(date.today() + timedelta(days = -retardo)))

        dia = datetime.strptime(dia, "%Y-%m-%d")

        # Convert datetime to timestamp
        dia_unix = int(datetime.timestamp(dia))

           
        time = dia_unix

        url = "https://api.openweathermap.org/data/2.5/onecall/timemachine?lat={}&lon={}&dt={}&appid={}".format(lat, lon, time, api_key)

        print("url: {}".format(url))
        response = get_response_OW(url)
            
            
        # Getting data from the response
        response = json.loads(response)
        df_clima_ow = pd.json_normalize(response["hourly"])
            
        # Extra columns are generated
            
        # Weather indicator
        df_we = []
        # Date of data day
        df_time = []
        # Hour
        df_hour = []
        # station ID
        df_estacion = []
        # Data collection date
        df_fecha = []
        for m in range(0,24):
            df_we.append(df_clima_ow["weather"][m][0]["id"])
            df_estacion.append(str(str(lat)+str(lon)))
            df_time.append(datetime.utcfromtimestamp(int(df_clima_ow["dt"][m])).strftime('%Y-%m-%d'))
            df_hour.append(datetime.utcfromtimestamp(int(df_clima_ow["dt"][m])).strftime('%H:%M')) 
            df_fecha.append(fecha)
            
        df_we = pd.DataFrame(df_we, columns=['we']) 
        df_estacion = pd.DataFrame(df_estacion, columns=['estacion'])
        df_time = pd.DataFrame(df_time, columns=['date']) 
        df_hour = pd.DataFrame(df_hour, columns=['hour']) 
        df_fecha = pd.DataFrame(df_fecha, columns=['fecha_prediccion']) 

        # The column with the weather indicator is added
        df_clima_ow = pd.concat([df_clima_ow, df_we], axis=1)
        # The weather row is eliminated, with more indicators
        df_clima_ow = df_clima_ow.drop("weather", axis = 1)
            
        # The column with the station ID is added
        df_clima_ow = pd.concat([df_estacion, df_clima_ow], axis=1)

        # The column is added with the date of the day when the data is obtained and the day and time to which each one corresponds
        # The dt from which they where obtained is eliminated
        df_time = pd.concat([df_time, df_hour, df_fecha], axis=1)
        df_clima_ow = pd.concat([df_time, df_clima_ow], axis=1)
        df_clima_ow = df_clima_ow.drop("dt", axis = 1)
        
        
        df_clima_ow_total = df_clima_ow_total.append(df_clima_ow, ignore_index = True)
    
    df_clima_ow = df_clima_ow_total

except:
    print("Fallo clima")
    fallo = 1

fecha: 2021-07-20
url: https://api.openweathermap.org/data/2.5/onecall/timemachine?lat=41.29277777777778&lon=2.0700000000000003&dt=1626732000&appid=f21448c171f8f0584b48b3c51c9b6cd6
Exito
fecha: 2021-07-19
url: https://api.openweathermap.org/data/2.5/onecall/timemachine?lat=41.29277777777778&lon=2.0700000000000003&dt=1626645600&appid=f21448c171f8f0584b48b3c51c9b6cd6
Exito
fecha: 2021-07-18
url: https://api.openweathermap.org/data/2.5/onecall/timemachine?lat=41.29277777777778&lon=2.0700000000000003&dt=1626559200&appid=f21448c171f8f0584b48b3c51c9b6cd6
Exito
fecha: 2021-07-17
url: https://api.openweathermap.org/data/2.5/onecall/timemachine?lat=41.29277777777778&lon=2.0700000000000003&dt=1626472800&appid=f21448c171f8f0584b48b3c51c9b6cd6
Exito
fecha: 2021-07-16
url: https://api.openweathermap.org/data/2.5/onecall/timemachine?lat=41.29277777777778&lon=2.0700000000000003&dt=1626386400&appid=f21448c171f8f0584b48b3c51c9b6cd6
Exito


## Weather predictions for the next 2 days

This data is obtained from the OpenWeather portal (thanks to a student license that allows a large number of calls per day) (https://openweathermap.org/api/one-call-api). **Data in UTC.** The hourly weather forecast for the 2 days following the call is accessed. The fields obtained are:

- ``dt``: Time of the forecasted data, Unix, UTC
- ``temp``: Temperature. Units: kelvin
- ``feels_like``: Temperature. This accounts for the human perception of weather. Units: kelvin
- ``pressure``: Atmospheric pressure on the sea level, hPa
- ``humidity``: Humidity, %
- ``dew_point``: Atmospheric temperature (varying according to pressure and humidity) below which water droplets begin to condense and dew can form. Units: kelvin
- ``uvi``: UV index
- ``clouds``: Cloudiness, %
- ``visibility``: Average visibility, metres
- ``wind_speed``: Wind speed. Units: m/s
- ``wind_gust``: Wind gust. Units: m/s
- ``wind_deg``: Wind direction, degrees (meteorological)
- ``pop``: Probability of precipitation
- ``rain``: Rain volume for last hour, mm
- ``snow``: Snow volume for last hour, mm
- ``weather``: Includes an id and other parameters

In [24]:
try:
    
    api_key= "f21448c171f8f0584b48b3c51c9b6cd6"

    exclude = "current,minutely,daily,alerts"

    url = "https://api.openweathermap.org/data/2.5/onecall?lat={}&lon={}&exclude={}&appid={}".format(lat, lon, exclude, api_key)


    response = get_response_OW(url)
    response = json.loads(response)
    df_pred_ow = pd.json_normalize(response["hourly"])


    # Generating extra columns

    # Weather indicator
    df_we = []
    # Date of data day
    df_time = []
    # Hour
    df_hour = []
    # station ID
    df_estacion = []
    # Data collection date
    df_fecha = []
    for m in range(0,48):
        df_we.append(df_pred_ow["weather"][m][0]["id"])
        df_estacion.append(str(str(lat)+str(lon)))
        df_time.append(datetime.utcfromtimestamp(int(df_pred_ow["dt"][m])).strftime('%Y-%m-%d'))
        df_hour.append(datetime.utcfromtimestamp(int(df_pred_ow["dt"][m])).strftime('%H:%M'))
        df_fecha.append(fecha)
    df_we = pd.DataFrame(df_we, columns=['we'])  
    df_estacion = pd.DataFrame(df_estacion, columns=['estacion'])
    df_time = pd.DataFrame(df_time, columns=['date']) 
    df_hour = pd.DataFrame(df_hour, columns=['hour'])
    df_fecha = pd.DataFrame(df_fecha, columns=['fecha_prediccion']) 
        
    # The weather indicator is added and the weather column is removed, with more values
    df_pred_ow = pd.concat([df_pred_ow, df_we], axis = 1)
    df_pred_ow = df_pred_ow.drop("weather", axis = 1)
        
    # Station ID is added
    df_pred_ow = pd.concat([df_estacion, df_pred_ow], axis=1)

    # The date of the day of the data request and the day and hour to which they correspond are added
    # The column dt is eliminated, from which the values are obtained
    df_time = pd.concat([df_time, df_hour, df_fecha], axis=1)
    df_pred_ow = pd.concat([df_time, df_pred_ow], axis=1)
    df_pred_ow = df_pred_ow.drop("dt", axis = 1)


except:
    print("Fallo pred")
    fallo = 1

Exito


## Radiation from the day before

**These data are only available for the different radiation stations**

the accumulated hours (**TRUE SOLAR TIME**) of global, direct, diffuse and infrared radiation. These data are obtained from the AEMET Opendata portal (https://opendata.aemet.es/centrodedescargas/productosAEMET). The fields obtained for each day are:

- ``Estación``: Name of the station
- ``Indicativo``: Indicative Climatological Station
- ``Tipo``: Measured variable (Global/Diffuse/Direct/Erythematic UV/Infrared)
- ``GL/DF/DT``: Hourly radiation accumulated between: (indicated hour -1) and (indicated hour) between 5 and 20. True Solar Time. Variables: Global/Diffuse/Direct (10 * kJ/m²)
- ``UVER``: Semi-hourly radiation accumulated between: (hour: indicated minutes - 30 minutes and (hour: indicated minutes) between 4:30 and 20. True Solar Time. Variables: Erythematic Ultraviolet Radiation (J/m²)
- ``IR``: Hourly radiation accumulated between (indicated hour -1) and (indicated hour) between 1 and 24 True Solar Time. Variables: Infrared radiation (10 * kJ/m²)
- ...

It will not be necessary to transform the time by is approximately equal to UTC (https://relojesdesol.info/node/748)

In [25]:
try:
    import csv
    
    api_key = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJhbGVqYW5kcm8ucnVpei5iZXJjaWFub0BnbWFpbC5jb20iLCJqdGkiOiI2NDNmZjZmMi04OTQyLTQ1YzYtODIxNC0yZGU4NmQzMDU0NWYiLCJpc3MiOiJBRU1FVCIsImlhdCI6MTYxMzQ3NjEwNywidXNlcklkIjoiNjQzZmY2ZjItODk0Mi00NWM2LTgyMTQtMmRlODZkMzA1NDVmIiwicm9sZSI6IiJ9.CCEfI4NjKp9kiTCFsNLQFB-u_oLhcXJTEtdHluoToe8"

    url_base = "https://opendata.aemet.es/opendata/api"

    estaciones_url = "red/especial/radiacion"

    resp = get_response_aemet(url_base, estaciones_url, api_key)

    # The data is processed
    datos_rad = resp[32:]

    lines = datos_rad.splitlines()
    fecha_lines = (resp.splitlines()[1])
    fecha_lines = fecha_lines[1:len(fecha_lines)-1]

    reader = csv.reader(lines)
    parsed_csv = list(reader)

    titulos = [palabra.strip() for palabra in parsed_csv[0][0].replace(';', ', ').replace("\"", "").split(",")]
    filas = [[palabra.strip() for palabra in fila[0].replace(';', ', ').replace("\"", "").split(",")] for fila in parsed_csv[1:]]
    
    
    # Some stations have the title split, the strings must be joined
    filas[15][0] = filas[15][0] + filas[15][1]
    filas[15].pop(1)

    
    # An indicative is corrected
    df_rad_aemet = pd.DataFrame(columns = titulos, data = filas)
    df_rad_aemet.loc[df_rad_aemet['Indicativo'] == '6156', 'Indicativo'] = '6156X'

    # The column with the date to which the data corresponds is added
    # It also ensures that the names of the stations that are also in the station list are the same
    df_fecha = []
    for i in range(0, len(df_rad_aemet['Indicativo'])):
        for j in range(0, len(df_estaciones['indicativo'])):
            if (df_rad_aemet['Indicativo'][i] == df_estaciones['indicativo'][j]):
                df_rad_aemet.loc[df_rad_aemet['Indicativo'] == df_estaciones['indicativo'][j], 'Estación'] = df_estaciones['nombre'][j]
        for j in range(0, len(titulos)):
            if(df_rad_aemet.iloc[i][j] == ""):
                continue
        df_fecha.append(fecha)

    df_fecha = pd.DataFrame(df_fecha, columns=['fecha']) 
    df_rad_aemet = pd.concat([df_fecha, df_rad_aemet], axis=1)

except:
    print("Fallo aemet")
    fallo = 1

Exito


## Radiation data from two days before

These data are obtained from the CAMS Radiation Service portal of the European Union (http://www.soda-pro.com/web-services/radiation/cams-radiation-service). **In UTC hour.** Provide radiation for any date up to 2 days before the call (3 day delay). The fields obtained for each day are:

- ``Observation period``: Beginning/end of the time period with the format "yyyy-mm-ddTHH:MM:SS.S/yyyy-mm-ddTHH:MM:SS.S"
- ``TOA``: Irradiation on horizontal plane at the top of atmosphere (Wh/m2) computed from Solar Geometry 2
- ``Clear sky GHI``: Clear sky global irradiation on horizontal plane at ground level (Wh/m2)
- ``Clear sky BHI``: Clear sky beam irradiation on horizontal plane at ground level (Wh/m2)
- ``Clear sky DHI``: Clear sky diffuse irradiation on horizontal plane at ground level (Wh/m2)
- ``Clear sky BNI``: Clear sky beam irradiation on mobile plane following the sun at normal incidence (Wh/m2)
- ``GHI``: Global irradiation on horizontal plane at ground level (Wh/m2)
- ``BHI``: Beam irradiation on horizontal plane at ground level (Wh/m2)
- ``DHI``: Diffuse irradiation on horizontal plane at ground level (Wh/m2)
- ``BNI``: Beam irradiation on mobile plane following the sun at normal incidence (Wh/m2)
- ``Reliability``: Proportion of reliable data in the summarization (0-1)

In [26]:
try:
    
    import math
    import time
    from bs4 import BeautifulSoup
    
    dia = date.today() + timedelta(days = -2)
    fecha_buscar = "{}-{}-{}".format(dia.year, str(dia.month).zfill(2), str(dia.day).zfill(2))
    print(fecha_buscar)
    fecha_ini = fecha_buscar
    fecha_fin = fecha_buscar    

    print(lat, lon)

    correo = 'alejandro.ruiz.berciano%2540gmail.com'
        
    url = 'http://www.soda-is.com/service/wps?Service=WPS&Request=Execute&Identifier=get_cams_radiation&version=1.0.0&DataInputs=latitude={};longitude={};altitude=-999;date_begin={};date_end={};time_ref=UT;summarization=PT01H;username={}&RawDataOutput=irradiation'.format(lat, lon, fecha_ini, fecha_fin, correo)
    print(url)

    response = requests.get(url)
        
    # The response is converted to text and it is determined how many lines there are until the data
    soup = BeautifulSoup(response.content)

    f = soup.text
    nbTotalLines = 0
    nbLinesToSkip = 0
    nbTotalLines, nbLinesToSkip = openAndSkipLines(f, '#')

    if(nbTotalLines < 0):
        print('No hay datos')
        exit()
    sizeData = nbTotalLines - nbLinesToSkip
        
    # The data frame is created and a column is added with the station ID
    df_soda = getCamsData(f, nbLinesToSkip)
    df_soda.insert(len(df_soda.columns),"estacion",list(np.repeat([str(str(lat)+str(lon))], len(df_soda["dateEnds"]))),True)
    
except:
    print("Fallo soda")
    fallo = 1

2021-07-18
41.29277777777778 2.0700000000000003
http://www.soda-is.com/service/wps?Service=WPS&Request=Execute&Identifier=get_cams_radiation&version=1.0.0&DataInputs=latitude=41.29277777777778;longitude=2.0700000000000003;altitude=-999;date_begin=2021-07-18;date_end=2021-07-18;time_ref=UT;summarization=PT01H;username=alejandro.ruiz.berciano%2540gmail.com&RawDataOutput=irradiation


# Data cleansing
<div style = "float:right"><a style="text-decoration:none" href = "#Functional-script">

## Climate data from previous 5 days

This data is obtained from the OpenWeather portal (thanks to a student license that allows a large number of calls per day) (https://openweathermap.org/api/one-call-api#history). **Data in UTC.** The hourly weather data for the 5 days prior to the call is accessed. The obtained fields are:

- ``dt``: Time of historical data, Unix, UTC
- ``temp``: Temperature. Units: kelvin
- ``feels_like``:  Temperature. This accounts for the human perception of weather. Units: kelvin
- ``pressure``: Atmospheric pressure on the sea level, hPa
- ``humidity``: Humidity, %
- ``dew_point``: Atmospheric temperature below which water droplets begin to condense and dew can form. Units: kelvin
- ``clouds``: Cloudiness, %
- ``visibility``: Average visibility, metres
- ``wind_speed``: Wind speed. Wind speed. Units: m/s
- ``wind_gust``: Wind gust. Units: m/s
- ``wind_deg``: Wind direction, degrees (meteorological)
- ``rain``: Precipitation volume, mm
- ``snow``: Snow volume, mm
- ``we``: Includes an id that indicates the type of weather

Hour X contains the data elapsed between X:00 and X:59

In [27]:
def clima_ow_clean(df_datos):
    
    # Columns are converted to the correct data types

    df_datos["hour"] = pd.to_numeric([np.nan if pd.isna(c) == True else str(c)[:2] for c in df_datos["hour"]])
    df_datos = df_datos[(df_datos["hour"] < hora_fin) & (df_datos["hour"] >= hora_ini)]
    df_datos.reset_index(drop=True, inplace=True)
    
    # Na's are eliminated
    
    try:
        df_datos.fillna({'visibility': df_datos["visibility"].mean(), 'wind_gust': df_datos["wind_gust"].mean()}, inplace = True)
    except:
        df_datos.fillna({'visibility': 0, 'wind_gust': 0}, inplace = True)
        
    try:
        df_datos.drop(['rain.1h'], axis=1, inplace = True)
    except:
        pass

    try:
        df_datos.drop(['snow.1h'], axis=1, inplace = True)
    except:
        pass
    
    try:
        df_datos.drop(['uvi'], axis=1, inplace = True)
    except:
        pass
    
    df_datos = df_datos.fillna(0)
    
    # Duplicated rows are eliminated
    
    df_datos = df_datos.drop_duplicates(['date', 'hour', "fecha_prediccion", "estacion"],
                        keep = 'first')
    df_datos.reset_index(drop = True, inplace = True)
    
    return df_datos

In [28]:
# The function is called
try:
    df_clima_clean = clima_ow_clean(df_clima_ow)
except:
    fallo = 1
    
df_clima_clean.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


Unnamed: 0,date,hour,fecha_prediccion,estacion,temp,feels_like,pressure,humidity,dew_point,clouds,visibility,wind_speed,wind_deg,wind_gust,we
0,2021-07-19,4,2021-07-20,41.292777777777782.0700000000000003,297.01,297.17,1012,66,290.28,0,10000,1.54,280,3.628333,800
1,2021-07-19,5,2021-07-20,41.292777777777782.0700000000000003,297.0,297.16,1012,66,290.27,20,10000,3.09,300,3.628333,801
2,2021-07-19,6,2021-07-20,41.292777777777782.0700000000000003,298.63,298.85,1012,62,290.82,0,10000,3.6,290,3.628333,800
3,2021-07-19,7,2021-07-20,41.292777777777782.0700000000000003,300.3,301.12,1012,56,290.77,0,10000,3.6,290,3.628333,800
4,2021-07-19,8,2021-07-20,41.292777777777782.0700000000000003,301.74,302.87,1012,55,291.82,0,10000,1.03,0,3.628333,800


## Weather predictions for the next 2 days

This data is obtained from the OpenWeather portal (thanks to a student license that allows a large number of calls per day) (https://openweathermap.org/api/one-call-api). **Data in UTC.** The hourly weather forecast for the 2 days following the call is accessed. The fields obtained are:

- ``dt``: Time of the forecasted data, Unix, UTC
- ``temp``: Temperature. Units: kelvin
- ``feels_like``: Temperature. This accounts for the human perception of weather. Units: kelvin
- ``pressure``: Atmospheric pressure on the sea level, hPa
- ``humidity``: Humidity, %
- ``dew_point``: Atmospheric temperature (varying according to pressure and humidity) below which water droplets begin to condense and dew can form. Units: kelvin
- ``uvi``: UV index
- ``clouds``: Cloudiness, %
- ``visibility``: Average visibility, metres
- ``wind_speed``: Wind speed. Units: m/s
- ``wind_gust``: Wind gust. Units: m/s
- ``wind_deg``: Wind direction, degrees (meteorological)
- ``pop``: Probability of precipitation
- ``rain``: Rain volume for last hour, mm
- ``snow``: Snow volume for last hour, mm
- ``weather``: Includes an id and other parameters

Hour X contains the data elapsed between X:00 and X:59

In [29]:
def pred_clean(df_datos):
    
    # Columns are converted to the correct data types
    
    df_datos["hour"] = pd.to_numeric([np.nan if pd.isna(c) == True else str(c)[:2] for c in df_datos["hour"]])
    df_datos = df_datos[(df_datos["hour"] < hora_fin) & (df_datos["hour"] >= hora_ini)]
    df_datos.reset_index(drop=True, inplace=True)
    
    # Na's are eliminated
    
    try:
        df_datos.drop(['rain.1h'], axis=1, inplace = True)
    except:
        pass
    try:
        df_datos.drop(['snow.1h'], axis=1, inplace = True)
    except:
        pass
    
    df_datos = df_datos.fillna(0)
    
    # Possible duplicated rows are eliminated
    
    df_datos = df_datos.drop_duplicates(['date', 'hour', "fecha_prediccion", "estacion"],
                        keep = 'first')
    df_datos.reset_index(drop = True, inplace = True)
    
    return df_datos

In [30]:
# The function is called
try:
    df_pred_clean = pred_clean(df_pred_ow)
except:
    fallo = 1
    
df_pred_clean.head()

Unnamed: 0,date,hour,fecha_prediccion,estacion,temp,feels_like,pressure,humidity,dew_point,uvi,clouds,visibility,wind_speed,wind_deg,wind_gust,pop,we
0,2021-07-20,19,2021-07-20,41.292777777777782.0700000000000003,299.65,299.65,1015,78,295.5,0.0,6,10000,3.77,232,5.42,0.14,800
1,2021-07-21,4,2021-07-20,41.292777777777782.0700000000000003,298.56,298.98,1016,70,292.38,0.0,6,10000,2.25,255,2.81,0.0,800
2,2021-07-21,5,2021-07-20,41.292777777777782.0700000000000003,298.58,298.98,1017,69,292.05,0.0,10,10000,2.07,255,2.4,0.0,800
3,2021-07-21,6,2021-07-20,41.292777777777782.0700000000000003,298.85,299.23,1017,67,291.8,0.45,9,10000,2.55,251,3.03,0.0,800
4,2021-07-21,7,2021-07-20,41.292777777777782.0700000000000003,299.3,299.3,1018,65,291.7,1.41,1,10000,2.5,243,2.88,0.0,800


## Radiation data from the previous day

the accumulated hours (**TRUE SOLAR TIME**) of global, direct, diffuse and infrared radiation. These data are obtained from the AEMET Opendata portal (https://opendata.aemet.es/centrodedescargas/productosAEMET). The fields obtained for each day are:

- ``Estación``: Name of the station
- ``Indicativo``: Indicative Climatological Station
- ``Tipo``: Measured variable (Global/Diffuse/Direct/Erythematic UV/Infrared)
- ``GL/DF/DT``: Hourly radiation accumulated between: (indicated hour -1) and (indicated hour) between 5 and 20. True Solar Time. Variables: Global/Diffuse/Direct (10 * kJ/m²)
- ``UVER``: Semi-hourly radiation accumulated between: (hour: indicated minutes - 30 minutes and (hour: indicated minutes) between 4:30 and 20. True Solar Time. Variables: Erythematic Ultraviolet Radiation (J/m²)
- ``IR``: Hourly radiation accumulated between (indicated hour -1) and (indicated hour) between 1 and 24 True Solar Time. Variables: Infrared radiation (10 * kJ/m²)
- ...

Hour X contains the data elapsed between (X-1):00 and X:00

In [31]:
def rad_aemet_clean(df_datos):
    
    # Column names are changed accordingly
    columnas = []
    for i in df_rad_aemet.columns:

        if (i+".3") in columnas:
            columnas.append(i+".4")
        elif (i+".2") in columnas:
            columnas.append(i+".3")
        elif (i+".1") in columnas:
            columnas.append(i+".2")
        elif i in columnas:
            columnas.append(i+".1")
        elif i not in columnas:
            columnas.append(i) 
    df_datos.columns = columnas

    
    # The hourly dataset is generated
    
    hora_ini_aemet = 5
    hora_fin_aemet = 21
    dif = int(int(hora_fin_aemet)-int(hora_ini_aemet))
    df_rad_horas = pd.DataFrame(columns = ["fecha", "hora", "estacion", "indicativo", "GL", "DF", "DT", "UVB", "IR"])

    for i, fila in df_datos.iterrows():

        for j in range(0, dif):
            hora = 5+j
            col_gl = str(hora)
            col_df = str(hora) + ".1"
            col_dt = str(hora) + ".2"
            col_uvb = str(hora) + ".3"
            col_uvb_2 = str(hora-1) + ".5"
            col_ir = str(hora) + ".4"
            df_rad_horas = df_rad_horas.append({'fecha' : fila["fecha"], 'estacion' : fila["Estación"], 'indicativo' : fila["Indicativo"], 'GL' : fila[col_gl], 'DF' : fila[col_df], 'DT' : fila[col_dt], 'UVB' : (fila[col_uvb] + fila[col_uvb_2]), 'IR' : fila[col_ir], 'hora' : hora-1}, ignore_index = True)
    df_rad_horas.drop(['DF'], axis=1, inplace = True)
    df_rad_horas.drop(['DT'], axis=1, inplace = True)
    
    # Columns are converted to the correct data types
    
    df_rad_horas["hora"] = pd.to_numeric([np.nan if pd.isna(c) == True else int(c) for c in df_rad_horas["hora"]])
    df_rad_horas["GL"] = pd.to_numeric([np.nan if (pd.isna(c) == True) or (c == "") else float(c) for c in df_rad_horas["GL"]])
    df_rad_horas["UVB"] = pd.to_numeric([np.nan if (pd.isna(c) == True) or (c == "") else float(c) for c in df_rad_horas["UVB"]])
    df_rad_horas["IR"] = pd.to_numeric([np.nan if (pd.isna(c) == True) or (c == "") else float(c) for c in df_rad_horas["IR"]])
    df_rad_horas["GL"] = df_rad_horas["GL"] *10/3.6
    df_rad_horas["UVB"] = df_rad_horas["UVB"] *1/(3.6*1000)
    df_rad_horas["IR"] = df_rad_horas["IR"] *10/3.6
    
    # Na's are eliminated
    
    try:
        df_rad_horas.fillna({'IR': df_rad_horas["IR"].mean(), "UVB": df_rad_horas["UVB"].mean()}, inplace = True)
    except:
        df_rad_horas.fillna({'IR': 0, "UVB": 0}, inplace = True)
    
    df_rad_horas = df_rad_horas.fillna(0)
    
    # Possible duplicated rows are eliminated
    
    df_rad_horas = df_rad_horas.drop_duplicates(['fecha', 'hora', "indicativo"], keep = 'first')
    df_rad_horas.reset_index(drop = True, inplace = True)

    
    return df_rad_horas

In [32]:
# The function is called
try:
    df_aemet_clean = rad_aemet_clean(df_rad_aemet)
except:
    fallo = 1
    
df_aemet_clean.head()

Unnamed: 0,fecha,hora,estacion,indicativo,GL,UVB,IR
0,2021-07-20,4,A CORUÑA,1387,0.0,0.0,391.666667
1,2021-07-20,5,A CORUÑA,1387,8.333333,0.008889,391.666667
2,2021-07-20,6,A CORUÑA,1387,33.333333,0.0325,391.666667
3,2021-07-20,7,A CORUÑA,1387,63.888889,0.726944,394.444444
4,2021-07-20,8,A CORUÑA,1387,122.222222,1.539722,394.444444


## Radiation data from two days before

These data are obtained from the CAMS Radiation Service portal of the European Union (http://www.soda-pro.com/web-services/radiation/cams-radiation-service). **In UTC hour.** Provide radiation for any date up to 2 days before the call (3 day delay). The fields obtained for each day are:

- ``Observation period``: Beginning/end of the time period with the format "yyyy-mm-ddTHH:MM:SS.S/yyyy-mm-ddTHH:MM:SS.S"
- ``TOA``: Irradiation on horizontal plane at the top of atmosphere (Wh/m2) computed from Solar Geometry 2
- ``Clear sky GHI``: Clear sky global irradiation on horizontal plane at ground level (Wh/m2)
- ``Clear sky BHI``: Clear sky beam irradiation on horizontal plane at ground level (Wh/m2)
- ``Clear sky DHI``: Clear sky diffuse irradiation on horizontal plane at ground level (Wh/m2)
- ``Clear sky BNI``: Clear sky beam irradiation on mobile plane following the sun at normal incidence (Wh/m2)
- ``GHI``: Global irradiation on horizontal plane at ground level (Wh/m2)
- ``BHI``: Beam irradiation on horizontal plane at ground level (Wh/m2)
- ``DHI``: Diffuse irradiation on horizontal plane at ground level (Wh/m2)
- ``BNI``: Beam irradiation on mobile plane following the sun at normal incidence (Wh/m2)
- ``Reliability``: Proportion of reliable data in the summarization (0-1)

Hour X contains the data elapsed between X:00 and X:59

In [33]:
def soda_clean(df_datos):
    
    # Unnecessary columns are removed
    
    df_datos.drop(['dateEnds'], axis=1, inplace = True)
    df_datos.drop(['toa'], axis=1, inplace = True)
    df_datos.drop(['cs_ghi'], axis=1, inplace = True)
    df_datos.drop(['cs_bhi'], axis=1, inplace = True)
    df_datos.drop(['cs_dhi'], axis=1, inplace = True)
    df_datos.drop(['cs_bni'], axis=1, inplace = True)
    df_datos.drop(['bhi'], axis=1, inplace = True)
    df_datos.drop(['dhi'], axis=1, inplace = True)
    df_datos.drop(['bni'], axis=1, inplace = True)
    df_datos.drop(['reliability'], axis=1, inplace = True)
    
    # NAs are eliminated
    df_datos = df_datos.fillna(0)
    
    # Columns are converted to the correct data types
    
    df_datos['dateBegins'] = pd.to_datetime(df_datos['dateBegins'])
    df_datos = df_datos.rename(columns={'dateBegins':'date'})
    df_datos['hora'] = pd.to_datetime(df_datos['date']).dt.hour
    df_datos['fecha'] = [str(a)[0:10] for a in df_datos['date']]
    df_datos = df_datos[(df_datos["hora"] < hora_fin) & (df_datos["hora"] >= hora_ini)]
    df_datos.reset_index(drop = True, inplace = True)
    
    # Possible duplicated rows are eliminated
    
    df_datos = df_datos.drop_duplicates(["date", 'fecha', 'hora', "estacion"], keep = 'first')
    df_datos.reset_index(drop = True, inplace = True)
    df_datos.head()

    
    return df_datos

In [34]:
# The function is called
try:
    df_soda_clean = soda_clean(df_soda)
except:
    fallo = 1
    
df_soda_clean.head()

Unnamed: 0,date,ghi,estacion,hora,fecha
0,2021-07-18 04:00:00,2.2482,41.292777777777782.0700000000000003,4,2021-07-18
1,2021-07-18 05:00:00,80.5099,41.292777777777782.0700000000000003,5,2021-07-18
2,2021-07-18 06:00:00,247.3058,41.292777777777782.0700000000000003,6,2021-07-18
3,2021-07-18 07:00:00,438.2921,41.292777777777782.0700000000000003,7,2021-07-18
4,2021-07-18 08:00:00,619.5929,41.292777777777782.0700000000000003,8,2021-07-18


# Generation of the rows with the days in columns

For each day that data is collected, there should only be one row per hour and station. For example, for historical weather data for the last 5 days, the data for each day must be a column associated with the hours of the day the data frame was downloaded (for each station, from 4:00 to 19:00, columns with the data from the previous day, columns with those of the previous one ...)

## Historical weather data

In [35]:
df_clima_total = df_clima_clean
df_clima_total.head()

Unnamed: 0,date,hour,fecha_prediccion,estacion,temp,feels_like,pressure,humidity,dew_point,clouds,visibility,wind_speed,wind_deg,wind_gust,we
0,2021-07-19,4,2021-07-20,41.292777777777782.0700000000000003,297.01,297.17,1012,66,290.28,0,10000,1.54,280,3.628333,800
1,2021-07-19,5,2021-07-20,41.292777777777782.0700000000000003,297.0,297.16,1012,66,290.27,20,10000,3.09,300,3.628333,801
2,2021-07-19,6,2021-07-20,41.292777777777782.0700000000000003,298.63,298.85,1012,62,290.82,0,10000,3.6,290,3.628333,800
3,2021-07-19,7,2021-07-20,41.292777777777782.0700000000000003,300.3,301.12,1012,56,290.77,0,10000,3.6,290,3.628333,800
4,2021-07-19,8,2021-07-20,41.292777777777782.0700000000000003,301.74,302.87,1012,55,291.82,0,10000,1.03,0,3.628333,800


Rows are generated with every day of each call. The columns are labeled based on the day (d-1, d-2 ...)>

In [36]:
columnas_1 = [col for col in df_clima_total.columns[1:4]] + [str(col+"_d-1") for col in df_clima_total.columns[4:]]
columnas_2 = [col for col in df_clima_total.columns[1:4]] + [str(col+"_d-2") for col in df_clima_total.columns[4:]]
columnas_3 = [col for col in df_clima_total.columns[1:4]] + [str(col+"_d-3") for col in df_clima_total.columns[4:]]
columnas_4 = [col for col in df_clima_total.columns[1:4]] + [str(col+"_d-4") for col in df_clima_total.columns[4:]]
columnas_5 = [col for col in df_clima_total.columns[1:4]] + [str(col+"_d-5") for col in df_clima_total.columns[4:]]

In [37]:
df_clima_dias_1 = pd.DataFrame(columns = columnas_1)
df_clima_dias_2 = pd.DataFrame(columns = columnas_2)
df_clima_dias_3 = pd.DataFrame(columns = columnas_3)
df_clima_dias_4 = pd.DataFrame(columns = columnas_4)
df_clima_dias_5 = pd.DataFrame(columns = columnas_5)

for i, fila in df_clima_total.iterrows():
    
    if (i in list(range(0, len(df_clima_total["date"]), 5000))) | (i == len(df_clima_total["date"])-1):
        print("Procesando fila {} de {}".format(i, len(df_clima_total["date"])))
        print("La cantidad de filas de los datasets (aproximadamente 1/5) es {}".format(len(df_clima_dias_1["hour"])))

    
    # For each hourly row, it is detected what day it belongs to and it is attached to the corresponding dataset
    if (pd.to_datetime(fila["fecha_prediccion"]) - pd.to_datetime(fila["date"])).days == 1:
        df_clima_dias_1.loc[len(df_clima_dias_1["fecha_prediccion"])] = [elem for elem in fila][1:]

    if (pd.to_datetime(fila["fecha_prediccion"]) - pd.to_datetime(fila["date"])).days == 2:
        df_clima_dias_2.loc[len(df_clima_dias_2["fecha_prediccion"])] = [elem for elem in fila][1:]
    
    if (pd.to_datetime(fila["fecha_prediccion"]) - pd.to_datetime(fila["date"])).days == 3:
        df_clima_dias_3.loc[len(df_clima_dias_3["fecha_prediccion"])] = [elem for elem in fila][1:]
        
    if (pd.to_datetime(fila["fecha_prediccion"]) - pd.to_datetime(fila["date"])).days == 4:
        df_clima_dias_4.loc[len(df_clima_dias_4["fecha_prediccion"])] = [elem for elem in fila][1:]
        
    if (pd.to_datetime(fila["fecha_prediccion"]) - pd.to_datetime(fila["date"])).days == 5:
        df_clima_dias_5.loc[len(df_clima_dias_5["fecha_prediccion"])] = [elem for elem in fila][1:]
        

df_clima_dias_1.head()

Procesando fila 0 de 80
La cantidad de filas de los datasets (aproximadamente 1/5) es 0
Procesando fila 79 de 80
La cantidad de filas de los datasets (aproximadamente 1/5) es 16


Unnamed: 0,hour,fecha_prediccion,estacion,temp_d-1,feels_like_d-1,pressure_d-1,humidity_d-1,dew_point_d-1,clouds_d-1,visibility_d-1,wind_speed_d-1,wind_deg_d-1,wind_gust_d-1,we_d-1
0,4,2021-07-20,41.292777777777782.0700000000000003,297.01,297.17,1012,66,290.28,0,10000,1.54,280,3.628333,800
1,5,2021-07-20,41.292777777777782.0700000000000003,297.0,297.16,1012,66,290.27,20,10000,3.09,300,3.628333,801
2,6,2021-07-20,41.292777777777782.0700000000000003,298.63,298.85,1012,62,290.82,0,10000,3.6,290,3.628333,800
3,7,2021-07-20,41.292777777777782.0700000000000003,300.3,301.12,1012,56,290.77,0,10000,3.6,290,3.628333,800
4,8,2021-07-20,41.292777777777782.0700000000000003,301.74,302.87,1012,55,291.82,0,10000,1.03,0,3.628333,800


They are joined by prediction date, station and hour, to generate rows for each call day, time and station

In [38]:
df_total = pd.merge(df_clima_dias_1, df_clima_dias_2, how = "inner", on = ["hour", "fecha_prediccion", "estacion"])
df_total = pd.merge(df_total, df_clima_dias_3, how = "inner", on = ["hour", "fecha_prediccion", "estacion"])
df_total = pd.merge(df_total, df_clima_dias_4, how = "inner", on = ["hour", "fecha_prediccion", "estacion"])
df_total = pd.merge(df_total, df_clima_dias_5, how = "inner", on = ["hour", "fecha_prediccion", "estacion"])
df_total.head()

Unnamed: 0,hour,fecha_prediccion,estacion,temp_d-1,feels_like_d-1,pressure_d-1,humidity_d-1,dew_point_d-1,clouds_d-1,visibility_d-1,wind_speed_d-1,wind_deg_d-1,wind_gust_d-1,we_d-1,temp_d-2,feels_like_d-2,pressure_d-2,humidity_d-2,dew_point_d-2,clouds_d-2,visibility_d-2,wind_speed_d-2,wind_deg_d-2,wind_gust_d-2,we_d-2,temp_d-3,feels_like_d-3,pressure_d-3,humidity_d-3,dew_point_d-3,clouds_d-3,visibility_d-3,wind_speed_d-3,wind_deg_d-3,wind_gust_d-3,we_d-3,temp_d-4,feels_like_d-4,pressure_d-4,humidity_d-4,dew_point_d-4,clouds_d-4,visibility_d-4,wind_speed_d-4,wind_deg_d-4,wind_gust_d-4,we_d-4,temp_d-5,feels_like_d-5,pressure_d-5,humidity_d-5,dew_point_d-5,clouds_d-5,visibility_d-5,wind_speed_d-5,wind_deg_d-5,wind_gust_d-5,we_d-5
0,4,2021-07-20,41.292777777777782.0700000000000003,297.01,297.17,1012,66,290.28,0,10000,1.54,280,3.628333,800,293.92,294.77,1014,78,289.95,20,10000,2.06,310,3.628333,801,294.68,295.19,1017,88,292.61,20,10000,2.57,320,3.628333,801,293.39,293.35,1018,83,290.41,20,10000,3.6,320,3.628333,801,297.45,297.58,1019,63,289.96,17,10000,3.8,156,3.43,500
1,5,2021-07-20,41.292777777777782.0700000000000003,297.0,297.16,1012,66,290.27,20,10000,3.09,300,3.628333,801,294.21,295.18,1013,78,290.23,20,10000,2.06,310,3.628333,801,294.53,295.02,1017,88,292.46,20,10000,2.57,320,3.628333,801,293.34,293.64,1018,83,290.36,20,10000,3.09,340,3.628333,801,297.45,297.58,1019,63,289.96,17,10000,3.8,156,3.43,500
2,6,2021-07-20,41.292777777777782.0700000000000003,298.63,298.85,1012,62,290.82,0,10000,3.6,290,3.628333,800,295.34,296.41,1013,78,291.32,20,10000,2.57,340,3.628333,801,296.0,296.53,1017,84,293.15,20,10000,2.06,340,3.628333,801,294.77,295.6,1018,78,290.77,40,10000,2.57,330,3.628333,802,297.45,297.58,1019,63,289.96,17,10000,3.8,156,3.43,500
3,7,2021-07-20,41.292777777777782.0700000000000003,300.3,301.12,1012,56,290.77,0,10000,3.6,290,3.628333,800,296.99,298.62,1014,69,290.96,20,10000,1.54,300,3.628333,801,297.29,297.82,1017,79,293.42,20,10000,1.03,280,3.628333,801,296.42,298.18,1018,69,290.42,20,10000,1.03,0,3.628333,801,297.45,297.58,1019,63,289.96,17,10000,3.8,156,3.43,500
4,8,2021-07-20,41.292777777777782.0700000000000003,301.74,302.87,1012,55,291.82,0,10000,1.03,0,3.628333,800,298.71,301.13,1013,73,293.51,20,10000,2.06,230,3.628333,801,298.5,299.1,1017,77,294.18,40,10000,3.6,200,3.628333,802,297.29,298.68,1018,69,291.25,20,10000,2.06,180,3.628333,801,297.45,297.58,1019,63,289.96,17,10000,3.8,156,3.43,500


In [39]:
df_clima_clean = df_total

## Weather prediction data

In [40]:
df_pred_total = df_pred_clean
df_pred_total.head()

Unnamed: 0,date,hour,fecha_prediccion,estacion,temp,feels_like,pressure,humidity,dew_point,uvi,clouds,visibility,wind_speed,wind_deg,wind_gust,pop,we
0,2021-07-20,19,2021-07-20,41.292777777777782.0700000000000003,299.65,299.65,1015,78,295.5,0.0,6,10000,3.77,232,5.42,0.14,800
1,2021-07-21,4,2021-07-20,41.292777777777782.0700000000000003,298.56,298.98,1016,70,292.38,0.0,6,10000,2.25,255,2.81,0.0,800
2,2021-07-21,5,2021-07-20,41.292777777777782.0700000000000003,298.58,298.98,1017,69,292.05,0.0,10,10000,2.07,255,2.4,0.0,800
3,2021-07-21,6,2021-07-20,41.292777777777782.0700000000000003,298.85,299.23,1017,67,291.8,0.45,9,10000,2.55,251,3.03,0.0,800
4,2021-07-21,7,2021-07-20,41.292777777777782.0700000000000003,299.3,299.3,1018,65,291.7,1.41,1,10000,2.5,243,2.88,0.0,800


Rows are generated with every day of each call. The days are classified into 2 groups. The first will contain the hours of the day when the data is obtained and of two days later. The second is the data for the day after the data is obtained. As the predictions for the 48 hours after the call are downloaded, these two groups will each have 14 values for each day and location (16 after filtering useful hours)

In [41]:
columnas_1 = [col for col in df_pred_total.columns[1:4]] + [str(col+"_pred_1") for col in df_pred_total.columns[4:]]
columnas_2 = [col for col in df_pred_total.columns[1:4]] + [str(col+"_pred_2") for col in df_pred_total.columns[4:]]

In [42]:
df_pred_dias_1 = pd.DataFrame(columns = columnas_1)
df_pred_dias_2 = pd.DataFrame(columns = columnas_2)

for i, fila in df_pred_total.iterrows():
    
    if (i in list(range(0,len(df_pred_total["date"]),5000))) | (i == len(df_pred_total["date"])-1):
        print("Procesando fila {} de {}".format(i, len(df_pred_total["date"])))
        print("La cantidad de filas de los datasets (aproximadamente 1/2) es {}".format(len(df_pred_dias_1["hour"])))
       
    # For each hourly row, the day it belongs to is detected and it is attached to the corresponding dataset
    if ((pd.to_datetime(fila["fecha_prediccion"]) - pd.to_datetime(fila["date"])).days == 0) | ((pd.to_datetime(fila["fecha_prediccion"]) - pd.to_datetime(fila["date"])).days == -2):
        df_pred_dias_1.loc[len(df_pred_dias_1["fecha_prediccion"])] = [elem for elem in fila][1:] 
        
    if (pd.to_datetime(fila["date"]) - pd.to_datetime(fila["fecha_prediccion"])).days == 1:
        df_pred_dias_2.loc[len(df_pred_dias_2["fecha_prediccion"])] = [elem for elem in fila][1:]
        

df_pred_dias_2.head()

Procesando fila 0 de 32
La cantidad de filas de los datasets (aproximadamente 1/2) es 0
Procesando fila 31 de 32
La cantidad de filas de los datasets (aproximadamente 1/2) es 15


Unnamed: 0,hour,fecha_prediccion,estacion,temp_pred_2,feels_like_pred_2,pressure_pred_2,humidity_pred_2,dew_point_pred_2,uvi_pred_2,clouds_pred_2,visibility_pred_2,wind_speed_pred_2,wind_deg_pred_2,wind_gust_pred_2,pop_pred_2,we_pred_2
0,4,2021-07-20,41.292777777777782.0700000000000003,298.56,298.98,1016,70,292.38,0.0,6,10000,2.25,255,2.81,0.0,800
1,5,2021-07-20,41.292777777777782.0700000000000003,298.58,298.98,1017,69,292.05,0.0,10,10000,2.07,255,2.4,0.0,800
2,6,2021-07-20,41.292777777777782.0700000000000003,298.85,299.23,1017,67,291.8,0.45,9,10000,2.55,251,3.03,0.0,800
3,7,2021-07-20,41.292777777777782.0700000000000003,299.3,299.3,1018,65,291.7,1.41,1,10000,2.5,243,2.88,0.0,800
4,8,2021-07-20,41.292777777777782.0700000000000003,299.78,299.78,1018,63,291.65,3.03,0,10000,2.64,228,2.77,0.0,800


The one by prediction date, station and hour, to generate rows for each call day, time and station

In [43]:
df_pred_dias_1 = df_pred_dias_1.drop_duplicates(['hour', "fecha_prediccion", "estacion"],
                        keep = 'first')
df_pred_dias_1.reset_index(drop = True, inplace = True)
df_pred_dias_2 = df_pred_dias_2.drop_duplicates(['hour', "fecha_prediccion", "estacion"],
                        keep = 'first')
df_pred_dias_2.reset_index(drop = True, inplace = True)

df_total_previo = pd.merge(df_pred_dias_1, df_pred_dias_2, how = "inner", on = ["hour", "fecha_prediccion", "estacion"])
df_total_previo.head()

Unnamed: 0,hour,fecha_prediccion,estacion,temp_pred_1,feels_like_pred_1,pressure_pred_1,humidity_pred_1,dew_point_pred_1,uvi_pred_1,clouds_pred_1,visibility_pred_1,wind_speed_pred_1,wind_deg_pred_1,wind_gust_pred_1,pop_pred_1,we_pred_1,temp_pred_2,feels_like_pred_2,pressure_pred_2,humidity_pred_2,dew_point_pred_2,uvi_pred_2,clouds_pred_2,visibility_pred_2,wind_speed_pred_2,wind_deg_pred_2,wind_gust_pred_2,pop_pred_2,we_pred_2
0,19,2021-07-20,41.292777777777782.0700000000000003,299.65,299.65,1015,78,295.5,0.0,6,10000,3.77,232,5.42,0.14,800,299.86,301.39,1018,68,292.93,0.0,1,10000,2.04,204,2.24,0.0,800
1,4,2021-07-20,41.292777777777782.0700000000000003,299.08,299.4,1017,64,291.36,0.0,1,10000,3.54,264,4.17,0.0,800,298.56,298.98,1016,70,292.38,0.0,6,10000,2.25,255,2.81,0.0,800
2,5,2021-07-20,41.292777777777782.0700000000000003,298.95,299.26,1017,64,291.2,0.0,0,10000,3.54,265,4.3,0.0,800,298.58,298.98,1017,69,292.05,0.0,10,10000,2.07,255,2.4,0.0,800
3,6,2021-07-20,41.292777777777782.0700000000000003,299.21,299.21,1017,63,291.14,0.47,0,10000,3.46,263,4.26,0.0,800,298.85,299.23,1017,67,291.8,0.45,9,10000,2.55,251,3.03,0.0,800
4,7,2021-07-20,41.292777777777782.0700000000000003,299.85,300.92,1018,61,291.11,1.45,0,10000,3.48,254,4.45,0.0,800,299.3,299.3,1018,65,291.7,1.41,1,10000,2.5,243,2.88,0.0,800


For each day and season, we obtain 2 prediction values for each hour (two for 4:00, two for 5:00...) corresponding to the prediction of the 48 hours following the call. Then the dataset with the average prediction value of each hour is generated

In [44]:
columnas_total = [col for col in df_pred_total.columns[1:4]] + [str(col+"_pred") for col in df_pred_total.columns[4:]]
df_total = pd.DataFrame(columns = columnas_total)

for i, fila in df_total_previo.iterrows():
    
    if (i in list(range(0,len(df_total_previo["hour"]),5000))) | (i == len(df_total_previo["hour"])-1):
        print("Procesando fila {} de {}".format(i, len(df_total_previo["hour"])))
       
    # For each hour, the average of the data of the two predictions is obtained
    fila_nueva = []
    df_new = pd.DataFrame()
    for j in range(0, len(columnas_total)):
        if j in [0,1,2]:
            fila_nueva.append(fila[j]) 
        else:
            fila_nueva.append(np.mean([fila[j], fila[j + (int((len(columnas_total)-3)))]]))
    df_new = pd.DataFrame([tuple(fila_nueva)], columns = columnas_total)
    df_total = df_total.append(df_new, ignore_index = True)
    
    
df_total.head()

Procesando fila 0 de 16
Procesando fila 15 de 16


Unnamed: 0,hour,fecha_prediccion,estacion,temp_pred,feels_like_pred,pressure_pred,humidity_pred,dew_point_pred,uvi_pred,clouds_pred,visibility_pred,wind_speed_pred,wind_deg_pred,wind_gust_pred,pop_pred,we_pred
0,19,2021-07-20,41.292777777777782.0700000000000003,299.755,300.52,1016.5,73.0,294.215,0.0,3.5,10000.0,2.905,218.0,3.83,0.07,800.0
1,4,2021-07-20,41.292777777777782.0700000000000003,298.82,299.19,1016.5,67.0,291.87,0.0,3.5,10000.0,2.895,259.5,3.49,0.0,800.0
2,5,2021-07-20,41.292777777777782.0700000000000003,298.765,299.12,1017.0,66.5,291.625,0.0,5.0,10000.0,2.805,260.0,3.35,0.0,800.0
3,6,2021-07-20,41.292777777777782.0700000000000003,299.03,299.22,1017.0,65.0,291.47,0.46,4.5,10000.0,3.005,257.0,3.645,0.0,800.0
4,7,2021-07-20,41.292777777777782.0700000000000003,299.575,300.11,1018.0,63.0,291.405,1.43,0.5,10000.0,2.99,248.5,3.665,0.0,800.0


In [45]:
df_pred_clean = df_total

# Preparación de los datos
<div style = "float:right"><a style="text-decoration:none" href = "#Script-funcional">

In [46]:
def merge_datasets(df_estaciones_rad, df_clima, df_pred, df_aemet, df_soda):
    
    ## WEATHER ##
    print("Processing WEATHER")
    
    # The column of dates of the day to be predicted is added to the climatological dataset
    import datetime
    fechas_atrasadas = ["{}-{}-{}".format(str((pd.to_datetime(f) + datetime.timedelta(days=1)).year), str((pd.to_datetime(f) + datetime.timedelta(days=1)).month).zfill(2), str((pd.to_datetime(f) + datetime.timedelta(days=1)).day).zfill(2)) for f in df_clima["fecha_prediccion"]]
    df_clima.insert(0, "fecha_rad", fechas_atrasadas, True)
    
    # Columns are renamed and temperatures are converted to degrees Cº
    df_clima = df_clima.rename(index = str, columns = {"hour": "hora", "estacion": "indicativo"})

    df_clima["temp_d-1"] = df_clima["temp_d-1"] - 273.15
    df_clima["temp_d-2"] = df_clima["temp_d-2"] - 273.15
    df_clima["temp_d-3"] = df_clima["temp_d-3"] - 273.15
    df_clima["temp_d-4"] = df_clima["temp_d-4"] - 273.15
    df_clima["temp_d-5"] = df_clima["temp_d-5"] - 273.15
    df_clima["feels_like_d-1"] = df_clima["feels_like_d-1"] - 273.15
    df_clima["feels_like_d-2"] = df_clima["feels_like_d-2"] - 273.15
    df_clima["feels_like_d-3"] = df_clima["feels_like_d-3"] - 273.15
    df_clima["feels_like_d-4"] = df_clima["feels_like_d-4"] - 273.15
    df_clima["feels_like_d-5"] = df_clima["feels_like_d-5"] - 273.15
    
    # The new dataframe is created with the target column of ambient temperature
    df_objetivos = pd.DataFrame(columns = ["hora", "indicativo", "temp_objetivo"])
    df_objetivos["hora"] = df_clima["hora"]
    df_objetivos["indicativo"] = df_clima["indicativo"]
    df_objetivos["temp_objetivo"] = df_clima["temp_d-1"]

    # The column of dates of the day to be predicted is added to the climatological dataset
    fechas_atrasadas = ["{}-{}-{}".format(str((pd.to_datetime(f) - datetime.timedelta(days=1)).year), str((pd.to_datetime(f) - datetime.timedelta(days=1)).month).zfill(2), str((pd.to_datetime(f) - datetime.timedelta(days=1)).day).zfill(2)) for f in df_clima["fecha_prediccion"]]
    df_objetivos.insert(0, "fecha_rad", fechas_atrasadas, True)
    
    # Unnecessary columns are removed
    df_clima.drop(['fecha_prediccion'], axis = 1, inplace = True)
    
    # The column of indicative codes of the nearest radiation stations is added     
    df_clima["indicativo_rad"] = np.nan
    df_clima["lat"] = np.nan
    df_clima["lon"] = np.nan

    for i, fila in df_clima.iterrows():
        
        df_clima.loc[i, "lat"] = lat
        df_clima.loc[i, "lon"] = lon

        dist = 99999999999999999999

        # For each row, the nearest radiation station is obtained
        for k in range(0, len(df_estaciones_rad["indicativo"])): 
            lat_est = conversor_coordenadas(str(df_estaciones_rad["latitud"].loc[k]))
            lon_est = conversor_coordenadas(str(df_estaciones_rad["longitud"].loc[k]))

            distancia_prueba = distancia(lat, lon, lat_est, lon_est)
            if(distancia_prueba < dist):
                dist = distancia_prueba
                df_clima.loc[i, "indicativo_rad"] = df_estaciones_rad.loc[k, "indicativo"]

    ## PREDICTION ##  
    print("Processing PREDICTION")
    
    # Temperature variables are converted to degrees Cº         
    df_pred["temp_pred"] = df_pred["temp_pred"] - 273.15
    df_pred["feels_like_pred"] = df_pred["feels_like_pred"] - 273.15
    
    # The column of dates of the day to be predicted is added to the dataset of climatological predictions
    fechas_atrasadas = ["{}-{}-{}".format(str((pd.to_datetime(f) + datetime.timedelta(days=1)).year), str((pd.to_datetime(f) + datetime.timedelta(days=1)).month).zfill(2), str((pd.to_datetime(f) + datetime.timedelta(days=1)).day).zfill(2)) for f in df_pred["fecha_prediccion"]]
    df_pred.insert(0, "fecha_rad", fechas_atrasadas, True)
    
    # Columns are renamed and unnecessary ones are removed
    df_pred = df_pred.rename(index = str, columns = {"hour": "hora", "estacion": "indicativo"})
    df_pred.drop(['fecha_prediccion'], axis = 1, inplace = True)
    
    ## PREVIOUS DAY RADIATION ##
    print("Processing PREVIOUS DAY RADIATION")
    
    # The column of dates of the day to be predicted is added to the AEMET radiation dataset
    df_aemet = df_aemet.rename(columns={'indicativo':'indicativo_rad'})
    fechas_atrasadas = ["{}-{}-{}".format(str((pd.to_datetime(f) + datetime.timedelta(days=1)).year), str((pd.to_datetime(f) + datetime.timedelta(days=1)).month).zfill(2), str((pd.to_datetime(f) + datetime.timedelta(days=1)).day).zfill(2)) for f in df_aemet["fecha"]]
    df_aemet.insert(0, "fecha_rad", fechas_atrasadas, True)
    
    # Columns are renamed
    df_aemet = df_aemet.rename(columns={'GL': 'rad_d-1', 'UVB': 'uvb_d-1', 'IR': 'ir_d-1'})
    
    # Unnecessary columns are removed
    df_aemet.drop(['fecha'], axis = 1, inplace = True)
    df_aemet.drop(['estacion'], axis = 1, inplace = True)
    
    ## RADIATION ##
    print("Processing RADIATION")
    
    # Columns are renamed and unnecessary ones are removed
    df_soda = df_soda.rename(index = str, columns = {"estacion": "indicativo", "fecha": "fecha_rad"})
    df_soda.drop(['date'], axis=1, inplace = True)
    
    # A dataset is created that contains the radiation variable from three days before
    df_rad_2 = pd.DataFrame(columns = ["indicativo", "hora", "rad_d-2"])
    df_rad_2["indicativo"] = df_soda["indicativo"]
    df_rad_2["hora"] = df_soda["hora"]
    df_rad_2["rad_d-2"] = df_soda["ghi"]
    
    # The column of dates of the day to be predicted is added to the radiation dataset of three days before the predicted one
    fechas_atrasadas = ["{}-{}-{}".format(str((pd.to_datetime(f) + datetime.timedelta(days=3)).year), str((pd.to_datetime(f) + datetime.timedelta(days=3)).month).zfill(2), str((pd.to_datetime(f) + datetime.timedelta(days=3)).day).zfill(2)) for f in df_soda["fecha_rad"]]
    df_rad_2.insert(0, "fecha_rad", fechas_atrasadas, True)
    
    ## MERGE ##
    print("Processing MERGE")
    
    # Datasets are joined
    df_total = pd.merge(df_clima, df_pred, how = "inner", on = ["fecha_rad", "hora", "indicativo"])
    df_total = pd.merge(df_total, df_aemet, how = "inner", on = ["fecha_rad", "hora", "indicativo_rad"])
    df_total = pd.merge(df_total, df_rad_2, how = "inner", on = ["fecha_rad", "hora", "indicativo"])
    
    df_total.drop(['indicativo_rad'], axis = 1, inplace = True)
    
    df_total.columns = [str(i) for i in df_total.columns]

    
    return df_total
    

In [47]:
# The climate data dataset is imported
df_clima = df_clima_clean

# The climate prediction data dataset is imported
df_pred = df_pred_clean

# AEMET radiation dataset is imported
df_aemet = df_aemet_clean

# The SODA radiation dataset is imported
df_soda = df_soda_clean

try:
    df_total = merge_datasets(df_estaciones_rad, df_clima, df_pred, df_aemet, df_soda)
except:
    fallo = 1
    
df_total["hora"] = pd.to_numeric([int(c) for c in df_total["hora"]])
for i in range(3, len(df_total.columns)):
    df_total[df_total.columns[i]] = pd.to_numeric([float(c) for c in df_total[df_total.columns[i]]])

df_total.head()

Processing WEATHER
Processing PREDICTION
Processing PREVIOUS DAY RADIATION
Processing RADIATION
Processing MERGE


Unnamed: 0,fecha_rad,hora,indicativo,temp_d-1,feels_like_d-1,pressure_d-1,humidity_d-1,dew_point_d-1,clouds_d-1,visibility_d-1,wind_speed_d-1,wind_deg_d-1,wind_gust_d-1,we_d-1,temp_d-2,feels_like_d-2,pressure_d-2,humidity_d-2,dew_point_d-2,clouds_d-2,visibility_d-2,wind_speed_d-2,wind_deg_d-2,wind_gust_d-2,we_d-2,temp_d-3,feels_like_d-3,pressure_d-3,humidity_d-3,dew_point_d-3,clouds_d-3,visibility_d-3,wind_speed_d-3,wind_deg_d-3,wind_gust_d-3,we_d-3,temp_d-4,feels_like_d-4,pressure_d-4,humidity_d-4,dew_point_d-4,clouds_d-4,visibility_d-4,wind_speed_d-4,wind_deg_d-4,wind_gust_d-4,we_d-4,temp_d-5,feels_like_d-5,pressure_d-5,humidity_d-5,dew_point_d-5,clouds_d-5,visibility_d-5,wind_speed_d-5,wind_deg_d-5,wind_gust_d-5,we_d-5,lat,lon,temp_pred,feels_like_pred,pressure_pred,humidity_pred,dew_point_pred,uvi_pred,clouds_pred,visibility_pred,wind_speed_pred,wind_deg_pred,wind_gust_pred,pop_pred,we_pred,rad_d-1,uvb_d-1,ir_d-1,rad_d-2
0,2021-07-21,4,41.292777777777782.0700000000000003,23.86,24.02,1012.0,66.0,290.28,0.0,10000.0,1.54,280.0,3.628333,800.0,20.77,21.62,1014.0,78.0,289.95,20.0,10000.0,2.06,310.0,3.628333,801.0,21.53,22.04,1017.0,88.0,292.61,20.0,10000.0,2.57,320.0,3.628333,801.0,20.24,20.2,1018.0,83.0,290.41,20.0,10000.0,3.6,320.0,3.628333,801.0,24.3,24.43,1019.0,63.0,289.96,17.0,10000.0,3.8,156.0,3.43,500.0,41.292778,2.07,25.67,26.04,1016.5,67.0,291.87,0.0,3.5,10000.0,2.895,259.5,3.49,0.0,800.0,2.777778,0.0,362.84912,2.2482
1,2021-07-21,5,41.292777777777782.0700000000000003,23.85,24.01,1012.0,66.0,290.27,20.0,10000.0,3.09,300.0,3.628333,801.0,21.06,22.03,1013.0,78.0,290.23,20.0,10000.0,2.06,310.0,3.628333,801.0,21.38,21.87,1017.0,88.0,292.46,20.0,10000.0,2.57,320.0,3.628333,801.0,20.19,20.49,1018.0,83.0,290.36,20.0,10000.0,3.09,340.0,3.628333,801.0,24.3,24.43,1019.0,63.0,289.96,17.0,10000.0,3.8,156.0,3.43,500.0,41.292778,2.07,25.615,25.97,1017.0,66.5,291.625,0.0,5.0,10000.0,2.805,260.0,3.35,0.0,800.0,86.111111,0.023056,362.84912,80.5099
2,2021-07-21,6,41.292777777777782.0700000000000003,25.48,25.7,1012.0,62.0,290.82,0.0,10000.0,3.6,290.0,3.628333,800.0,22.19,23.26,1013.0,78.0,291.32,20.0,10000.0,2.57,340.0,3.628333,801.0,22.85,23.38,1017.0,84.0,293.15,20.0,10000.0,2.06,340.0,3.628333,801.0,21.62,22.45,1018.0,78.0,290.77,40.0,10000.0,2.57,330.0,3.628333,802.0,24.3,24.43,1019.0,63.0,289.96,17.0,10000.0,3.8,156.0,3.43,500.0,41.292778,2.07,25.88,26.07,1017.0,65.0,291.47,0.46,4.5,10000.0,3.005,257.0,3.645,0.0,800.0,275.0,1.005278,362.84912,247.3058
3,2021-07-21,7,41.292777777777782.0700000000000003,27.15,27.97,1012.0,56.0,290.77,0.0,10000.0,3.6,290.0,3.628333,800.0,23.84,25.47,1014.0,69.0,290.96,20.0,10000.0,1.54,300.0,3.628333,801.0,24.14,24.67,1017.0,79.0,293.42,20.0,10000.0,1.03,280.0,3.628333,801.0,23.27,25.03,1018.0,69.0,290.42,20.0,10000.0,1.03,0.0,3.628333,801.0,24.3,24.43,1019.0,63.0,289.96,17.0,10000.0,3.8,156.0,3.43,500.0,41.292778,2.07,26.425,26.96,1018.0,63.0,291.405,1.43,0.5,10000.0,2.99,248.5,3.665,0.0,800.0,463.888889,2.628056,362.84912,438.2921
4,2021-07-21,8,41.292777777777782.0700000000000003,28.59,29.72,1012.0,55.0,291.82,0.0,10000.0,1.03,0.0,3.628333,800.0,25.56,27.98,1013.0,73.0,293.51,20.0,10000.0,2.06,230.0,3.628333,801.0,25.35,25.95,1017.0,77.0,294.18,40.0,10000.0,3.6,200.0,3.628333,802.0,24.14,25.53,1018.0,69.0,291.25,20.0,10000.0,2.06,180.0,3.628333,801.0,24.3,24.43,1019.0,63.0,289.96,17.0,10000.0,3.8,156.0,3.43,500.0,41.292778,2.07,26.95,27.53,1018.0,61.5,291.41,3.09,0.0,10000.0,3.32,235.0,3.895,0.0,800.0,641.666667,49.481667,362.84912,619.5929


# Radiation prediction
<div style = "float:right"><a style="text-decoration:none" href = "#Functional-script">

In [48]:
df_datos = df_total
df_datos = df_datos[["hora"] + list(df_datos.columns)[3:]]
df_datos.head()

Unnamed: 0,hora,temp_d-1,feels_like_d-1,pressure_d-1,humidity_d-1,dew_point_d-1,clouds_d-1,visibility_d-1,wind_speed_d-1,wind_deg_d-1,wind_gust_d-1,we_d-1,temp_d-2,feels_like_d-2,pressure_d-2,humidity_d-2,dew_point_d-2,clouds_d-2,visibility_d-2,wind_speed_d-2,wind_deg_d-2,wind_gust_d-2,we_d-2,temp_d-3,feels_like_d-3,pressure_d-3,humidity_d-3,dew_point_d-3,clouds_d-3,visibility_d-3,wind_speed_d-3,wind_deg_d-3,wind_gust_d-3,we_d-3,temp_d-4,feels_like_d-4,pressure_d-4,humidity_d-4,dew_point_d-4,clouds_d-4,visibility_d-4,wind_speed_d-4,wind_deg_d-4,wind_gust_d-4,we_d-4,temp_d-5,feels_like_d-5,pressure_d-5,humidity_d-5,dew_point_d-5,clouds_d-5,visibility_d-5,wind_speed_d-5,wind_deg_d-5,wind_gust_d-5,we_d-5,lat,lon,temp_pred,feels_like_pred,pressure_pred,humidity_pred,dew_point_pred,uvi_pred,clouds_pred,visibility_pred,wind_speed_pred,wind_deg_pred,wind_gust_pred,pop_pred,we_pred,rad_d-1,uvb_d-1,ir_d-1,rad_d-2
0,4,23.86,24.02,1012.0,66.0,290.28,0.0,10000.0,1.54,280.0,3.628333,800.0,20.77,21.62,1014.0,78.0,289.95,20.0,10000.0,2.06,310.0,3.628333,801.0,21.53,22.04,1017.0,88.0,292.61,20.0,10000.0,2.57,320.0,3.628333,801.0,20.24,20.2,1018.0,83.0,290.41,20.0,10000.0,3.6,320.0,3.628333,801.0,24.3,24.43,1019.0,63.0,289.96,17.0,10000.0,3.8,156.0,3.43,500.0,41.292778,2.07,25.67,26.04,1016.5,67.0,291.87,0.0,3.5,10000.0,2.895,259.5,3.49,0.0,800.0,2.777778,0.0,362.84912,2.2482
1,5,23.85,24.01,1012.0,66.0,290.27,20.0,10000.0,3.09,300.0,3.628333,801.0,21.06,22.03,1013.0,78.0,290.23,20.0,10000.0,2.06,310.0,3.628333,801.0,21.38,21.87,1017.0,88.0,292.46,20.0,10000.0,2.57,320.0,3.628333,801.0,20.19,20.49,1018.0,83.0,290.36,20.0,10000.0,3.09,340.0,3.628333,801.0,24.3,24.43,1019.0,63.0,289.96,17.0,10000.0,3.8,156.0,3.43,500.0,41.292778,2.07,25.615,25.97,1017.0,66.5,291.625,0.0,5.0,10000.0,2.805,260.0,3.35,0.0,800.0,86.111111,0.023056,362.84912,80.5099
2,6,25.48,25.7,1012.0,62.0,290.82,0.0,10000.0,3.6,290.0,3.628333,800.0,22.19,23.26,1013.0,78.0,291.32,20.0,10000.0,2.57,340.0,3.628333,801.0,22.85,23.38,1017.0,84.0,293.15,20.0,10000.0,2.06,340.0,3.628333,801.0,21.62,22.45,1018.0,78.0,290.77,40.0,10000.0,2.57,330.0,3.628333,802.0,24.3,24.43,1019.0,63.0,289.96,17.0,10000.0,3.8,156.0,3.43,500.0,41.292778,2.07,25.88,26.07,1017.0,65.0,291.47,0.46,4.5,10000.0,3.005,257.0,3.645,0.0,800.0,275.0,1.005278,362.84912,247.3058
3,7,27.15,27.97,1012.0,56.0,290.77,0.0,10000.0,3.6,290.0,3.628333,800.0,23.84,25.47,1014.0,69.0,290.96,20.0,10000.0,1.54,300.0,3.628333,801.0,24.14,24.67,1017.0,79.0,293.42,20.0,10000.0,1.03,280.0,3.628333,801.0,23.27,25.03,1018.0,69.0,290.42,20.0,10000.0,1.03,0.0,3.628333,801.0,24.3,24.43,1019.0,63.0,289.96,17.0,10000.0,3.8,156.0,3.43,500.0,41.292778,2.07,26.425,26.96,1018.0,63.0,291.405,1.43,0.5,10000.0,2.99,248.5,3.665,0.0,800.0,463.888889,2.628056,362.84912,438.2921
4,8,28.59,29.72,1012.0,55.0,291.82,0.0,10000.0,1.03,0.0,3.628333,800.0,25.56,27.98,1013.0,73.0,293.51,20.0,10000.0,2.06,230.0,3.628333,801.0,25.35,25.95,1017.0,77.0,294.18,40.0,10000.0,3.6,200.0,3.628333,802.0,24.14,25.53,1018.0,69.0,291.25,20.0,10000.0,2.06,180.0,3.628333,801.0,24.3,24.43,1019.0,63.0,289.96,17.0,10000.0,3.8,156.0,3.43,500.0,41.292778,2.07,26.95,27.53,1018.0,61.5,291.41,3.09,0.0,10000.0,3.32,235.0,3.895,0.0,800.0,641.666667,49.481667,362.84912,619.5929


In [49]:
df_datos.shape

(16, 75)

In [50]:
# The data is standardized and PCA is applied, before predicting using the trained model
scalar = pk.load(open(directorio + 'data/Modelo/scaler_rad.pkl','rb'))
pca = pk.load(open(directorio + 'data/Modelo/pca_rad.pkl','rb'))
model = pk.load(open(directorio + 'data/Modelo/modelo_rad.pkl','rb'))

pipeline = Pipeline([('transformer', scalar), ('pca', pca), ('estimator', model)])

pred = pipeline.predict(df_datos)
pred_rad = []
[pred_rad.append(y[0]) for y in pred.tolist()]
pred_rad

[84.28783333333332,
 176.42603333333332,
 916.6212999999999,
 959.8275,
 964.2872666666666,
 964.2872666666666,
 964.2872666666666,
 964.2872666666666,
 964.2872666666666,
 964.2872666666666,
 964.2872666666666,
 964.2872666666666,
 961.2383,
 902.4588333333332,
 73.96246666666666,
 21.427366666666668]

# Ambient temperature prediction
<div style = "float:right"><a style="text-decoration:none" href = "#Functional-script">

In [51]:
# The data is standardized and PCA is applied, before predicting using the trained model
scalar = pk.load(open(directorio + 'data/Modelo/scaler_temp.pkl','rb'))
pca = pk.load(open(directorio + 'data/Modelo/pca_temp.pkl','rb'))
model = pk.load(open(directorio + 'data/Modelo/modelo_temp.pkl','rb'))

pipeline = Pipeline([('transformer', scalar), ('pca', pca), ('estimator', model)])

pred = pipeline.predict(df_datos)
pred_temp = []
[pred_temp.append(y[0]) for y in pred.tolist()]
pred_temp

[20.67500000000004,
 22.26000000000002,
 24.505000000000024,
 21.220000000000027,
 20.475000000000023,
 20.475000000000023,
 20.475000000000023,
 20.475000000000023,
 20.475000000000023,
 20.475000000000023,
 20.475000000000023,
 20.475000000000023,
 21.140000000000047,
 28.465000000000003,
 25.670000000000016,
 27.04000000000002]

# Obtención de la producción eléctrica
<div style = "float:right"><a style="text-decoration:none" href = "#Script-funcional">

In [52]:
#Data

print("lat: ", lat)
print("lon: ", lon)
print("orient: ", orient)
print("incl: ", incl)
print("ppico: ", ppico)
print("fecha: ", fecha)

lat:  41.29277777777778
lon:  2.0700000000000003
orient:  10
incl:  25
ppico:  4.62
fecha:  2021-07-20


In [53]:
# Global radiation in horizontal plane:
dia_Gh = pred_rad

# Ambient temperature:
temperatura_ambiente = pred_temp

In [54]:
produccion = calcularEnergia(lat, lon, orient, incl, ppico, fecha, dia_Gh, temperatura_ambiente)
produccion

[0,
 0,
 0,
 0,
 0,
 0,
 56.7934678306092,
 117.80387271236943,
 1902.2148615745914,
 2848.7992579735233,
 3245.69896576888,
 3453.6610366307377,
 3581.968188650417,
 3663.652467202302,
 3713.4672033450133,
 3736.398345677833,
 3729.2158373961843,
 3679.3825648929837,
 3526.477634145079,
 2893.6964519918392,
 254.76072185995625,
 14.020453273181701,
 0,
 0]

In addition, it is desired to show the user an estimate of the income that he will obtain on the day predicted from the sale of his surpluses

The date of the previous day is obtained (to obtain the 24 values in UTC, since the first value of the current day will be that of 22:00 in UTC of the previous day)

In [55]:
fecha_ant = date.today() + timedelta(days = -1)
fecha_ayer = "{}-{}-{}".format(fecha_ant.year, str(fecha_ant.month).zfill(2), str(fecha_ant.day).zfill(2))
fecha_ayer

'2021-07-19'

The date of the day before the previous one is obtained, in case the values of the current day have not yet been published

In [56]:
fecha_ant_ant = date.today() + timedelta(days = -2)
fecha_anteayer = "{}-{}-{}".format(fecha_ant_ant.year, str(fecha_ant_ant.month).zfill(2), str(fecha_ant_ant.day).zfill(2))
fecha_anteayer

'2021-07-18'

Function to obtain the compensation price curve from the REE api

In [57]:
import time
import requests
import urllib


def api_ree(indicador, token):
    url = "https://api.esios.ree.es/indicators"
    hoy = fecha
    dia_ant = fecha_ayer
    dia_ant_ant = fecha_anteayer    
    try:
        url = "https://api.esios.ree.es/indicators/" + str(indicador) + "?start_date=" + dia_ant + "T22%3A00%3A00Z&end_date=" + hoy + "T23%3A59%3A59Z"

        headers = {
            'Accept': 'application/json; application/vnd.esios-api-v1+json',
            'Host': 'api.esios.ree.es',
            'Content-Type': 'application/json',
            'Authorization': 'Token token="{}"'.format(token)
        }

        response = requests.request("GET", url, headers = headers)
        
    except:
        url = "https://api.esios.ree.es/indicators/" + str(indicador) + "?start_date=" + dia_ant_ant + "T22%3A00%3A00Z&end_date=" + dia_ant + "T23%3A59%3A59Z"

        headers = {
            'Accept': 'application/json; application/vnd.esios-api-v1+json',
            'Host': 'api.esios.ree.es',
            'Content-Type': 'application/json',
            'Authorization': 'Token token="{}"'.format(token)
        }

        response = requests.request("GET", url, headers = headers)

    precios = response.json()
    precios = precios['indicator']['values']
    precios_datos = pd.DataFrame(precios)
    precios_datos['datetime'] = pd.to_datetime(precios_datos['datetime'])
    precios_datos['dia'] = precios_datos['datetime'].dt.strftime('%m/%d/%Y')

    return precios_datos

In [58]:
# Compensation price ID definition
indicador = 1739

# Access token
token = "27089f947b3e16a296875a0bc9dc387efa41acc7db6498676668e5150028cfdb"

precios_excedente = api_ree(indicador, token)
precios_excedente.head()

Unnamed: 0,value,datetime,datetime_utc,tz_time,geo_id,geo_name,dia
0,101.09,2021-07-20 00:00:00+02:00,2021-07-19T22:00:00Z,2021-07-19T22:00:00.000Z,3,España,07/20/2021
1,99.59,2021-07-20 01:00:00+02:00,2021-07-19T23:00:00Z,2021-07-19T23:00:00.000Z,3,España,07/20/2021
2,94.86,2021-07-20 02:00:00+02:00,2021-07-20T00:00:00Z,2021-07-20T00:00:00.000Z,3,España,07/20/2021
3,93.86,2021-07-20 03:00:00+02:00,2021-07-20T01:00:00Z,2021-07-20T01:00:00.000Z,3,España,07/20/2021
4,97.43,2021-07-20 04:00:00+02:00,2021-07-20T02:00:00Z,2021-07-20T02:00:00.000Z,3,España,07/20/2021


Hourly prices in €/MWh

In [59]:
lista_precios = [y for y in precios_excedente["value"]]
lista_precios

[101.09,
 99.59,
 94.86,
 93.86,
 97.43,
 99.62,
 100.91,
 108.47,
 111.45,
 101.94,
 99.58,
 99.6,
 100.36,
 101.86,
 101.06,
 99.83,
 99.43,
 99.46,
 98.2,
 105.85,
 108.57,
 109.83,
 105.84,
 101.84,
 110.11,
 109.02]

The csv of typical consumption profiles is imported. For each hour, the relative weight of that hour is indicated (from a total of one). That is, the fraction of daily consumption that each hour represents, in average terms, for different standard profiles of electrical supplies (households)

In [60]:
perfiles = pd.read_csv('./data/perfiles_consumo.csv', sep=',')
for i in range(0,24):
    perfiles[str(i)] = pd.to_numeric(perfiles[str(i)])
perfiles

Unnamed: 0,Tipo de consumidor,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
0,Madrugadores,0.03922,0.02614,0.01961,0.01961,0.01961,0.02941,0.04248,0.05882,0.05229,0.04902,0.03922,0.03922,0.03922,0.03922,0.03922,0.04575,0.04248,0.04248,0.03922,0.04248,0.05229,0.05882,0.06536,0.05882
1,Caseros,0.0367,0.02446,0.01835,0.01835,0.01835,0.02446,0.02446,0.02752,0.0367,0.04281,0.04893,0.05199,0.05505,0.0581,0.04893,0.04587,0.04587,0.04587,0.04893,0.04893,0.05505,0.0581,0.06116,0.05505
2,Matutinos,0.06639,0.06224,0.04979,0.04149,0.03734,0.03734,0.03734,0.04979,0.06639,0.04979,0.0332,0.02905,0.0166,0.0166,0.0166,0.0166,0.0166,0.0166,0.02905,0.0332,0.04979,0.06639,0.08299,0.07884
3,Vespertinos,0.05381,0.03587,0.02691,0.02691,0.02691,0.02691,0.02691,0.03587,0.04484,0.03587,0.02691,0.02691,0.02691,0.03139,0.03587,0.05381,0.05381,0.04484,0.03587,0.03587,0.05381,0.07175,0.08969,0.07175
4,Dia fuera,0.04795,0.0411,0.03425,0.0274,0.0274,0.0274,0.0274,0.0274,0.0411,0.05479,0.06849,0.06849,0.06849,0.05479,0.05137,0.04452,0.03425,0.0274,0.02397,0.02397,0.0274,0.0411,0.05479,0.05479
5,Otros,0.06993,0.05594,0.04196,0.02797,0.02797,0.02797,0.02797,0.02797,0.02797,0.03147,0.03497,0.03846,0.04196,0.04196,0.04545,0.04196,0.04196,0.04196,0.04196,0.04196,0.04545,0.04895,0.05594,0.06993


Contracted power and average consumption data in Spain

In [61]:
pot_media = 4.5 #kW
consumo_medio_casa = 3754/365 #kWh day
consumo_medio_piso = 3373/365 #kWh day

In [62]:
# If the user lives in a house
cons = consumo_medio_casa
# If the user lives in a flat
cons = consumo_medio_piso

To obtain the curve of average consumption in Wh for each type of consumer, the average daily consumption is multiplied by the coefficients that represent the fraction of daily consumption that each hour supposes. It is also multiplied by the real power between the average, to make the curve proportional to that of the real user. Also, it is multiplied by 1000 to convert kWh to Wh

In [63]:
# For example, if the user has the following contracted power:
pot_c = 4 #kW

In [64]:
perfil_1 = [perfiles.loc[0][1+i]*cons*pot_c/pot_media*1000 for i in range(0,len(perfiles.loc[0])-1)] # En Wh
perfil_2 = [perfiles.loc[1][1+i]*cons*pot_c/pot_media*1000 for i in range(0,len(perfiles.loc[1])-1)]
perfil_3 = [perfiles.loc[2][1+i]*cons*pot_c/pot_media*1000 for i in range(0,len(perfiles.loc[2])-1)]
perfil_4 = [perfiles.loc[3][1+i]*cons*pot_c/pot_media*1000 for i in range(0,len(perfiles.loc[3])-1)]
perfil_5 = [perfiles.loc[4][1+i]*cons*pot_c/pot_media*1000 for i in range(0,len(perfiles.loc[4])-1)]
perfil_6 = [perfiles.loc[5][1+i]*cons*pot_c/pot_media*1000 for i in range(0,len(perfiles.loc[5])-1)]

The net consumption is obtained in each hour, according to the user's profile

In [65]:
# For example, profile 1
diferencia = [perfil_1[i]-produccion[i] for i in range(0, 24)]
diferencia

[322.16513850837134,
 214.7219969558599,
 161.08256925418567,
 161.08256925418567,
 161.08256925418567,
 241.58278234398787,
 292.1503129913086,
 365.36169197560633,
 -1472.6887245882901,
 -2446.13390637535,
 -2923.5338272605086,
 -3131.4958981223663,
 -3259.8030501420453,
 -3341.4873286939305,
 -3391.302064836642,
 -3360.593779467787,
 -3380.2720565742666,
 -3330.438784071066,
 -3204.3124956367074,
 -2544.7526711699215,
 174.76541512634506,
 469.14511141479403,
 536.8871354642314,
 483.16556468797575]

To obtain the economic income for the compensation of surpluses, the price obtained is multiplied (transformed to €/Wh) by the net spillage in each hour

In [66]:
compensacion = []
for i in range(0, len(diferencia)):
    if diferencia[i] < 0:
        compensacion.append((lista_precios[i]/1000000)*abs(diferencia[i]))
    else:
         compensacion.append(0)

In [67]:
compensacion

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0.16413115835536493,
 0.24935889041590317,
 0.2911254985186014,
 0.31189699145298766,
 0.3271538341122556,
 0.3403638993007638,
 0.34272498667239104,
 0.3354880770042692,
 0.33610045058517934,
 0.3312454414637082,
 0.3146634870715247,
 0.2693620702433362,
 0,
 0,
 0,
 0]