# TFM - BID DATAFRAME SELECTION

## 1. INTRODUCTION

The aim of this notebook is to create yearly dataframes for different units from the OMIE bid files, which are stored locally as one file per month (see **01_RawData_OMIE_merged_files.ipynb** of this TFM).

In [1]:
import os

import numpy as np
import pandas as pd

## 2. DATAFRAME SELECTION FUNCTION

In this section, a function to select the information from the local .csv files is presented.

In [2]:
#Folder holding the monthly OMIE bid .csv files stored locally.
#The trailing slash matters: file names are appended to this string directly.

### CHANGE PATH TO './RawData/OMIE/BIDS/CAB_DET_merged/' ###
cab_det_path = '/home/dsc/Documents/TFM/Data/OMIE/CAB_DET/'

In [3]:
#One year of data: from November 2019 to October 2020.
#The folder is listed with the standard library instead of the `!ls` shell escape,
#so the cell does not depend on IPython or on a POSIX shell being available.
#Only .csv files are kept and the names are sorted to get a stable order between runs.
cab_det_list = sorted(f for f in os.listdir(cab_det_path) if f.endswith('.csv'))
cab_det_list

['OMIE_012020.csv',
 'OMIE_022020.csv',
 'OMIE_032020.csv',
 'OMIE_042020.csv',
 'OMIE_052020.csv',
 'OMIE_062020.csv',
 'OMIE_072020.csv',
 'OMIE_082020.csv',
 'OMIE_092020.csv',
 'OMIE_102020.csv',
 'OMIE_112019.csv',
 'OMIE_122019.csv']

First of all, September 2020 is selected as an example to see how the files are read.

In [4]:
#Reading the file produces an extra "Unnamed: 0" column (see the head() output below).
#Presumably it is the row index that was saved together with the merged monthly file - TODO confirm.
df_OMIE_092020 = pd.read_csv(cab_det_path + 'OMIE_092020.csv')

In [5]:
df_OMIE_092020.head()

Unnamed: 0.1,Unnamed: 0,Bid_Code,Num_Version,Bid_Unit,Unit_Description,Sell_Buy,Pot_max,Year,Month,Day,Period,Block,Price,Energy
0,0,1696149,6,EDPC2,EDP COMERCIAL COMPRA (PORT),CNO,6000.0,2020,9,1,22,1,0.01,0.1
1,1,1717319,3,EONUC01,EONUR CONSUMO CLIENTES TUR,CNO,400.0,2020,9,1,1,1,0.0,1.0
2,2,1717319,3,EONUC01,EONUR CONSUMO CLIENTES TUR,CNO,400.0,2020,9,1,2,1,0.0,1.0
3,3,1717319,3,EONUC01,EONUR CONSUMO CLIENTES TUR,CNO,400.0,2020,9,1,3,1,0.0,1.0
4,4,1717319,3,EONUC01,EONUR CONSUMO CLIENTES TUR,CNO,400.0,2020,9,1,4,1,0.0,1.0


In [6]:
df_OMIE_092020.shape

(1478829, 14)

In [7]:
#Information from Sell/Buy distribution
df_OMIE_092020['Sell_Buy'].value_counts()

VNO    1072350
CNO     405759
VNP        720
Name: Sell_Buy, dtype: int64

In [8]:
#Number of rows per bidding unit, restricted to the 'VNO' rows of the Sell_Buy column
df_OMIE_092020.loc[df_OMIE_092020['Sell_Buy'].eq('VNO'), 'Bid_Unit'].value_counts()

ACAVADO    12818
ADOURO     12686
EGEDRE1    10428
TEMON       8478
MUEL        8214
           ...  
GNRVD39       16
EGVD217       16
GDNA          12
ENURE02        9
SAMPVD3        9
Name: Bid_Unit, Length: 1048, dtype: int64

In [9]:
#Example with PALOS1 for the whole set of data
unit = 'PALOS1'

#The monthly selections are gathered in a list and concatenated once at the end.
#Concatenating inside the loop copies the accumulated dataframe on every iteration
#and starts from an empty DataFrame, which takes part in the concat for no benefit.
monthly_frames = []

for archive in cab_det_list: #Reading all the files in the local OMIE "datalake"
    df_OMIE_month = pd.read_csv(cab_det_path + archive) #Reading file
    #na=False counts a missing 'Bid_Unit' as "no match", so the mask is always boolean
    is_unit = df_OMIE_month['Bid_Unit'].str.startswith(unit, na=False)
    monthly_frames.append(df_OMIE_month.loc[is_unit]) #Selecting the unit

#Row labels of each monthly file are kept (no index reset at this point).
#With no files at all the result is an empty dataframe, as before.
df_OMIE_unit = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

In [10]:
df_OMIE_unit.head()

Unnamed: 0.1,Unnamed: 0,Bid_Code,Num_Version,Bid_Unit,Unit_Description,Sell_Buy,Pot_max,Year,Month,Day,Period,Block,Price,Energy
26739,26739,6128191,2,PALOS1,C.C. PALOS 1,VNO,394.1,2020,1,1,1,12,180.3,394.1
26740,26740,6128191,2,PALOS1,C.C. PALOS 1,VNO,394.1,2020,1,1,2,12,180.3,394.1
26741,26741,6128191,2,PALOS1,C.C. PALOS 1,VNO,394.1,2020,1,1,3,1,1.13,50.0
26742,26742,6128191,2,PALOS1,C.C. PALOS 1,VNO,394.1,2020,1,1,3,12,180.3,344.1
26743,26743,6128191,2,PALOS1,C.C. PALOS 1,VNO,394.1,2020,1,1,4,1,1.13,60.0


In [11]:
type(df_OMIE_unit)

pandas.core.frame.DataFrame

In [12]:
df_OMIE_unit.shape

(71112, 14)

In [14]:
#Data from each year
df_OMIE_unit['Year'].value_counts()

2020    61045
2019    10067
Name: Year, dtype: int64

In [17]:
#Data from each month
df_OMIE_unit['Month'].value_counts()

12    6541
1     6519
10    6181
8     6169
7     6169
5     6169
3     6157
9     5970
6     5970
4     5970
2     5771
11    3526
Name: Month, dtype: int64

In [18]:
#Data from each day
df_OMIE_unit['Day'].value_counts()

25    2433
24    2426
22    2425
21    2425
1     2424
18    2424
4     2424
28    2424
27    2424
26    2424
3     2424
2     2424
23    2417
20    2415
29    2412
19    2370
5     2221
30    2214
9     2213
6     2213
7     2213
8     2213
16    2213
10    2213
11    2213
12    2213
13    2213
14    2213
15    2213
17    2213
31    1406
Name: Day, dtype: int64

Now, a function **"df_sel_unit"** is created. This function selects one unit by the beginning of its "Bid_Unit" code from the OMIE .csv merged files stored locally by month. The function output is a dataframe with the selected information for the whole available period of time.

In [11]:
def df_sel_unit(unit, cab_det_path='/home/dsc/Documents/TFM/Data/OMIE/CAB_DET/'):
    """Collect the bids of one unit (or family of units) from all the monthly files.

    Every .csv file found in ``cab_det_path`` is read, the rows whose 'Bid_Unit'
    starts with ``unit`` are kept, and the monthly selections are stacked into a
    single dataframe.

    Parameters
    ----------
    unit : str
        Beginning of the 'Bid_Unit' code. The match is a prefix match, so every
        code starting with ``unit`` is selected (e.g. 'PALOS' selects PALOS1,
        PALOS2 and PALOS3).
    cab_det_path : str, optional
        Folder with the monthly OMIE .csv merged files. A trailing slash is not
        required.

    Returns
    -------
    pandas.DataFrame
        Selected rows of all the files, in file-name order, with a fresh
        0..n-1 index, without the 'Unnamed: 0' column (when present) and with
        'Bid_Unit' and 'Unit_Description' stripped of surrounding spaces.

    Raises
    ------
    FileNotFoundError
        If ``cab_det_path`` does not exist or contains no .csv file.
    """
    ### CHANGE THE DEFAULT PATH TO './RawData/OMIE/BIDS/CAB_DET_merged/' ###

    # Standard-library listing instead of the `!ls` shell escape: works outside
    # IPython, ignores non-.csv entries and gives a stable (sorted) file order.
    cab_det_list = sorted(f for f in os.listdir(cab_det_path) if f.endswith('.csv'))
    if not cab_det_list:
        raise FileNotFoundError('No .csv files found in ' + repr(cab_det_path))

    # Gather the monthly selections and concatenate once; concatenating inside
    # the loop would copy the accumulated dataframe on every iteration.
    monthly_frames = []
    for archive in cab_det_list:
        df_OMIE_month = pd.read_csv(os.path.join(cab_det_path, archive))
        # na=False counts a missing 'Bid_Unit' as "no match" so the mask stays boolean
        is_unit = df_OMIE_month['Bid_Unit'].str.startswith(unit, na=False)
        monthly_frames.append(df_OMIE_month.loc[is_unit])

    # Dropping 'Unnamed: 0' (ignored when a file does not carry it)
    # and resetting the index column
    df_OMIE_unit = (
        pd.concat(monthly_frames)
        .drop(columns='Unnamed: 0', errors='ignore')
        .reset_index(drop=True)
    )

    # Deleting spaces in 'Bid_Unit' and in 'Unit_Description'
    df_OMIE_unit['Bid_Unit'] = df_OMIE_unit['Bid_Unit'].str.strip()
    df_OMIE_unit['Unit_Description'] = df_OMIE_unit['Unit_Description'].str.strip()

    return df_OMIE_unit

## 3. SELECTION OF UNITS

Now, different units will be selected and stored locally.

In [20]:
#Folder where the per-unit .csv files are written.
#The trailing slash matters: file names are appended to this string directly.
### CHANGE PATH TO './ProcessedData/UNITS_BIDS_Ext/' ###
output_path = '/home/dsc/Documents/TFM/Data/OMIE/'

### 3.1. COMBINED CYCLE UNITS

#### C.C. PALOS 1

In [21]:
df_PALOS1 = df_sel_unit('PALOS1')

In [22]:
df_PALOS1.head()

Unnamed: 0,Bid_Code,Num_Version,Bid_Unit,Unit_Description,Sell_Buy,Pot_max,Year,Month,Day,Period,Block,Price,Energy
0,6128191,2,PALOS1,C.C. PALOS 1,VNO,394.1,2020,1,1,1,12,180.3,394.1
1,6128191,2,PALOS1,C.C. PALOS 1,VNO,394.1,2020,1,1,2,12,180.3,394.1
2,6128191,2,PALOS1,C.C. PALOS 1,VNO,394.1,2020,1,1,3,1,1.13,50.0
3,6128191,2,PALOS1,C.C. PALOS 1,VNO,394.1,2020,1,1,3,12,180.3,344.1
4,6128191,2,PALOS1,C.C. PALOS 1,VNO,394.1,2020,1,1,4,1,1.13,60.0


In [23]:
df_PALOS1.shape

(71112, 13)

In [25]:
#Storing locally information from PALOS1
#output_path already ends with '/', so the file name carries no leading slash
#(same form as the other storing cells of this notebook)
df_PALOS1.to_csv(output_path + 'OMIE_PALOS1_112019_102020.csv')

In [24]:
df_PALOS1.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,71112.0,71112.0,71112.0,71112.0,71112.0,71112.0,71112.0,71112.0,71112.0,71112.0
mean,6303431.0,1.775157,394.1,2019.858435,6.362217,15.931404,15.879416,6.519223,52.564769,46.995838
std,143155.0,0.865138,4.406533e-10,0.348606,3.433946,8.900143,5.272937,3.635035,48.738841,75.62004
min,6049903.0,1.0,394.1,2019.0,1.0,1.0,1.0,1.0,1.13,3.0
25%,6176254.0,1.0,394.1,2020.0,3.0,8.0,12.0,3.0,35.4,17.0
50%,6300456.0,2.0,394.1,2020.0,6.0,16.0,16.0,6.0,40.72,17.0
75%,6427697.0,2.0,394.1,2020.0,9.0,24.0,20.0,10.0,45.42,17.0
max,6555573.0,5.0,394.1,2020.0,12.0,31.0,25.0,12.0,180.3,394.1


Note that it is also possible to retrieve information for different units at the same time, for example for PALOS 1, 2, and 3. This can be useful to gather the information of sites with more than one unit.

In [31]:
#Using the function with "PALOS" to retrieve information from PALOS1, 2, and 3.
df_PALOS = df_sel_unit('PALOS')

In [33]:
df_PALOS['Bid_Unit'].value_counts()

PALOS1    71112
PALOS3    70179
PALOS2    68034
Name: Bid_Unit, dtype: int64

#### C.C. PALOS 2

In [15]:
df_PALOS2 = df_sel_unit('PALOS2')

In [16]:
df_PALOS2.to_csv(output_path + 'OMIE_PALOS2_112019_102020.csv')

In [17]:
df_PALOS2.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,68034.0,68034.0,68034.0,68034.0,68034.0,68034.0,68034.0,68034.0,68034.0,68034.0
mean,6311019.0,1.753697,395.6,2019.880663,6.272614,16.079357,15.823162,6.577344,52.470102,48.146045
std,140269.6,0.853485,3.973384e-10,0.324187,3.378788,8.842097,5.194277,3.631865,49.213918,75.452259
min,6073909.0,1.0,395.6,2019.0,1.0,1.0,1.0,1.0,1.13,0.7
25%,6187944.0,1.0,395.6,2020.0,3.0,8.0,12.0,3.0,34.12,21.0
50%,6308806.0,2.0,395.6,2020.0,6.0,16.0,16.0,7.0,39.92,21.0
75%,6432962.0,2.0,395.6,2020.0,9.0,24.0,20.0,10.0,45.07,21.0
max,6555574.0,5.0,395.6,2020.0,12.0,31.0,25.0,12.0,180.3,395.6


#### C.C. PALOS 3

In [24]:
df_PALOS3 = df_sel_unit('PALOS3')

In [25]:
df_PALOS3.to_csv(output_path + 'OMIE_PALOS3_112019_102020.csv')

In [26]:
df_PALOS3.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,70179.0,70179.0,70179.0,70179.0,70179.0,70179.0,70179.0,70179.0,70179.0,70179.0
mean,6306647.0,1.761182,397.8,2019.868964,6.308511,16.094658,15.898303,6.51826,52.205471,47.478203
std,141311.2,0.845831,4.816377e-10,0.337443,3.412395,8.825258,5.267437,3.638984,48.85199,76.024861
min,6071572.0,1.0,397.8,2019.0,1.0,1.0,1.0,1.0,1.13,4.0
25%,6181820.0,1.0,397.8,2020.0,3.0,8.0,12.0,3.0,35.4,17.0
50%,6303211.0,2.0,397.8,2020.0,6.0,16.0,16.0,6.0,40.27,17.0
75%,6428831.0,2.0,397.8,2020.0,9.0,24.0,20.0,10.0,44.69,17.0
max,6555575.0,5.0,397.8,2020.0,12.0,31.0,25.0,12.0,180.3,397.8


#### C.C. SAGUNTO 1

In [13]:
df_SAGUNTO1 = df_sel_unit('SAGU1')

In [14]:
df_SAGUNTO1.to_csv(output_path + 'OMIE_SAGUNTO1_112019_102020.csv')

In [15]:
df_SAGUNTO1.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,70051.0,70051.0,70051.0,70051.0,70051.0,70051.0,70051.0,70051.0,70051.0,70051.0
mean,6288767.0,1.80233,417.3,2019.82527,6.42382,15.531613,15.78864,6.442906,51.608268,51.320317
std,144390.2,0.912518,4.935181e-10,0.379739,3.48705,8.71449,5.271624,3.595803,49.827782,76.781146
min,6049886.0,1.0,417.3,2019.0,1.0,1.0,1.0,1.0,1.13,1.0
25%,6161384.0,1.0,417.3,2020.0,3.0,8.0,12.0,3.0,30.91,23.0
50%,6283632.0,2.0,417.3,2020.0,6.0,15.0,16.0,6.0,38.55,23.0
75%,6413910.0,2.0,417.3,2020.0,9.0,23.0,20.0,10.0,44.52,23.0
max,6544756.0,6.0,417.3,2020.0,12.0,31.0,24.0,12.0,180.3,417.3


#### C.C. SAGUNTO 2

In [11]:
df_SAGUNTO2 = df_sel_unit('SAGU2')

In [12]:
df_SAGUNTO2.to_csv(output_path + 'OMIE_SAGUNTO2_112019_102020.csv')

In [13]:
df_SAGUNTO2.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,68291.0,68291.0,68291.0,68291.0,68291.0,68291.0,68291.0,68291.0,68291.0,68291.0
mean,6304810.0,1.800662,419.6,2019.835308,6.661288,16.084433,15.386127,6.478863,49.555003,52.054461
std,147902.3,0.922009,4.636752e-10,0.370905,3.379315,8.76626,5.276009,3.574855,50.555319,80.238143
min,6049887.0,1.0,419.6,2019.0,1.0,1.0,1.0,1.0,1.13,1.0
25%,6167387.0,1.0,419.6,2020.0,4.0,9.0,11.0,3.0,28.97,23.0
50%,6306386.0,2.0,419.6,2020.0,7.0,16.0,16.0,6.0,35.03,23.0
75%,6434186.0,2.0,419.6,2020.0,10.0,24.0,20.0,10.0,41.01,23.0
max,6555582.0,6.0,419.6,2020.0,12.0,31.0,25.0,12.0,180.3,419.6


#### C.C. CASTELNOU

In [18]:
df_CASTELNOU = df_sel_unit('CTNU')

In [19]:
df_CASTELNOU.to_csv(output_path + 'OMIE_CASTELNOU_112019_102020.csv')

In [20]:
df_CASTELNOU.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,16343.0,16343.0,16343.0,16343.0,16343.0,16343.0,16343.0,16343.0,16343.0,16343.0
mean,6458341.0,0.203696,797.8,2020.0,8.251117,16.554427,15.487732,3.790247,37.38045,103.723325
std,56522.56,0.559456,1.812224e-10,0.0,1.29267,8.746184,5.595016,2.117595,33.051896,133.192236
min,6359428.0,0.0,797.8,2020.0,6.0,1.0,1.0,1.0,0.1,8.0
25%,6408012.0,0.0,797.8,2020.0,7.0,9.0,11.0,2.0,22.5,32.4
50%,6457709.0,0.0,797.8,2020.0,8.0,17.0,16.0,4.0,27.5,49.3
75%,6506282.0,0.0,797.8,2020.0,9.0,24.0,20.0,6.0,30.0,108.8
max,6556087.0,5.0,797.8,2020.0,10.0,31.0,25.0,9.0,180.0,797.8


#### C.C. ESCATRÓN 3

In [5]:
df_ESCATRON3 = df_sel_unit('ECT3')

In [6]:
df_ESCATRON3.to_csv(output_path + 'OMIE_ESCATRON3_112019_102020.csv')

In [7]:
df_ESCATRON3.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,16852.0,16852.0,16852.0,16852.0,16852.0,16852.0,16852.0,16852.0,16852.0,16852.0
mean,6307180.0,1.871944,818.0,2019.837467,6.831415,15.815808,13.060468,1.650131,50.752314,375.67306
std,224796.3,1.118509,0.0,0.36895,3.260986,8.949891,6.678113,0.682146,60.241992,242.395739
min,1847230.0,0.0,818.0,2019.0,1.0,1.0,1.0,1.0,0.01,1.0
25%,6167592.0,1.0,818.0,2020.0,4.0,8.0,8.0,1.0,0.01,145.0
50%,6322599.0,2.0,818.0,2020.0,7.0,16.0,13.0,2.0,36.44,390.0
75%,6443298.0,3.0,818.0,2020.0,10.0,24.0,19.0,2.0,65.3,468.0
max,6557008.0,5.0,818.0,2020.0,12.0,31.0,25.0,4.0,180.3,818.0


#### C.C. ALGECIRAS 3

In [8]:
df_ALGECIRAS3 = df_sel_unit('ALG3')

In [9]:
df_ALGECIRAS3.to_csv(output_path + 'OMIE_ALGECIRAS3_112019_102020.csv')

In [10]:
df_ALGECIRAS3.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,16305.0,16305.0,16305.0,16305.0,16305.0,16305.0,16305.0,16305.0,16305.0,16305.0
mean,6319985.0,1.83042,830.1,2019.917571,6.049985,15.537565,13.191782,1.654523,46.783579,356.560067
std,135691.5,1.091218,2.367033e-10,0.275025,3.152918,8.597059,6.6209,0.697541,54.119732,248.370574
min,6051653.0,0.0,830.1,2019.0,1.0,1.0,1.0,1.0,0.01,2.0
25%,6207249.0,1.0,830.1,2020.0,3.0,8.0,8.0,1.0,0.01,145.0
50%,6320966.0,2.0,830.1,2020.0,6.0,15.0,13.0,2.0,36.3,380.0
75%,6431758.0,3.0,830.1,2020.0,9.0,23.0,19.0,2.0,64.3,450.1
max,6557009.0,5.0,830.1,2020.0,12.0,31.0,25.0,4.0,180.3,830.1


#### C.C. ARCOS 1

In [4]:
df_ARCOS1 = df_sel_unit('ARCOS1')

In [5]:
df_ARCOS1.to_csv(output_path + 'OMIE_ARCOS1_112019_102020.csv')

In [6]:
df_ARCOS1.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,29640.0,29640.0,29640.0,29640.0,29640.0,29640.0,29640.0,29640.0,29640.0,29640.0
mean,6290862.0,1.669872,395.6,2019.833367,6.370513,15.843185,14.606275,2.808806,62.868212,101.59475
std,137246.0,0.91142,2.157813e-10,0.372654,3.383568,8.960593,6.085381,1.432484,52.739812,93.444838
min,6062095.0,0.0,395.6,2019.0,1.0,1.0,1.0,1.0,0.0,2.5
25%,6165150.0,1.0,395.6,2020.0,4.0,8.0,10.0,1.0,36.24,34.4
50%,6296748.0,1.0,395.6,2020.0,6.0,16.0,15.0,3.0,47.425,67.8
75%,6415786.0,2.0,395.6,2020.0,9.0,24.0,20.0,4.0,66.05,156.0
max,6555360.0,5.0,395.6,2020.0,12.0,31.0,24.0,5.0,150.3,395.6


#### C.C. ARCOS 2

In [5]:
df_ARCOS2 = df_sel_unit('ARCOS2')

In [6]:
df_ARCOS2.to_csv(output_path + 'OMIE_ARCOS2_112019_102020.csv')

In [7]:
df_ARCOS2.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,27840.0,27840.0,27840.0,27840.0,27840.0,27840.0,27840.0,27840.0,27840.0,27840.0
mean,6319570.0,1.700036,379.4,2019.878412,6.517385,15.436961,14.696049,2.6713,59.986223,105.483966
std,140793.6,0.87479,1.984439e-10,0.326815,3.192111,8.71443,5.974838,1.376697,53.874166,92.710079
min,6050105.0,0.0,379.4,2019.0,1.0,1.0,1.0,1.0,0.0,0.6
25%,6205694.0,1.0,379.4,2020.0,4.0,8.0,10.0,1.0,34.49,37.3
50%,6331062.0,1.0,379.4,2020.0,7.0,15.0,15.0,3.0,46.23,69.4
75%,6438069.0,2.0,379.4,2020.0,9.0,23.0,20.0,4.0,61.2,156.4
max,6555361.0,5.0,379.4,2020.0,12.0,31.0,25.0,6.0,159.3,379.4


#### C.C. ARCOS 3

In [8]:
df_ARCOS3 = df_sel_unit('ARCOS3')

In [9]:
df_ARCOS3.to_csv(output_path + 'OMIE_ARCOS3_112019_102020.csv')

In [10]:
df_ARCOS3.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,33974.0,33974.0,33974.0,33974.0,33974.0,33974.0,33974.0,33974.0,33974.0,33974.0
mean,6302730.0,1.703155,822.9,2019.833785,6.629099,16.014835,13.254106,2.680874,52.925021,189.845161
std,149616.0,0.869952,4.381555e-10,0.372279,3.46826,8.788096,6.674954,1.31396,49.824321,158.557312
min,6050106.0,0.0,822.9,2019.0,1.0,1.0,1.0,1.0,0.0,0.1
25%,6165152.0,1.0,822.9,2020.0,4.0,8.0,8.0,2.0,26.84,88.0
50%,6303308.0,1.0,822.9,2020.0,7.0,16.0,14.0,3.0,39.485,149.2
75%,6434218.0,2.0,822.9,2020.0,10.0,24.0,19.0,4.0,59.31,254.9
max,6555362.0,5.0,822.9,2020.0,12.0,31.0,25.0,6.0,155.7,822.9


#### C.C. COLÓN 4

In [10]:
df_COLON4 = df_sel_unit('COL4')

In [11]:
df_COLON4.to_csv(output_path + 'OMIE_COLON4_112019_102020.csv')

In [12]:
df_COLON4.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,19958.0,19958.0,19958.0,19958.0,19958.0,19958.0,19958.0,19958.0,19958.0,19958.0
mean,6302252.0,2.988075,392.0,2019.869777,6.224672,15.163343,14.089538,1.861459,66.612089,161.191061
std,138210.6,1.598578,0.0,0.336558,3.361834,8.747118,6.285857,0.815307,67.985325,97.849555
min,6049999.0,1.0,392.0,2019.0,1.0,1.0,1.0,1.0,0.0,2.0
25%,6184246.0,2.0,392.0,2020.0,3.0,8.0,9.0,1.0,0.0,82.0
50%,6300670.0,3.0,392.0,2020.0,6.0,15.0,14.0,2.0,27.11,135.0
75%,6419440.0,4.0,392.0,2020.0,9.0,23.0,19.0,3.0,150.02,185.0
max,6553059.0,9.0,392.0,2020.0,12.0,31.0,24.0,3.0,150.02,392.0


### 3.2. HYDRAULIC UNITS

#### C.H. AGUAYO - GENERATION

In [29]:
df_AGUAYO_GEN = df_sel_unit('AGUG')

In [30]:
df_AGUAYO_GEN.shape

(42831, 13)

In [31]:
df_AGUAYO_GEN.head()

Unnamed: 0,Bid_Code,Num_Version,Bid_Unit,Unit_Description,Sell_Buy,Pot_max,Year,Month,Day,Period,Block,Price,Energy
0,6129555,1,AGUG,C.H. AGUAYO GENERACION,VNO,360.6,2020,1,1,1,1,30.0,80.0
1,6129555,1,AGUG,C.H. AGUAYO GENERACION,VNO,360.6,2020,1,1,1,2,48.0,80.0
2,6129555,1,AGUG,C.H. AGUAYO GENERACION,VNO,360.6,2020,1,1,1,3,52.0,80.0
3,6129555,1,AGUG,C.H. AGUAYO GENERACION,VNO,360.6,2020,1,1,1,4,56.0,80.0
4,6129555,1,AGUG,C.H. AGUAYO GENERACION,VNO,360.6,2020,1,1,1,5,180.3,40.6


In [32]:
df_AGUAYO_GEN.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,42831.0,42831.0,42831.0,42831.0,42831.0,42831.0,42831.0,42831.0,42831.0,42831.0
mean,6295875.0,1.85863,360.6,2019.83059,6.511078,15.719689,12.489505,2.951951,67.435731,71.510016
std,146607.3,1.111704,2.535246e-10,0.375118,3.45078,8.778838,6.925643,1.39805,53.262971,20.507271
min,6051654.0,0.0,360.6,2019.0,1.0,1.0,1.0,1.0,2.2,0.6
25%,6168916.0,1.0,360.6,2020.0,4.0,8.0,6.0,2.0,36.0,60.6
50%,6291789.0,2.0,360.6,2020.0,6.0,16.0,12.0,3.0,48.0,80.0
75%,6421067.0,3.0,360.6,2020.0,10.0,23.0,18.0,4.0,61.0,80.0
max,6557010.0,5.0,360.6,2020.0,12.0,31.0,25.0,5.0,180.3,220.0


In [57]:
#Storing info locally
df_AGUAYO_GEN.to_csv(output_path + 'OMIE_AGUAYO_GEN_112019_102020.csv')

#### C.H. LA MUELA

In [34]:
df_MUELA = df_sel_unit('MUEL')

In [35]:
df_MUELA.head()

Unnamed: 0,Bid_Code,Num_Version,Bid_Unit,Unit_Description,Sell_Buy,Pot_max,Year,Month,Day,Period,Block,Price,Energy
0,6128223,1,MUEL,C.H. LA MUELA TURBINACION,VNO,1506.2,2020,1,1,1,1,43.0,100.0
1,6128223,1,MUEL,C.H. LA MUELA TURBINACION,VNO,1506.2,2020,1,1,1,2,43.58,100.0
2,6128223,1,MUEL,C.H. LA MUELA TURBINACION,VNO,1506.2,2020,1,1,1,3,44.3,100.0
3,6128223,1,MUEL,C.H. LA MUELA TURBINACION,VNO,1506.2,2020,1,1,1,4,44.88,100.0
4,6128223,1,MUEL,C.H. LA MUELA TURBINACION,VNO,1506.2,2020,1,1,1,5,45.6,100.0


In [7]:
df_MUELA.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,119068.0,119068.0,119068.0,119068.0,119068.0,119068.0,119068.0,119068.0,119068.0,119068.0
mean,6283446.0,1.651023,1506.2,2019.818037,6.379136,15.689236,12.52129,7.427806,52.626695,100.19587
std,144941.2,0.849987,1.424957e-09,0.385816,3.550877,8.758828,6.934197,4.119922,32.040705,10.878357
min,6050090.0,0.0,1506.2,2019.0,1.0,1.0,1.0,1.0,3.42,67.9
25%,6156994.0,1.0,1506.2,2020.0,3.0,8.0,6.0,4.0,37.2,100.0
50%,6274190.0,1.0,1506.2,2020.0,6.0,16.0,13.0,7.0,45.7,100.0
75%,6407478.0,2.0,1506.2,2020.0,10.0,23.0,19.0,11.0,54.58,100.0
max,6555346.0,5.0,1506.2,2020.0,12.0,31.0,25.0,15.0,160.0,1096.7


In [9]:
#Storing info locally
df_MUELA.to_csv(output_path + 'OMIE_MUELA_112019_102020.csv')

#### C.H. TAJO ENCANTADA - GENERATION

In [36]:
df_TAJOENCANT = df_sel_unit('TJEG')

In [37]:
df_TAJOENCANT.head()

Unnamed: 0,Bid_Code,Num_Version,Bid_Unit,Unit_Description,Sell_Buy,Pot_max,Year,Month,Day,Period,Block,Price,Energy
0,6128022,1,TJEG,C.H. TAJO ENCANTADA GENERACION,VNO,360.0,2020,1,1,9,1,41.37,90.0
1,6128022,1,TJEG,C.H. TAJO ENCANTADA GENERACION,VNO,360.0,2020,1,1,9,2,45.87,90.0
2,6128022,1,TJEG,C.H. TAJO ENCANTADA GENERACION,VNO,360.0,2020,1,1,9,3,46.75,90.0
3,6128022,1,TJEG,C.H. TAJO ENCANTADA GENERACION,VNO,360.0,2020,1,1,10,1,41.37,90.0
4,6128022,1,TJEG,C.H. TAJO ENCANTADA GENERACION,VNO,360.0,2020,1,1,10,2,45.87,90.0


In [44]:
df_TAJOENCANT.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,11504.0,11504.0,11504.0,11504.0,11504.0,11504.0,11504.0,11504.0,11504.0,11504.0
mean,6324261.0,3.081711,360.0,2019.871001,6.700278,15.940455,16.14056,1.830059,41.182554,91.157858
std,141724.7,1.587495,0.0,0.335213,3.158902,8.799308,5.097585,0.87846,11.88677,14.966355
min,6049972.0,0.0,360.0,2019.0,1.0,1.0,1.0,1.0,9.21,90.0
25%,6221355.0,2.0,360.0,2020.0,4.0,9.0,12.0,1.0,32.37,90.0
50%,6317035.0,3.0,360.0,2020.0,7.0,16.0,16.0,2.0,43.82,90.0
75%,6444511.0,4.0,360.0,2020.0,9.0,23.0,21.0,2.0,50.1,90.0
max,6555507.0,10.0,360.0,2020.0,12.0,31.0,25.0,4.0,69.2,360.0


In [43]:
df_TAJOENCANT.to_csv(output_path + 'OMIE_TAJOENCANT_112019_102020.csv')

#### C.H. MORALETS - GENERATION

In [12]:
df_MORALETS = df_sel_unit('MLTG')

In [13]:
df_MORALETS.head()

Unnamed: 0,Bid_Code,Num_Version,Bid_Unit,Unit_Description,Sell_Buy,Pot_max,Year,Month,Day,Period,Block,Price,Energy
0,6128020,1,MLTG,C.H. MORALETS GENERACION,VNO,221.4,2020,1,1,9,1,40.4,65.0
1,6128020,1,MLTG,C.H. MORALETS GENERACION,VNO,221.4,2020,1,1,9,2,40.82,65.0
2,6128020,1,MLTG,C.H. MORALETS GENERACION,VNO,221.4,2020,1,1,10,1,40.4,65.0
3,6128020,1,MLTG,C.H. MORALETS GENERACION,VNO,221.4,2020,1,1,10,2,40.82,65.0
4,6128020,1,MLTG,C.H. MORALETS GENERACION,VNO,221.4,2020,1,1,11,1,40.4,65.0


In [5]:
df_MORALETS.to_csv(output_path + 'OMIE_MORALETS_112019_102020.csv')

In [14]:
df_MORALETS.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,14524.0,14524.0,14524.0,14524.0,14524.0,14524.0,14524.0,14524.0,14524.0,14524.0
mean,6289912.0,2.973079,221.4,2019.847287,6.175434,15.937414,15.617943,1.774993,40.638704,67.22425
std,142407.6,1.520193,5.738541e-11,0.359723,3.462004,8.885805,5.41401,0.750619,12.337411,14.101472
min,6049971.0,1.0,221.4,2019.0,1.0,1.0,1.0,1.0,10.0,30.0
25%,6167504.0,2.0,221.4,2020.0,3.0,8.0,11.0,1.0,31.395,65.0
50%,6285009.0,3.0,221.4,2020.0,6.0,16.0,16.0,2.0,42.0,65.0
75%,6401688.0,4.0,221.4,2020.0,9.0,24.0,20.0,2.0,49.0,65.0
max,6555505.0,10.0,221.4,2020.0,12.0,31.0,25.0,3.0,68.84,195.0


#### C.H. SIL

In [46]:
#NOTE(review): df_sel_unit matches by prefix, so every 'Bid_Unit' code starting with 'SIL'
#is selected. The describe() output of this frame shows Pot_max ranging from 0.4 to 1477.7,
#which suggests several different units are mixed here - confirm this is intended.
df_SIL = df_sel_unit('SIL')

In [47]:
df_SIL.to_csv(output_path + 'OMIE_SIL_112019_102020.csv')

In [48]:
df_SIL.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,98627.0,98627.0,98627.0,98627.0,98627.0,98627.0,98627.0,98627.0,98627.0,98627.0
mean,6286401.0,1.639298,1164.070681,2019.818944,6.432224,15.927718,12.209121,4.121549,49.925527,116.654335
std,142422.9,0.903856,486.150561,0.385066,3.390413,8.832101,6.94946,2.640125,38.980637,150.177536
min,6045107.0,0.0,0.4,2019.0,1.0,1.0,1.0,1.0,0.0,0.1
25%,6165141.0,1.0,412.0,2020.0,4.0,8.0,6.0,2.0,25.3,68.0
50%,6286200.0,1.0,1477.7,2020.0,6.0,16.0,12.0,3.0,42.13,72.0
75%,6397545.0,2.0,1477.7,2020.0,10.0,24.0,18.0,6.0,58.92,78.0
max,6555351.0,5.0,1477.7,2020.0,12.0,31.0,25.0,11.0,160.0,1166.3


### 3.3. NUCLEAR UNITS

#### C.N. COFRENTES

In [10]:
df_COFRENTES = df_sel_unit('COF1')

In [11]:
df_COFRENTES.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,2406.0,2406.0,2406.0,2406.0,2406.0,2406.0,2406.0,2406.0,2406.0,2406.0
mean,6262491.0,1.614713,1063.9,2019.887781,5.133001,14.395262,11.040732,1.169992,59.11724,510.020324
std,79615.76,0.852985,4.844066e-11,0.315702,2.803876,8.703599,7.484037,0.375704,81.395415,345.219868
min,6050098.0,0.0,1063.9,2019.0,1.0,1.0,1.0,1.0,0.0,0.1
25%,6237036.0,1.0,1063.9,2020.0,4.0,7.0,4.0,1.0,0.0,342.9
50%,6261034.0,1.0,1063.9,2020.0,4.0,13.0,11.0,1.0,0.0,351.9
75%,6298934.0,2.0,1063.9,2020.0,5.0,22.0,18.0,1.0,180.3,719.75
max,6555354.0,5.0,1063.9,2020.0,12.0,31.0,24.0,2.0,180.3,1063.9


In [12]:
#Storing info locally
df_COFRENTES.to_csv(output_path + 'OMIE_COFRENTES_112019_102020.csv')

### 3.4. COAL UNITS

#### C.T. LITORAL

In [12]:
df_LITORAL1 = df_sel_unit('LIT1')

In [13]:
df_LITORAL1.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,23401.0,23401.0,23401.0,23401.0,23401.0,23401.0,23401.0,23401.0,23401.0,23401.0
mean,6303916.0,3.014187,557.5,2019.869877,6.227811,16.078629,15.367335,2.217768,59.030859,196.793351
std,144109.4,1.570934,0.0,0.336446,3.324543,8.902836,5.940842,1.12391,38.638937,155.619108
min,6049988.0,1.0,557.5,2019.0,1.0,1.0,1.0,1.0,0.0,1.0
25%,6180643.0,2.0,557.5,2020.0,3.0,8.0,12.0,1.0,0.0,150.0
50%,6303272.0,3.0,557.5,2020.0,6.0,16.0,16.0,2.0,66.36,187.0
75%,6427675.0,4.0,557.5,2020.0,9.0,24.0,20.0,3.0,99.01,187.0
max,6555520.0,10.0,557.5,2020.0,12.0,31.0,25.0,4.0,99.03,557.5


In [14]:
#Storing info locally
df_LITORAL1.to_csv(output_path + 'OMIE_LITORAL1_112019_102020.csv')

### 3.5. SOLAR UNITS

#### C.S. MULA

In [58]:
df_MULA = df_sel_unit('IGNVD13')

In [59]:
df_MULA.to_csv(output_path + 'OMIE_MULA_112019_102020.csv')

In [60]:
df_MULA.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,4629.0,4629.0,4629.0,4629.0,4629.0,4629.0,4629.0,4629.0,4629.0,4629.0
mean,6309158.0,0.054871,493.7,2019.864118,6.478937,15.797797,14.268525,1.0,1.024701,171.141521
std,143790.5,0.262156,1.483774e-11,0.342701,3.227515,8.83429,3.824116,0.0,12.982118,125.283636
min,5899780.0,0.0,493.7,2019.0,1.0,1.0,1.0,1.0,0.0,0.1
25%,6193033.0,0.0,493.7,2020.0,4.0,8.0,11.0,1.0,0.0,48.4
50%,6317803.0,0.0,493.7,2020.0,6.0,16.0,14.0,1.0,0.0,161.8
75%,6427317.0,0.0,493.7,2020.0,9.0,23.0,17.0,1.0,0.0,291.1
max,6557186.0,2.0,493.7,2020.0,12.0,31.0,24.0,1.0,180.0,402.0


In [61]:
df_MULA.head()

Unnamed: 0,Bid_Code,Num_Version,Bid_Unit,Unit_Description,Sell_Buy,Pot_max,Year,Month,Day,Period,Block,Price,Energy
0,6129101,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,1,1,9,1,0.0,9.5
1,6129101,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,1,1,10,1,0.0,89.1
2,6129101,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,1,1,11,1,0.0,221.3
3,6129101,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,1,1,12,1,0.0,300.1
4,6129101,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,1,1,13,1,0.0,317.8


In [63]:
#Bid behaviour on days 1 and 2 of January 2020
df_MULA.loc[
    df_MULA['Day'].isin([1, 2])
    & df_MULA['Month'].eq(1)
    & df_MULA['Year'].eq(2020)
]

Unnamed: 0,Bid_Code,Num_Version,Bid_Unit,Unit_Description,Sell_Buy,Pot_max,Year,Month,Day,Period,Block,Price,Energy
0,6129101,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,1,1,9,1,0.0,9.5
1,6129101,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,1,1,10,1,0.0,89.1
2,6129101,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,1,1,11,1,0.0,221.3
3,6129101,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,1,1,12,1,0.0,300.1
4,6129101,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,1,1,13,1,0.0,317.8
5,6129101,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,1,1,14,1,0.0,314.7
6,6129101,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,1,1,15,1,0.0,278.1
7,6129101,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,1,1,16,1,0.0,198.7
8,6129101,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,1,1,17,1,0.0,96.0
9,6129101,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,1,1,18,1,0.0,12.5


In [64]:
#Bid behaviour on days 1 and 2 of August 2020
df_MULA.loc[
    df_MULA['Day'].isin([1, 2])
    & df_MULA['Month'].eq(8)
    & df_MULA['Year'].eq(2020)
]

Unnamed: 0,Bid_Code,Num_Version,Bid_Unit,Unit_Description,Sell_Buy,Pot_max,Year,Month,Day,Period,Block,Price,Energy
2792,6425872,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,8,1,8,1,0.0,4.9
2793,6425872,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,8,1,9,1,0.0,42.7
2794,6425872,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,8,1,10,1,0.0,124.3
2795,6425872,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,8,1,11,1,0.0,211.1
2796,6425872,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,8,1,12,1,0.0,282.6
2797,6425872,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,8,1,13,1,0.0,334.6
2798,6425872,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,8,1,14,1,0.0,356.3
2799,6425872,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,8,1,15,1,0.0,323.2
2800,6425872,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,8,1,16,1,0.0,292.7
2801,6425872,0,IGNVD13,SOLAR MULA,VNO,493.7,2020,8,1,17,1,0.0,248.5


### 3.6. BIOMASS UNITS

#### BIOMASS ENCE PUERTOLLANO 1

In [28]:
df_ENCEPU1 = df_sel_unit('ENEVD15')

In [29]:
df_ENCEPU1.to_csv(output_path + 'OMIE_ENCEPU1_112019_102020.csv')

In [30]:
df_ENCEPU1.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,5690.0,5690.0,5690.0,5690.0,5690.0,5690.0,5690.0,5690.0,5690.0,5690.0
mean,6367855.0,2.206151,50.0,2020.0,6.223726,15.719332,12.51529,1.0,0.035325,35.704042
std,106996.3,1.515792,0.0,0.0,2.497747,8.82211,6.910334,0.0,1.874696,3.185292
min,6128967.0,0.0,50.0,2020.0,1.0,1.0,1.0,1.0,0.0,0.1
25%,6282929.0,1.0,50.0,2020.0,4.0,8.0,7.0,1.0,0.0,34.0
50%,6372124.0,2.0,50.0,2020.0,6.0,16.0,13.0,1.0,0.0,37.0
75%,6457459.0,3.0,50.0,2020.0,8.0,23.0,18.0,1.0,0.0,37.0
max,6547234.0,6.0,50.0,2020.0,10.0,31.0,24.0,1.0,100.0,42.0


In [31]:
#Rows bid at the maximum price, to check whether the prices different from 0 are outliers
df_ENCEPU1.loc[df_ENCEPU1['Price'].eq(df_ENCEPU1['Price'].max())]

Unnamed: 0,Bid_Code,Num_Version,Bid_Unit,Unit_Description,Sell_Buy,Pot_max,Year,Month,Day,Period,Block,Price,Energy
0,6128967,0,ENEVD15,ENCE ENERGIA PUERTOLLANO,VNO,50.0,2020,1,1,8,1,100.0,0.1
1,6128968,0,ENEVD15,ENCE ENERGIA PUERTOLLANO,VNO,50.0,2020,1,2,8,1,100.0,0.1


#### BIOMASS ENCE HUELVA

In [49]:
df_ENCEHUELVA = df_sel_unit('ENEVD14')

In [50]:
df_ENCEHUELVA.to_csv(output_path + 'OMIE_ENCEHUELVA_112019_102020.csv')

In [51]:
df_ENCEHUELVA.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,5900.0,5900.0,5900.0,5900.0,5900.0,5900.0,5900.0,5900.0,5900.0,5900.0
mean,6353760.0,2.637458,46.0,2019.973559,6.374576,15.851356,12.566102,1.0,0.979661,35.052458
std,128046.7,1.280172,0.0,0.160455,2.688249,8.583491,6.872969,0.0,8.818464,6.163209
min,6050518.0,0.0,46.0,2019.0,1.0,1.0,1.0,1.0,0.0,0.1
25%,6264416.0,2.0,46.0,2020.0,4.0,9.0,7.0,1.0,0.0,34.0
50%,6357656.0,3.0,46.0,2020.0,6.0,16.0,13.0,1.0,0.0,37.0
75%,6464184.0,3.0,46.0,2020.0,9.0,23.0,18.0,1.0,0.0,38.0
max,6557228.0,6.0,46.0,2020.0,12.0,31.0,24.0,1.0,100.0,40.5


#### BIOMASS CUBILLOS DEL SIL

In [37]:
df_CUBILLOSSIL = df_sel_unit('NXVD147')

In [38]:
df_CUBILLOSSIL.to_csv(output_path + 'OMIE_CUBILLOSSIL_112019_102020.csv')

In [39]:
df_CUBILLOSSIL.describe()

Unnamed: 0,Bid_Code,Num_Version,Pot_max,Year,Month,Day,Period,Block,Price,Energy
count,3048.0,3048.0,3048.0,3048.0,3048.0,3048.0,3048.0,3048.0,3048.0,3048.0
mean,6406855.0,0.154199,49.5,2020.0,7.102034,15.616142,12.553806,1.0,0.0,40.68668
std,85662.62,0.421574,0.0,0.0,1.930807,8.816342,6.916758,0.0,0.0,7.993973
min,6220995.0,0.0,49.5,2020.0,3.0,1.0,1.0,1.0,0.0,5.0
25%,6333286.0,0.0,49.5,2020.0,5.0,9.0,7.0,1.0,0.0,35.0
50%,6391420.0,0.0,49.5,2020.0,7.0,15.0,13.0,1.0,0.0,46.0
75%,6497657.0,0.0,49.5,2020.0,9.0,24.0,19.0,1.0,0.0,46.0
max,6553273.0,3.0,49.5,2020.0,10.0,31.0,24.0,1.0,0.0,49.5
