### Libraries, paths, and set-up

In [1]:
# Notebook set-up: third-party imports, project helpers, and the processed-data location.
import pandas as pd
import numpy as np
import datetime as dt
import math
import os
# Switch the working directory before importing from `src` — presumably so the project
# package and the relative data paths below resolve from the repository root.
# NOTE(review): this is an absolute, machine-specific path; the notebook will not run
# on another machine without editing it.
os.chdir('/Users/manotas/Documents/GitHub-Repos/ML-Energy-Colombia')
import warnings
from src.data.loader import data_loader
# NOTE(review): star imports hide where names come from. datetimer, iqr_filter_yearly,
# calculate_rsi and calculate_lerner used later presumably come from these two modules — confirm.
from src.utils.utils import *
from src.utils.calculations import *
# Silences every warning for the rest of the session.
warnings.filterwarnings('ignore')
import plotly.express as px
import plotly.io as pio
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import statsmodels.api as sm
import statsmodels.formula.api as smf
from linearmodels.panel import PanelOLS
import linearmodels as lm


# Directory, relative to the working directory set above, that processed CSVs are read from and written to.
storing_path = 'data/processed/'

In [2]:
# Load the previously built supply table from disk and the demand, marginal-cost
# and bid-price tables through the project loader.
fullsupply = pd.read_csv(storing_path + 'fullsupply.csv')
demand, mcost, bidprice = data_loader('demand', 'mcost', 'bidprice')

# Bind datetimer's return value back to each frame. Assigning to a loop variable
# (`for i in [...]: i = datetimer(i)`) only rebinds the loop variable, so the
# processed frames were discarded unless datetimer happened to mutate its argument in place.
fullsupply, mcost, demand = (
    datetimer(frame) for frame in (fullsupply, mcost, demand)
)
# NOTE(review): bidprice is not passed through datetimer although it is later merged
# on 'datetime' — confirm its 'datetime' column already has a matching dtype.

### Handling demand data

In [9]:
# Build the demand-side table: sum `demand_hourly` per timestamp, then left-join
# the `bidprice` table on that timestamp and persist the result.
fulldemand = (
    demand
    .groupby('datetime')['demand_hourly']
    .sum()
    .reset_index()
    .merge(bidprice, on='datetime', how='left')
)
fulldemand.to_csv(storing_path + 'fulldemand.csv', index=False)

### Merging supply and demand
Constructing a complete dataframe with all market information

In [10]:
# Combine supply, demand and marginal cost into a single frame keyed on the timestamp.
# Left joins keep every row of `fullsupply`; the two ask columns are dropped afterwards.
market = (
    fullsupply
    .merge(fulldemand, on='datetime', how='left')
    .merge(mcost, on='datetime', how='left')
    .drop(columns=['ideal_ask', 'dispatch_ask'])
)

# Market-concentration helpers, applied in this order; each receives the frame and
# returns the frame used from then on (presumably with RSI / Lerner columns added — confirm).
functions = [calculate_rsi, calculate_lerner]
for func in functions:
    market = market.pipe(func)

# Order rows by timestamp then plant, renumber the index, and write the table to disk.
market = market.sort_values(by=['datetime', 'plant']).reset_index(drop=True)
market.to_csv(storing_path + 'market.csv', index=False)

### Filtering the data

In [5]:
# Restrict to strictly positive markups: per the analysis, these are the offers
# actually dispatched at a given timestamp.
posmarket = market.loc[market['markup'] > 0]

# Then filter `markup` with the yearly IQR helper (presumably dropping outliers within
# each year — confirm), since agents may price discretionarily high to avoid dispatch
# on a day, or discretionarily low to secure it.
iqrmarket = iqr_filter_yearly(posmarket, 'markup')

### How to assess market concentration?
Ideally, the market would clear fully at competitive prices; as explained, this is not verifiable, and market power may exist.

In [6]:
# Prepare the panel used to relate RSI, markups, and the Lerner index.
# Work on a copy so `iqrmarket` itself is not modified, then index by
# (agent_code, datetime) — the entity/time MultiIndex the panel models below are fitted on.
iqrmarket_r = datetimer(iqrmarket.copy())
iqrmarket_r = iqrmarket_r.set_index(['agent_code', 'datetime'])

A FE (fixed effects) regression controls for time-invariant unobserved individual effects. `EntityEffects` represents the fixed-effects component of the model: it is equivalent to adding a dummy variable for each unique agent in the data (minus one to avoid the dummy variable trap). Agents are used as the entities because we assume that market power is exercised by agents through their power plants.

In [7]:
# Entity fixed-effects regression of markup on the agent-level RSI.
fe_formula = 'markup ~ rsi_agent + EntityEffects'
femod = PanelOLS.from_formula(fe_formula, data=iqrmarket_r)

# Standard errors are clustered by entity (the first index level, agent_code).
fe_res = femod.fit(cov_type='clustered', cluster_entity=True)
print(fe_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:                 markup   R-squared:                        0.0014
Estimator:                   PanelOLS   R-squared (Between):             -0.2965
No. Observations:             3554311   R-squared (Within):               0.0014
Date:                Sat, Feb 24 2024   R-squared (Overall):             -0.2039
Time:                        03:32:19   Log-likelihood                -2.483e+07
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      4850.8
Entities:                          27   P-value                           0.0000
Avg Obs:                    1.316e+05   Distribution:               F(1,3554283)
Min Obs:                       1.0000                                           
Max Obs:                    7.716e+05   F-statistic (robust):             6.2523
                            

A random effects model, which assumes that individual-specific effects are uncorrelated with the regressors; when there is no entity-level variance it reduces to a pooled OLS regression.

In [8]:
# Random-effects regression of markup on the agent-level RSI, on the same panel.
# NOTE(review): the formula has no explicit intercept ('1 +'); linearmodels formulas
# do not appear to add one automatically — confirm the omission is intended.
re_formula = 'markup ~ rsi_agent'
remod = lm.RandomEffects.from_formula(re_formula, data=iqrmarket_r)

# NOTE(review): fitted with the default covariance, whereas the fixed-effects model
# uses entity-clustered errors — confirm the two are meant to differ.
results = remod.fit()
print(results)

                        RandomEffects Estimation Summary                        
Dep. Variable:                 markup   R-squared:                        0.0014
Estimator:              RandomEffects   R-squared (Between):             -0.2958
No. Observations:             3554311   R-squared (Within):               0.0014
Date:                Sat, Feb 24 2024   R-squared (Overall):             -0.2034
Time:                        03:32:27   Log-likelihood                -2.483e+07
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      4832.4
Entities:                          27   P-value                           0.0000
Avg Obs:                    1.316e+05   Distribution:               F(1,3554310)
Min Obs:                       1.0000                                           
Max Obs:                    7.716e+05   F-statistic (robust):             4832.4
                            