# ERCOT Market Analysis (Energy Only Offers)
*StrategyWise for Southern Power Company, 2019.Q4*

*Data Analysis Performed by Robin Sanders*

In [1]:
# Import necessary packages
import pandas as pd
from pandas import DataFrame, read_csv
import numpy as np
import calendar
import glob
from datetime import datetime
from dateutil.parser import parse
import os
import urllib.request
import seaborn as sns

# Matplotlib
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import matplotlib.dates as mdates
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

#Regression
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
import math
%matplotlib inline

#see all columns/rows
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)


# Part 1: Prepare the Data------------------------------------------------------------

## Import and Clean Data
###  strip/clean column headers, datetime conversion on:

##### Environmental: Market Data, Locational Marginal Pricing, Nodes
*Third Party Environmental: NOAA Hourly Weather Data*
##### Transactional: Energy Only Offers
*Third Party Transactional: Daily NASDAQ, DOWJONES, ETF prices*

In [2]:
#ERCOT
market_df = pd.read_excel('OneDrive_1_10-22-2019/ercot_market_data.xlsx', sheet_name = 'ercot_market_data')
lmp_df = pd.read_csv('OneDrive_1_10-22-2019/ercotlmp.csv')
nodes_df = pd.read_excel('OneDrive_1_10-22-2019/ercot_nodes.xlsx')
all_offers_df = pd.concat([pd.read_csv(f) for f in glob.glob('OneDrive_1_10-22-2019/EnergyOnlyOffers/*.csv')], ignore_index = True)

#clean columns
all_offers_df.columns = all_offers_df.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '').str.replace(' - ',' ')
market_df.columns = market_df.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '').str.replace('-','')
lmp_df.columns = lmp_df.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '').str.replace('-','')
nodes_df.columns = nodes_df.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '').str.replace('-','')

#drop duplicate/unnecessary/sparse columns
market_df = market_df.drop(columns=['datetime','year'])
lmp_df = lmp_df.drop(columns=['datetime','year'])
nodes_df = nodes_df.drop(columns=['iso','weatherstationid','first_dart_date','last_dart_date','equipment','voltage',
                                    'substation','nodetype','zoneid','objectid'])

#Convert marketday feature from datetime type to string type
market_df['marketday'] = market_df['marketday'].dt.strftime('%m/%d/%Y')

#3rd party data
weather_df_1 = pd.read_csv('additional_data/weather_data_1.csv',low_memory=False)
weather_df_2 = pd.read_csv('additional_data/weather_data_2.csv',low_memory=False)
weather_df_3 = pd.read_csv('additional_data/weather_data_3.csv',low_memory=False)
nasdaq_df = pd.read_csv('additional_data/nasdaq_data.csv')
etf_df = pd.read_csv('additional_data/etf_data.csv')
dowjones_df = pd.read_csv('additional_data/dow_jones_data.csv')

weather_df_1.columns = weather_df_1.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '').str.replace('-','')
weather_df_2.columns = weather_df_2.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '').str.replace('-','')
weather_df_3.columns = weather_df_3.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '').str.replace('-','')
nasdaq_df.columns = nasdaq_df.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '').str.replace('-','')
etf_df.columns = etf_df.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '').str.replace('-','')
dowjones_df.columns = dowjones_df.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '').str.replace('-','')

#Select hourly weather data 
weather_df_1_trim = weather_df_1[['station','date','hourlydrybulbtemperature','hourlyrelativehumidity',
                                'hourlystationpressure','hourlywinddirection','hourlywindspeed']]
weather_df_2_trim = weather_df_2[['station','date','hourlydrybulbtemperature','hourlyrelativehumidity',
                                'hourlystationpressure','hourlywinddirection','hourlywindspeed']]
weather_df_3_trim = weather_df_3[['station','date','hourlydrybulbtemperature','hourlyrelativehumidity',
                                'hourlystationpressure','hourlywinddirection','hourlywindspeed']]

#rename columns, drop 'close' column and use adjusted close column 'adj_close'
nasdaq_df = nasdaq_df.rename(columns={'date':'nasdaq_date','open':'nasdaq_open','high':'nasdaq_high',
                                     'low':'nasdaq_low','close':'nasdaq_close','adj_close':'nasdaq_adj_close','volume':'nasdaq_volume'})
nasdaq_df = nasdaq_df.drop(columns='nasdaq_close')
etf_df = etf_df.rename(columns={'date':'etf_date','open':'etf_open','high':'etf_high','low':'etf_low',
                                     'close':'etf_close','adj_close':'etf_adj_close','volume':'etf_volume'})
etf_df = etf_df.drop(columns='etf_close')
dowjones_df = dowjones_df.rename(columns={'date':'dowjones_date','open':'dowjones_open','high':'dowjones_high',
                                     'low':'dowjones_low','close':'dowjones_close','adj_close':'dowjones_adj_close','volume':'dowjones_volume'})
dowjones_df = dowjones_df.drop(columns='dowjones_close')


### Displaying Content of ERCOT Energy Only Offers

In [3]:
print(all_offers_df.head().shape)
all_offers_df.head().head()

(5, 27)


Unnamed: 0,delivery_date,hour_ending,settlement_point,qse_name,energy_only_offer_mw1,energy_only_offer_price1,energy_only_offer_mw2,energy_only_offer_price2,energy_only_offer_mw3,energy_only_offer_price3,energy_only_offer_mw4,energy_only_offer_price4,energy_only_offer_mw5,energy_only_offer_price5,energy_only_offer_mw6,energy_only_offer_price6,energy_only_offer_mw7,energy_only_offer_price7,energy_only_offer_mw8,energy_only_offer_price8,energy_only_offer_mw9,energy_only_offer_price9,energy_only_offer_mw10,energy_only_offer_price10,energy_only_offer_id,multi-hour_block_indicator,block/curve_indicator
0,04/29/2019,1,AEEC,QPEBSE,2.0,-6.9,,,,,,,,,,,,,,,,,,,D1,N,V
1,04/29/2019,1,AEEC,QREUEL,15.0,-30.0,,,,,,,,,,,,,,,,,,,rj1,N,V
2,04/29/2019,1,AMO_AMOCO_S2,QKCPL1,10.0,-24.99,,,,,,,,,,,,,,,,,,,KCPLEOO7252,N,V
3,04/29/2019,1,ANACACHO_ANA,QAMTRA,2.0,-20.0,,,,,,,,,,,,,,,,,,,01,N,V
4,04/29/2019,1,BAFFIN_ALL,QREUEL,46.0,-30.0,,,,,,,,,,,,,,,,,,,rd1,N,V


## Trim Energy Offer Data:  Top 10 Leaders in Timeframe 1.2.19 - 7.12.19

In [5]:
#Top 10 leaders
lead_ptp_offers = ptp_bids_df.loc[ptp_bids_df['qse_name'].isin(['QLUMN','QNRGTX','QDCENG','QREUEL','QSHELL',
                                                                    'QDIRE','QPREC','QMONT','QWOLFP','QTIOS'])]
#Select Data within timeline presented in Jeff's Power BI Dashboard (Jan 2, 2019 - July 12, 2019)
lead_ptp_offers['date'] = pd.to_datetime(lead_ptp_offers['delivery_date'])
mask = (lead_ptp_offers['date'] >= '2019-01-02') & (lead_ptp_offers['date'] < '2019-07-13')
lead_ptp_offers = lead_ptp_offers.loc[mask]

#lead_ptp_offers is the base of the merging section to create a model-ready dataset

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


## Join Market Data, LMP, Nodes & Leader Energy Only Offers Tables = data_ercot

In [6]:
#Join Market Condition & Location Pricing & Node Data
market_lmp = lmp_df.merge(market_df, how = 'left', on = ['marketday','hourending','peaktype','month'])
market_lmp_nodes = market_lmp.merge(nodes_df, how='left', left_on='settlementpoint', right_on='nodename')
offers = offers.rename(columns={'delivery_date':'marketday','hour_ending':'hourending','settlement_point':'settlementpoint'})

#Join Energy Only Offers with joined Market/Price df
data_ercot = offers.merge(market_lmp_nodes, how = 'left', on = ['marketday','hourending','settlementpoint'])
data_ercot = data_ercot.drop(columns=['nodename'])
data_ercot.loc[(data_ercot.nearest_weatherstation == 'TM - Nuevo Laredo/Intl'),'nearest_weatherstation']='TX - Laredo/Intl'
#Nuevo Laredo Intl is close enough to Laredo Intl to use the same weatherstation
data_ercot = data_ercot.fillna(0.0)
data_ercot.loc[(data_ercot.zone == 0.0),'zone']='Other'

#data_ercot is a merged table and consists of ALL ERCOT DATA: Energy Only Offers, market, LMP, and nodes data

In [7]:
#Confirm no loss of primary Energy Only Offer transaction data
print(offers.shape)
print(data_ercot.shape)

(138748, 28)
(138748, 44)


## Join 3rd Party Data to data_ercot =  data

In [8]:
#Merge Stock Data together
stocks_df = nasdaq_df.merge(etf_df, how='left',left_on='nasdaq_date',right_on='etf_date')
stocks_df = stocks_df.merge(dowjones_df, how='left',left_on='nasdaq_date',right_on='dowjones_date')
stocks_df = stocks_df.drop(columns=['dowjones_date','etf_date'])
stocks_df = stocks_df.rename(columns={'nasdaq_date':'date'})
stocks_df['date'] = pd.to_datetime(stocks_df['date'],infer_datetime_format=True)
stocks_df['date'] = stocks_df['date'].dt.strftime('%m/%d/%Y')

In [9]:
#Merge Weather together
weather_df = pd.concat([weather_df_1_trim, weather_df_2_trim,weather_df_3_trim])
weather_df['hourlydrybulbtemperature'] = pd.to_numeric(weather_df['hourlydrybulbtemperature'], errors='coerce', downcast=None)
weather_df['hourlystationpressure'] = pd.to_numeric(weather_df['hourlystationpressure'], errors='coerce', downcast=None)
weather_df['hourlywinddirection'] = pd.to_numeric(weather_df['hourlywinddirection'], errors='coerce', downcast=None)
weather_df['hourlywindspeed'] = pd.to_numeric(weather_df['hourlywindspeed'], errors='coerce', downcast=None)

weather_df['date'] = pd.to_datetime(weather_df['date'],infer_datetime_format=True)
weather_df['hourending'] = [d.time() for d in weather_df['date']]
mask = (weather_df['date'] >= '2019-01-02') & (weather_df['date'] < '2019-07-13')
weather_df = weather_df.loc[mask]
hours = [math.ceil((t.hour * 60 + t.minute) / 60) for t in weather_df['hourending']]
weather_df['hour'] = hours
weather_df['hour']= weather_df['hour'].apply(str).apply(int)
weather_df = weather_df.loc[(weather_df['hour'] > 0)]
weather_df['date'] = weather_df['date'].dt.strftime('%m/%d/%Y')
c_maxes = weather_df.groupby(['station', 'date','hour']).hourending.transform(max)
weather_df = weather_df.loc[weather_df.hourending == c_maxes]
weather_df['station'] = weather_df['station'].map({72267023042: 'TX - Lubbock/Intl',
                                                                 72251012924: 'TX - Corpus Christi/Intl',
                                                                 72266013962: 'TX - Abilene/Municipal', 
                                                                 72250012919: 'TX - Brownsville/Intl', 
                                                                 72351013966: 'TX - Wichita Falls/Sheppard AFB',
                                                                 72261022010: 'TX - Del Rio/Intl',
                                                                 72265023023: 'TX - Midland-Odessa',
                                                                 72253012921: 'TX - San Antonio/Intl',
                                                                 72363023047: 'TX - Amarillo/Intl',
                                                                 72248013957: 'LA - Shreveport/Regional',
                                                                 72263023034: 'TX - San Angelo/Mathis',
                                                                 72265623040: 'TX - Wink/Winkler County',
                                                                 72258013960: 'TX - Dallas/Love Field',
                                                                 72243012960: 'TX - Houston/Intercontinental',
                                                                 72261823091: 'TX - Fort Stockton',
                                                                 72252012907: 'TX - Laredo/Intl',
                                                                 74641013975: 'OK - Gage/Shattuck',
                                                                 72259303985: 'TX - Dallas-Fort Worth/Intl'})


In [10]:
#Merge ERCOT data with weather and stock data 
weather_df=weather_df.drop_duplicates(keep='first')

stocks_weather_df = weather_df.merge(stocks_df, how = 'left', on='date')
stocks_weather_df = stocks_weather_df.fillna(0.00)
stocks_weather_df=stocks_weather_df.drop_duplicates(keep='first')

data = data_ercot.merge(stocks_weather_df.drop_duplicates(['date','hour','station']), 
                       how = 'left', 
                       left_on = ['marketday','hourending','nearest_weatherstation'], 
                       right_on=['date','hour','station'])
data =data.drop(columns=['energy_only_offer_id','date_x','hourending_x','hour','hourending_y','date_y','month'])
#data is now the final merged table, consists of all ERCOT and 3rd party data
#Ready to model.


In [11]:
#Confirm no loss of primary PTP transaction data
print(data_ercot.shape)
print(data.shape)


(138748, 44)
(138748, 61)


In [None]:
data.to_csv("energy_only_offers_market_lmp_node_thirdparty_model_ready.csv")