# EIA Demand data via API

In [1]:
import pandas as pd
import requests
import datetime
import json

from config import api_key

### Extract

In [2]:
# Request the demand series (EBA.CISO-ALL.D.HL) from the EIA API
url = f'https://api.eia.gov/series/?api_key={api_key}&series_id=EBA.CISO-ALL.D.HL'
# A timeout keeps the cell from hanging indefinitely on a stalled connection
response = requests.get(url, timeout=60)
# Fail here on an HTTP error status rather than later while parsing the body
response.raise_for_status()

In [69]:
# Inspect the payload: count the observations, then show the first and last one
response_json = response.json()
series_data = response_json['series'][0]['data']
num_resp = len(series_data)
print(f'{num_resp} responses')
for position in (0, -1):
    print(series_data[position])

39105 responses
['20191218T09-08', 27494]
['20150701T01-07', 31486]


In [5]:
# Copy the [timestamp, value] rows out of the nested response payload.
# Copying the list directly replaces an index loop whose loop variable
# overwrote `response` (the HTTP response object) with an integer, which
# broke any re-run of the cell that calls response.json().
# NOTE: the name `dict` shadows the builtin; it is kept only because the
# DataFrame cell that follows reads it.
dict = list(response_json['series'][0]['data'])

In [53]:
# Build the DataFrame and preview both ends to check the result
df = pd.DataFrame(dict)
for preview in (df.head(), df.tail()):
    print(preview)

                0        1
0  20191218T09-08  27494.0
1  20191218T08-08  27642.0
2  20191218T07-08  26404.0
3  20191218T06-08  23779.0
4  20191218T05-08  22065.0
                    0        1
39100  20150701T05-07  25661.0
39101  20150701T04-07  26388.0
39102  20150701T03-07  27416.0
39103  20150701T02-07  28989.0
39104  20150701T01-07  31486.0


### Transform

In [83]:
# Rename columns
demand_df = df.rename(columns={0:'timestamp', 1:'DEMAND'})
# Clean up timestamp by cutting off the trailing offset suffix ("-08", "-07")
# NOTE(review): dropping the offset presumably makes the repeated hour at the
# end of daylight saving time collide, which would explain the duplicates
# removed below -- confirm before relying on those rows.
demand_df['timestamp'] = demand_df['timestamp'].str.split("-", n=1).str[0]
# An explicit format avoids format inference and rejects malformed stamps
demand_df['timestamp'] = pd.to_datetime(demand_df['timestamp'], format='%Y%m%dT%H')

# Split datetimes as needed:
# demand_df['Year'] = demand_df['timestamp'].dt.year
# demand_df['Month'] = demand_df['timestamp'].dt.month
# demand_df['Day'] = demand_df['timestamp'].dt.day
demand_df['Hour'] = demand_df['timestamp'].dt.hour # Hour 0 = midnight
demand_df['date'] = demand_df['timestamp'].dt.date

# There were duplicate listings for a handful of timestamps.
# They are dropped (keeping the last occurrence) so the primary key rules
# are not violated. Assignment is used instead of inplace=True so the cell
# reads as a plain, re-runnable transformation.
demand_df = demand_df.drop_duplicates(subset="timestamp", keep='last')

demand_df.head(10)

Unnamed: 0,timestamp,DEMAND,Hour,date
0,2019-12-18 09:00:00,27494.0,9,2019-12-18
1,2019-12-18 08:00:00,27642.0,8,2019-12-18
2,2019-12-18 07:00:00,26404.0,7,2019-12-18
3,2019-12-18 06:00:00,23779.0,6,2019-12-18
4,2019-12-18 05:00:00,22065.0,5,2019-12-18
5,2019-12-18 04:00:00,21399.0,4,2019-12-18
6,2019-12-18 03:00:00,21413.0,3,2019-12-18
7,2019-12-18 02:00:00,21866.0,2,2019-12-18
8,2019-12-18 01:00:00,22741.0,1,2019-12-18
9,2019-12-18 00:00:00,24072.0,0,2019-12-18


In [84]:
# Save a CSV copy of the cleaned data in case it is useful elsewhere
csv_path = 'eia_demand_data.csv'
demand_df.to_csv(csv_path)

### Load

In [85]:
from sqlalchemy import create_engine
from config import username, password

# Engine for the local PostgreSQL database that receives the demand table.
# NOTE: credentials containing URL-reserved characters (e.g. "@", "/") must be
# percent-encoded before being placed in this URL.
engine = create_engine('postgresql://{}:{}@localhost:5432/California_Energy_DB'.format(username,password))

name_demand = "hourlydemand"
schema = 'Demand'

# engine.begin() yields a connection inside a transaction: it commits when the
# block succeeds, rolls back if to_sql raises, and always releases the
# connection, so nothing is left open after the cell finishes.
with engine.begin() as con:
    demand_df.to_sql(name=name_demand, con=con, schema=schema,
                     if_exists='append', index=False)