In [1]:
import numpy as np
import pandas as pd
import requests
import json

In [2]:
api_key = 'bc8c4348f7c30988e817d0b1b54441c5'

In [3]:
api_1 = 'http://api.eia.gov/series/?api_key='
api_3 = '&series_id=EBA.CAL-ALL.D.HL'

In [4]:
url = api_1 + api_key + api_3

In [5]:
url

'http://api.eia.gov/series/?api_key=bc8c4348f7c30988e817d0b1b54441c5&series_id=EBA.CAL-ALL.D.HL'

In [6]:
r = requests.get(url)
r.status_code

200

In [7]:
pull = r.json()
pull

{'request': {'command': 'series', 'series_id': 'EBA.CAL-ALL.D.HL'},
 'series': [{'series_id': 'EBA.CAL-ALL.D.HL',
   'name': 'Demand for California (region), hourly - local time',
   'units': 'megawatthours',
   'f': 'HL',
   'description': 'Timestamps follow the ISO8601 standard (https://en.wikipedia.org/wiki/ISO_8601). Hourly representations are provided in local time for the balancing authority or region.',
   'start': '20150701T01-07',
   'end': '20210410T17-07',
   'updated': '2021-04-10T20:47:41-0400',
   'data': [['20210410T17-07', 24949],
    ['20210410T16-07', 24068],
    ['20210410T15-07', 23333],
    ['20210410T14-07', 22841],
    ['20210410T13-07', 22678],
    ['20210410T12-07', 22938],
    ['20210410T11-07', 23626],
    ['20210410T10-07', 24229],
    ['20210410T09-07', 24816],
    ['20210410T08-07', 25243],
    ['20210410T07-07', 25142],
    ['20210410T06-07', 24461],
    ['20210410T05-07', 23970],
    ['20210410T04-07', 23940],
    ['20210410T03-07', 24307],
    ['2021041

In [8]:
pull.keys()

dict_keys(['request', 'series'])

In [9]:
series = pull['series']

In [10]:
series[0].keys()

dict_keys(['series_id', 'name', 'units', 'f', 'description', 'start', 'end', 'updated', 'data'])

In [11]:
data = series[0]['data']

In [12]:
data

[['20210410T17-07', 24949],
 ['20210410T16-07', 24068],
 ['20210410T15-07', 23333],
 ['20210410T14-07', 22841],
 ['20210410T13-07', 22678],
 ['20210410T12-07', 22938],
 ['20210410T11-07', 23626],
 ['20210410T10-07', 24229],
 ['20210410T09-07', 24816],
 ['20210410T08-07', 25243],
 ['20210410T07-07', 25142],
 ['20210410T06-07', 24461],
 ['20210410T05-07', 23970],
 ['20210410T04-07', 23940],
 ['20210410T03-07', 24307],
 ['20210410T02-07', 25040],
 ['20210410T01-07', 26011],
 ['20210410T00-07', 27567],
 ['20210409T23-07', 29405],
 ['20210409T22-07', 31090],
 ['20210409T21-07', 32242],
 ['20210409T20-07', 32101],
 ['20210409T19-07', 30823],
 ['20210409T18-07', 29289],
 ['20210409T17-07', 27636],
 ['20210409T16-07', 26566],
 ['20210409T15-07', 25869],
 ['20210409T14-07', 25314],
 ['20210409T13-07', 25191],
 ['20210409T12-07', 25540],
 ['20210409T11-07', 26295],
 ['20210409T10-07', 27304],
 ['20210409T09-07', 28338],
 ['20210409T08-07', 28528],
 ['20210409T07-07', 27770],
 ['20210409T06-07', 

In [13]:
frame = pd.DataFrame(data, columns=['Time', 'Megawatthours'])

In [14]:
frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50657 entries, 0 to 50656
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Time           50657 non-null  object
 1   Megawatthours  50657 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 791.6+ KB


In [15]:
frame['Time'] = frame['Time'].apply(lambda x: x[:-3])
frame.head()

Unnamed: 0,Time,Megawatthours
0,20210410T17,24949
1,20210410T16,24068
2,20210410T15,23333
3,20210410T14,22841
4,20210410T13,22678


In [16]:
frame['Time'] = pd.to_datetime(frame['Time'])

In [17]:
frame = frame.loc[::-1]

In [18]:
frame.head()

Unnamed: 0,Time,Megawatthours
50656,2015-07-01 01:00:00,38210
50655,2015-07-01 02:00:00,35171
50654,2015-07-01 03:00:00,33243
50653,2015-07-01 04:00:00,31955
50652,2015-07-01 05:00:00,31199


In [21]:
def create_time_features(df):
    """Creates several time features from self.dataframe and
    stores the resulting dataframe in self.time_features_df
    """
    df['Year'] = df['Time'].dt.year
    df['Month'] = df['Time'].dt.month
    df['Hour'] = df['Time'].dt.hour
    df['Day_of_week'] = df['Time'].dt.dayofweek
    df['Day_of_month'] = df['Time'].dt.day
    df['Day_of_year'] = df['Time'].dt.dayofyear
    df.set_index('Time', inplace=True, drop=True)
    df = df.astype('int')
    df.reset_index(inplace=True)
    return df

In [22]:
california_time_features = create_time_features(frame)

In [23]:
california_time_features

Unnamed: 0,Time,Megawatthours,Year,Month,Hour,Day_of_week,Day_of_month,Day_of_year
0,2015-07-01 01:00:00,38210,2015,7,1,2,1,182
1,2015-07-01 02:00:00,35171,2015,7,2,2,1,182
2,2015-07-01 03:00:00,33243,2015,7,3,2,1,182
3,2015-07-01 04:00:00,31955,2015,7,4,2,1,182
4,2015-07-01 05:00:00,31199,2015,7,5,2,1,182
...,...,...,...,...,...,...,...,...
50652,2021-04-10 13:00:00,22678,2021,4,13,5,10,100
50653,2021-04-10 14:00:00,22841,2021,4,14,5,10,100
50654,2021-04-10 15:00:00,23333,2021,4,15,5,10,100
50655,2021-04-10 16:00:00,24068,2021,4,16,5,10,100


In [24]:
def create_trig_df(df):
    """Creates trigonometric sin and cos time features to 
    capture cyclical patterns in data. Stores resulting 
    dataframe in df.trig_df
    """
    
    df = df.drop(columns=['Day_of_week', 
                            'Day_of_month', 
                            'Day_of_year'])
    df['sin_day'] = [np.sin(x * (2*np.pi/24)) for x in df['Hour']]
    df['cos_day'] = [np.cos(x * (2*np.pi/24)) for x in df['Hour']]
    df['Timestamp'] = [x.timestamp() for x in df['Time']]
    s_in_year = 365.25*24*60*60
    df["sin_month"] = [np.sin((x) * (2 * np.pi / s_in_year)) for x in df["Timestamp"]]
    df["cos_month"] = [np.cos((x) * (2 * np.pi / s_in_year)) for x in df["Timestamp"]]
    df = df[['Time', 'Megawatthours', 'sin_day', 'cos_day', 'sin_month', 'cos_month']]
    return df

In [25]:
calif_trig_features = create_trig_df(california_time_features)

In [28]:
calif_trig_features.head(25)

Unnamed: 0,Time,Megawatthours,sin_day,cos_day,sin_month,cos_month
0,2015-07-01 01:00:00,38210,0.258819,0.9659258,0.031533,-0.999503
1,2015-07-01 02:00:00,35171,0.5,0.8660254,0.030816,-0.999525
2,2015-07-01 03:00:00,33243,0.7071068,0.7071068,0.0301,-0.999547
3,2015-07-01 04:00:00,31955,0.8660254,0.5,0.029383,-0.999568
4,2015-07-01 05:00:00,31199,0.9659258,0.258819,0.028667,-0.999589
5,2015-07-01 06:00:00,31540,1.0,6.123234000000001e-17,0.02795,-0.999609
6,2015-07-01 07:00:00,32551,0.9659258,-0.258819,0.027234,-0.999629
7,2015-07-01 08:00:00,34483,0.8660254,-0.5,0.026517,-0.999648
8,2015-07-01 09:00:00,37003,0.7071068,-0.7071068,0.025801,-0.999667
9,2015-07-01 10:00:00,39274,0.5,-0.8660254,0.025084,-0.999685
