## Loading data into GCS

In [19]:
from google.cloud import storage
from google.oauth2 import service_account
import os
from datetime import datetime, timedelta

In [6]:
# Echo the service-account key path to confirm the environment variable is set
# (indexing os.environ raises KeyError when the variable is missing).
# NOTE(review): the displayed value is a machine-specific absolute path; consider
# clearing this cell's output before sharing the notebook.
os.environ['GOOGLE_APPLICATION_CREDENTIALS']

'/Users/ns/github-repos/energy-dashboard/energy-dashboard.json'

In [7]:
# Build explicit service-account credentials from the key file named by the
# GOOGLE_APPLICATION_CREDENTIALS environment variable, and record the GCP
# project id that the storage client is created with.
credentials = service_account.Credentials.from_service_account_file(
    os.environ['GOOGLE_APPLICATION_CREDENTIALS']
)
project_id = 'ml-energy-dashboard'

In [9]:
bucket_name = 'ml-energy-dashboard-raw-data'
storage_client = storage.Client(credentials=credentials, project=project_id)

# Client.bucket() only constructs a local Bucket handle without contacting GCS,
# so testing its truthiness can never detect a missing bucket. lookup_bucket()
# performs the API request and returns None when the bucket does not exist.
if storage_client.lookup_bucket(bucket_name) is not None:
    print(f'Bucket exists: {bucket_name}')
else:
    print(f'Creating bucket: {bucket_name}')
    storage_client.create_bucket(bucket_name)


Bucket exists: ml-energy-dashboard-raw-data


In [12]:
# Retrieve a handle to the bucket named by bucket_name via the storage client.
bucket=storage_client.get_bucket(bucket_name)



In [11]:
from entsoe import EntsoePandasClient
import pandas as pd

In [36]:
# ENTSO-E client authenticated with the token from the ENTSOE_TOKEN environment variable.
client = EntsoePandasClient(api_key=os.environ['ENTSOE_TOKEN'])


def load_entsoe(time_pair):
    """Query load data for country code "ES" over a single time window.

    Parameters
    ----------
    time_pair : sequence
        ``time_pair[0]`` is the window start and ``time_pair[1]`` the window
        end; each is passed to ``pd.Timestamp`` with ``tz='UTC'``.

    Returns
    -------
    The result of ``client.query_load`` for that window.
    """
    window_start = pd.Timestamp(time_pair[0], tz='UTC')
    window_end = pd.Timestamp(time_pair[1], tz='UTC')
    return client.query_load("ES", start=window_start, end=window_end)

In [17]:
# `data` is not assigned at notebook scope by any of the cells above, so displaying
# it only worked with leftover kernel state and raises NameError on a fresh kernel.
# Fetch one sample day explicitly; this window (23:00 UTC to 23:00 UTC) corresponds
# to the 2020-03-08 local-time day shown in this cell's recorded output.
data = load_entsoe(('20200307T2300', '20200308T2300'))
data

2020-03-08 00:00:00+01:00    24985.0
2020-03-08 01:00:00+01:00    23210.0
2020-03-08 02:00:00+01:00    21964.0
2020-03-08 03:00:00+01:00    21315.0
2020-03-08 04:00:00+01:00    21078.0
2020-03-08 05:00:00+01:00    21215.0
2020-03-08 06:00:00+01:00    21555.0
2020-03-08 07:00:00+01:00    21738.0
2020-03-08 08:00:00+01:00    22702.0
2020-03-08 09:00:00+01:00    24647.0
2020-03-08 10:00:00+01:00    26384.0
2020-03-08 11:00:00+01:00    26802.0
2020-03-08 12:00:00+01:00    26621.0
2020-03-08 13:00:00+01:00    26791.0
2020-03-08 14:00:00+01:00    26445.0
2020-03-08 15:00:00+01:00    25292.0
2020-03-08 16:00:00+01:00    24512.0
2020-03-08 17:00:00+01:00    24375.0
2020-03-08 18:00:00+01:00    24930.0
2020-03-08 19:00:00+01:00    27636.0
2020-03-08 20:00:00+01:00    29788.0
2020-03-08 21:00:00+01:00    29801.0
2020-03-08 22:00:00+01:00    28619.0
2020-03-08 23:00:00+01:00    26465.0
dtype: float64

In [34]:
# Generate a list of dates, and matching daily query windows, for the last 90 days.
n_days = 90

# Read the clock once. Calling datetime.today() separately for `start` and `end`
# left `end - start` a few microseconds short of 90 days, so `delta.days` was 89
# on almost every run and 90 only when both calls landed on the same microsecond,
# which made the number of generated windows depend on timing.
end = datetime.today()
start = end - timedelta(days=n_days)
delta = end - start  # now exactly n_days; kept for any later cell that reads it

# Calendar dates (YYYYMMDD) for the n_days days beginning at `start`.
date_list = [(start + timedelta(days=i)).strftime('%Y%m%d') for i in range(n_days)]

# One window per date, running from 23:00 on the previous date to 23:00 on the
# date itself, so consecutive windows share a boundary.
time_pairs = []

for i in range(n_days):
    begin_time = (start + timedelta(days=i - 1)).strftime('%Y%m%d') + 'T2300'
    end_time = (start + timedelta(days=i)).strftime('%Y%m%d') + 'T2300'
    time_pairs.append((begin_time, end_time))

print(start)
print(end)
print(time_pairs)

2019-12-11 21:10:46.190587
2020-03-10 21:10:46.190520
[('20191210T2300', '20191211T2300'), ('20191211T2300', '20191212T2300'), ('20191212T2300', '20191213T2300'), ('20191213T2300', '20191214T2300'), ('20191214T2300', '20191215T2300'), ('20191215T2300', '20191216T2300'), ('20191216T2300', '20191217T2300'), ('20191217T2300', '20191218T2300'), ('20191218T2300', '20191219T2300'), ('20191219T2300', '20191220T2300'), ('20191220T2300', '20191221T2300'), ('20191221T2300', '20191222T2300'), ('20191222T2300', '20191223T2300'), ('20191223T2300', '20191224T2300'), ('20191224T2300', '20191225T2300'), ('20191225T2300', '20191226T2300'), ('20191226T2300', '20191227T2300'), ('20191227T2300', '20191228T2300'), ('20191228T2300', '20191229T2300'), ('20191229T2300', '20191230T2300'), ('20191230T2300', '20191231T2300'), ('20191231T2300', '20200101T2300'), ('20200101T2300', '20200102T2300'), ('20200102T2300', '20200103T2300'), ('20200103T2300', '20200104T2300'), ('20200104T2300', '20200105T2300'), ('2020010

In [39]:
# Download every window in time_pairs and upload each result to GCS as a JSON
# object under the `raw-days/` prefix.
folder = 'raw-days'
bucket = storage_client.get_bucket(bucket_name)

for time_pair in time_pairs:
    window_start, window_end = time_pair
    file_name = f'es-energy-demand-{window_start}-{window_end}'
    blob = bucket.blob(f'{folder}/{file_name}')

    data = load_entsoe(time_pair)

    # upload_from_string labels the object text/plain unless told otherwise;
    # the payload is JSON, so store it with the matching content type.
    blob.upload_from_string(data.to_json(), content_type='application/json')
    print(f'Uploaded {file_name}...')

Uploaded es-energy-demand-20191210T2300-20191211T2300...
Uploaded es-energy-demand-20191211T2300-20191212T2300...
Uploaded es-energy-demand-20191212T2300-20191213T2300...
Uploaded es-energy-demand-20191213T2300-20191214T2300...
Uploaded es-energy-demand-20191214T2300-20191215T2300...
Uploaded es-energy-demand-20191215T2300-20191216T2300...
Uploaded es-energy-demand-20191216T2300-20191217T2300...
Uploaded es-energy-demand-20191217T2300-20191218T2300...
Uploaded es-energy-demand-20191218T2300-20191219T2300...
Uploaded es-energy-demand-20191219T2300-20191220T2300...
Uploaded es-energy-demand-20191220T2300-20191221T2300...
Uploaded es-energy-demand-20191221T2300-20191222T2300...
Uploaded es-energy-demand-20191222T2300-20191223T2300...
Uploaded es-energy-demand-20191223T2300-20191224T2300...
Uploaded es-energy-demand-20191224T2300-20191225T2300...
Uploaded es-energy-demand-20191225T2300-20191226T2300...
Uploaded es-energy-demand-20191226T2300-20191227T2300...
Uploaded es-energy-demand-20191

In [18]:
# Shell escape: print the kernel's current working directory.
!pwd

/Users/ns/github-repos/energy-dashboard/notebooks


In [21]:
# Local directory and file name of the load-data file to upload.
# NOTE(review): `path` is an absolute, machine-specific location; consider deriving
# it from a configurable data directory instead.
path='/Users/ns/github-repos/energy-dashboard/data'
name = 'es_load_data_20200305-16'
# The export below is disabled, so the file at path/name presumably has to exist
# already for the later upload to succeed — TODO confirm.
#data.to_csv(os.path.join(path, name))

In [22]:
# Previously this reused whatever `blob` a prior cell left in the kernel. When the
# notebook is run top to bottom that is the last daily JSON object from the upload
# loop, which this call would silently overwrite. Bind a dedicated blob instead.
# NOTE(review): using the local file name as the object name (at the bucket root)
# is an assumption — confirm the intended destination.
blob = bucket.blob(name)
blob.upload_from_filename(os.path.join(path, name))