## Request grpah data function for plotting

In [1]:
import pandas as pd
import numpy as np
from google.cloud import storage
from google.oauth2 import service_account
import os
from datetime import datetime, timedelta

### Download multiple dates from GCS
1. Download actual loads
2. Download persistance forecasts

In [2]:
creds = service_account.Credentials.from_service_account_file(os.environ['GOOGLE_APPLICATION_CREDENTIALS'])
client = storage.Client(credentials=creds, project='ml-energy-dashboard')


In [78]:
def get_time_dates(period, pairs=False):
        end = datetime.today()
        start = datetime.today() + timedelta(-period)
        delta = end-start

        if pairs:
            time_pairs = list()

            for i in range(delta.days+1):
                begin_time = (start + timedelta(i-1)).strftime('%Y%m%d')
                begin_time = f'{begin_time}T2300'
                end_time = (start + timedelta(i)).strftime('%Y%m%d')
                end_time = f'{end_time}T2300'

                time_pairs.append((begin_time, end_time))
            return time_pairs
        else:
            dates = list()
            for i in range(delta.days+1):
                date = (start + timedelta(i+1)).strftime('%Y%m%d')
                dates.append(date)
                
            return dates

In [71]:
get_time_dates(3)

['20200310', '20200311', '20200312']


In [19]:
def pull_all_data(request):
    from flask import jsonify
    from datetime import datetime, timedelta
    import pandas
    from google.cloud import storage
    from google.oauth2 import service_account
    
    BUCKET='ml-energy-dashboard-raw-data'
    FOLDER_DOWN='raw-days'
    FOLDER_PERSIST = 'persistance_forecasts'

    def get_time_dates(period, pairs=False):
        end = datetime.today()
        start = datetime.today() + timedelta(-period)
        delta = end-start

        if pairs:
            time_pairs = list()

            for i in range(delta.days+1):
                begin_time = (start + timedelta(i-1)).strftime('%Y%m%d')
                begin_time = f'{begin_time}T2300'
                end_time = (start + timedelta(i)).strftime('%Y%m%d')
                end_time = f'{end_time}T2300'

                time_pairs.append((begin_time, end_time))
            return time_pairs
        else:
            dates = list()
            for i in range(delta.days+1):
                date = (start + timedelta(i+1)).strftime('%Y%m%d')
                dates.append(date)
                
            return dates
    
    def gcs_load_name(start, end):
        return f'es-energy-demand-{start}-{end}'

    def reset_data_index(data_list):

        data = pd.concat(data_list, axis=0)
        data.index = data.index.tz_localize('UTC').tz_convert('Europe/Madrid')

        return data
    def get_gcs_data(client, bucket_name, folder_name, file_name):

            bucket = client.get_bucket(bucket_name)
            path = f'{folder_name}/{file_name}'
            print(path)
            blob = bucket.blob(f'{folder_name}/{file_name}')
            data_json = blob.download_as_string()
            return data_json

    def get_data(client, time_pairs):
        data_list = list()
        for time_pair in time_pairs:

            file_name = f'es-energy-demand-{time_pair[0]}-{time_pair[1]}'
            data = get_gcs_data(client, BUCKET, FOLDER_DOWN, file_name)
            data = pd.read_json(data, typ='series', orient='records', keep_default_dates=False)
            data_list.append(data)

        data = reset_data_index(data_list)
        return data

    def get_persistence(client, dates):
        data_list = list()
        for date in dates:
            file_name = f'es-persistance-forecasts-{date}'
            data_json = get_gcs_data(client, BUCKET, FOLDER_PERSIST, file_name)
            data = pd.read_json(data_json)
            data_list.append(data)


        data = pd.concat(data_list, axis=0)
        return data
    
    payload = {"success": False}
    if "download" in request and request['download']:
        
        payload['df_loads'] = str(get_data(client, get_time_dates(7, pairs=True)).to_json())
#         persistance = get_persistence(client, get_time_dates(2, pairs=False))
#         payload['df_naive'] = persistance['naive']
#         payload['df_MA3'] = persistance['MA3-day']
#         payload['df_MA3_hbh'] = persistance['MA30day-hbh']
#         payload['success']=True
        
    return payload

In [72]:
data = get_data(client, get_time_dates(7, pairs=True))

raw-days/es-energy-demand-20200304T2300-20200305T2300
raw-days/es-energy-demand-20200305T2300-20200306T2300
raw-days/es-energy-demand-20200306T2300-20200307T2300
raw-days/es-energy-demand-20200307T2300-20200308T2300
raw-days/es-energy-demand-20200308T2300-20200309T2300
raw-days/es-energy-demand-20200309T2300-20200310T2300
raw-days/es-energy-demand-20200310T2300-20200311T2300


In [73]:
data

2020-03-05 00:00:00+01:00    26268.0
2020-03-05 01:00:00+01:00    24718.0
2020-03-05 02:00:00+01:00    23825.0
2020-03-05 03:00:00+01:00    23414.0
2020-03-05 04:00:00+01:00    23354.0
                              ...   
2020-03-11 19:00:00+01:00    32066.0
2020-03-11 20:00:00+01:00    33883.0
2020-03-11 21:00:00+01:00    32969.0
2020-03-11 22:00:00+01:00    30439.0
2020-03-11 23:00:00+01:00    27771.0
Length: 168, dtype: float64

In [77]:
type(get_time_dates(2))

['20200311', '20200312']


NoneType

In [79]:
get_persistence(client, get_time_dates(2, pairs=False))

persistance_forecasts/es-persistance-forecasts-20200311
persistance_forecasts/es-persistance-forecasts-20200312


Unnamed: 0,naive,MA3-day,MA30day-hbh
2020-03-11 00:00:00,26025.0,27061.367347,25188.666667
2020-03-11 01:00:00,24464.0,27009.42,23581.333333
2020-03-11 02:00:00,23602.0,26942.607843,22518.333333
2020-03-11 03:00:00,22989.0,26866.576923,21972.666667
2020-03-11 04:00:00,23066.0,26794.867925,21967.0
2020-03-11 05:00:00,23649.0,26736.611111,22489.666667
2020-03-11 06:00:00,26358.0,26729.727273,24559.0
2020-03-11 07:00:00,29863.0,26785.678571,27079.0
2020-03-11 08:00:00,32001.0,26877.175439,28943.0
2020-03-11 09:00:00,32997.0,26982.689655,30304.0


In [20]:
request={'download':'true'}
t = pull_all_data(request)
t

raw-days/es-energy-demand-20200304T2300-20200305T2300
raw-days/es-energy-demand-20200305T2300-20200306T2300
raw-days/es-energy-demand-20200306T2300-20200307T2300
raw-days/es-energy-demand-20200307T2300-20200308T2300
raw-days/es-energy-demand-20200308T2300-20200309T2300
raw-days/es-energy-demand-20200309T2300-20200310T2300
raw-days/es-energy-demand-20200310T2300-20200311T2300


{'success': False,
 'df_loads': '{"1583362800000":26268.0,"1583366400000":24718.0,"1583370000000":23825.0,"1583373600000":23414.0,"1583377200000":23354.0,"1583380800000":23928.0,"1583384400000":26713.0,"1583388000000":30213.0,"1583391600000":32268.0,"1583395200000":33178.0,"1583398800000":33492.0,"1583402400000":33529.0,"1583406000000":33489.0,"1583409600000":33304.0,"1583413200000":32179.0,"1583416800000":31512.0,"1583420400000":30990.0,"1583424000000":30650.0,"1583427600000":30564.0,"1583431200000":32773.0,"1583434800000":34093.0,"1583438400000":33211.0,"1583442000000":30662.0,"1583445600000":28013.0,"1583449200000":26278.0,"1583452800000":24829.0,"1583456400000":23817.0,"1583460000000":23421.0,"1583463600000":23440.0,"1583467200000":24057.0,"1583470800000":26674.0,"1583474400000":30248.0,"1583478000000":32430.0,"1583481600000":33617.0,"1583485200000":34134.0,"1583488800000":34204.0,"1583492400000":34018.0,"1583496000000":33700.0,"1583499600000":32470.0,"1583503200000":31544.0,"15835

In [21]:
import requests
def request_graph_data():
    
    url="https://us-central1-ml-energy-dashboard.cloudfunctions.net/pull-all-data"
    result = requests.post(url, json={"download": 'true'})

    return result
    
    df_load = pd.read_json(result.json()['df_load'], typ='series', orient='index')
    df_naive = pd.read_json(result.json()['df_naive'], typ='series', orient='index')
    df_MA3 = pd.read_json(result.json()['df_MA3'], typ='series', orient='index')
    df_MA3_hbh = pd.read_json(result.json()['df_MA3_hbh'], typ='series', orient='index')

    return df_load, df_naive, df_MA3, df_MA3_hbh

In [22]:
request_graph_data()

<Response [500]>