In [1]:
from predict_utils import predict_custom_trained_model_sample
import data_utils

import altair as alt
from datetime import datetime, timedelta

import pandas as pd
pd.set_option("display.max_columns", 500)

In [2]:
# Load the evaluation CSV.
eval_df = pd.read_csv('./data/validate_energy.csv')

# Column-name groups reported by data_utils for this frame; the model inputs
# are the string columns followed by the numeric columns.
col_dict = data_utils.get_column_input_dict(eval_df)
keep_cols = col_dict['STRING_COLS'] + col_dict['NUMERIC_COLS']

# First row only, in pandas' default dict layout: {column: {row_index: value}}.
eval_df_test = eval_df[keep_cols].head(1).to_dict()

# Drop the row index so every column maps to a plain list: {column: [value]}.
instance = {
    column: list(by_index.values())
    for column, by_index in eval_df_test.items()
}

In [3]:
# Frame loaded from train_energy.csv; normalize_data() below reads its reference
# mean/std values and its column names from this module-level variable.
train_df = pd.read_csv('./data/train_energy.csv')

def z_score(x, mean_x, std_x):
    """Return ``x`` centred on ``mean_x`` and scaled by ``std_x``.

    Args:
        x: Value to standardise.
        mean_x: Reference mean subtracted from ``x``.
        std_x: Reference standard deviation the centred value is divided by.

    Returns:
        ``(x - mean_x) / std_x``.
    """
    centred = x - mean_x
    return centred / std_x

def normalize_data(df, stats_df=None):
    """Return a z-scored copy of ``df`` for the energy-lag and temperature columns.

    Three column groups are standardised, each with the mean and standard
    deviation of a single reference column taken from ``stats_df``:

    * columns starting with ``total_consumption_T_minus_`` use the statistics
      of ``total_consumption_T0``;
    * columns starting with ``min_temp_T`` use the statistics of ``min_temp_T0``;
    * columns starting with ``max_temp_T`` use the statistics of ``max_temp_T0``.

    The columns to scale are selected from ``stats_df.columns``, so ``df`` must
    contain every matching column.

    Args:
        df: Frame to normalise. It is not modified.
        stats_df: Frame supplying the reference statistics and the column
            names. Defaults to the module-level ``train_df``.

    Returns:
        A copy of ``df`` with the selected columns replaced by their z-scores.

    Raises:
        KeyError: If a reference column is missing from ``stats_df`` or a
            selected column is missing from ``df``.
    """
    if stats_df is None:
        # Keep the original behaviour: fall back to the notebook's training frame.
        stats_df = train_df

    normalized = df.copy()

    # (reference column providing mean/std, prefix of the columns to scale)
    column_groups = (
        ('total_consumption_T0', 'total_consumption_T_minus_'),
        ('min_temp_T0', 'min_temp_T'),
        ('max_temp_T0', 'max_temp_T'),
    )

    for reference_col, prefix in column_groups:
        reference_mean = stats_df[reference_col].mean()
        reference_std = stats_df[reference_col].std()

        for col in stats_df.columns:
            if col.startswith(prefix):
                # Whole-column arithmetic instead of an element-wise .apply().
                normalized[col] = (normalized[col] - reference_mean) / reference_std

    return normalized

In [4]:
# Standardised version of the evaluation frame. normalize_data() works on a
# copy, so eval_df itself keeps its original values.
eval_df_z_scored = normalize_data(eval_df)

In [5]:
# API host handed to the prediction helper.
api_endpoint = 'us-east1-aiplatform.googleapis.com'

# Send the single-row `instance` dict to the endpoint; the call is the last
# expression of the cell, so its return value is what the cell displays.
predict_custom_trained_model_sample(
    instance_dict=instance,
    project="186845041058",
    location="us-east1",
    endpoint_id="2077637171845529600",
    api_endpoint=api_endpoint,
)

response
 deployed_model_id: 6415694328538071040


[[177.69603, 788.57605, 438.996399, 217.909531, 317.126, 303.712646, 67.7597656, 715.994873, -100.426208, 319.08963, 466.370728, 52.1604958, -258.857544, 551.297, -257.126129, 837.192566, -278.087982, 269.025146, 306.597229, 207.47522, -75.7512512, -238.005829, 616.542603, 35.6343956, -265.874939, 86.8396835, -375.018616, 93.4632874]]

In [6]:
# Columns of the raw CSV that pandas should parse as datetimes while loading.
datetime_cols = ['start_time', 'end_time', 'end_date', 'start_date']

raw_data = pd.read_csv(
    './data/portugal_energy_clean.csv',
    parse_dates=datetime_cols,
)

# Preview the first rows.
raw_data.head()

Unnamed: 0,start_time,end_time,end_date,start_date,day_pod,user_id,total_consumption,total_ff_values,time_points_in_period,day_of_year_sin,day_of_year_cos,JD,max_temp,min_temp,holiday
0,2010-12-31 19:00:00,2011-01-01 07:00:00,2011-01-01,2010-12-31,0,MT_124,1952.15311,0,27,0.017166,0.999853,1,8.766667,1.788095,major
1,2010-12-31 19:00:00,2011-01-01 07:00:00,2011-01-01,2010-12-31,0,MT_131,7708.333333,0,27,0.017166,0.999853,1,8.766667,1.788095,major
2,2010-12-31 19:00:00,2011-01-01 07:00:00,2011-01-01,2010-12-31,0,MT_132,1156.10712,0,27,0.017166,0.999853,1,8.766667,1.788095,major
3,2010-12-31 19:00:00,2011-01-01 07:00:00,2011-01-01,2010-12-31,0,MT_156,1843.986276,0,27,0.017166,0.999853,1,8.766667,1.788095,major
4,2010-12-31 19:00:00,2011-01-01 07:00:00,2011-01-01,2010-12-31,0,MT_158,1047.192513,0,27,0.017166,0.999853,1,8.766667,1.788095,major


In [7]:
def get_label(row, day_of_prediction, start_time_T0):
    """Name the window that ``row['end_date']`` falls into.

    Args:
        row: Mapping-like record exposing an ``'end_date'`` entry.
        day_of_prediction: Upper bound (inclusive) of the feature window.
        start_time_T0: Upper bound (inclusive) of the lag window.

    Returns:
        ``'Feature Window'`` when the end date is on or before
        ``day_of_prediction``, ``'Lag Window'`` when it is after that but on or
        before ``start_time_T0``, and ``'Prediction Window'`` otherwise.
    """
    end_date = row['end_date']

    if end_date <= day_of_prediction:
        return 'Feature Window'
    if end_date <= start_time_T0:
        return 'Lag Window'
    return 'Prediction Window'

In [8]:
def _consumption_bar_chart(data, x_field, y_field, legend, title):
    """Build one 800x200 thin-bar chart coloured by the ``label`` column."""
    return alt.Chart(data, height=200, width=800).mark_bar(size=1).encode(
        x=alt.X(x_field, axis=alt.Axis(title=None, labelFontSize=12)),
        y=alt.Y(y_field, axis=alt.Axis(title='Energy Consumed', titleFontWeight=400, titleFontSize=12, labelFontSize=12)),
        color=alt.Color('label', legend=legend)
    ).properties(
        title=title
    )


def get_test_sample(df, raw_data, keep_cols, endpoint_id,
                    project="186845041058",
                    location="us-east1",
                    api_endpoint='us-east1-aiplatform.googleapis.com',
                    horizon=28,
                    random_state=None):
    """Predict for one randomly sampled row and chart actuals against predictions.

    A single row is drawn from ``df``, its ``keep_cols`` values are sent to the
    prediction endpoint, and two Altair charts are built for the sampled
    ``user_id`` / ``day_pod`` pair from ``raw_data``.

    Args:
        df: Frame to sample from. It must contain ``keep_cols``, ``user_id``,
            ``day_pod``, ``day_of_prediction`` and the columns
            ``start_time_T0`` .. ``start_time_T{horizon-1}``.
        raw_data: Frame with ``user_id``, ``day_pod``, ``end_date`` and
            ``total_consumption`` columns holding the actual consumption.
        keep_cols: Names of the columns sent to the model.
        endpoint_id: ID of the endpoint to query.
        project: Project passed to the prediction helper.
        location: Location passed to the prediction helper.
        api_endpoint: API host passed to the prediction helper.
        horizon: Number of predicted steps expected back from the endpoint.
        random_state: Forwarded to ``DataFrame.sample``; set it to make the
            sampled row reproducible.

    Returns:
        A tuple ``(actual_chart, predicted_chart)``. The first shows the actual
        consumption up to the last predicted step; the second layers the
        actual consumption before ``start_time_T0`` with the predictions.

    Raises:
        ValueError: If the endpoint does not return ``horizon`` values.
    """
    sample_df = df.sample(1, random_state=random_state)

    # {column: {row_index: value}} -> {column: [value]}
    features_dict = sample_df[keep_cols].to_dict()
    instance = {key: list(value.values()) for key, value in features_dict.items()}

    preds = predict_custom_trained_model_sample(
        project=project,
        endpoint_id=endpoint_id,
        location=location,
        instance_dict=instance,
        api_endpoint=api_endpoint)

    # NOTE(review): preds[0] is assumed to hold one value per predicted step,
    # matching the nested-list output shown earlier in the notebook.
    predicted_values = preds[0]
    if len(predicted_values) != horizon:
        raise ValueError(
            f'expected {horizon} predictions but the endpoint returned {len(predicted_values)}'
        )

    step_cols = [f'start_time_T{i}' for i in range(horizon)]
    prediction_df = pd.DataFrame({
        'pred_dates': sample_df[step_cols].values[0],
        'preds': predicted_values,
        'label': ['Prediction Window'] * len(predicted_values)
    })

    user = sample_df['user_id'].values[0]
    day_pod = sample_df['day_pod'].values[0]
    pod_desc = 'Day' if day_pod else 'Night'
    day_of_prediction = pd.to_datetime(sample_df['day_of_prediction'].values[0])
    last_start_time = pd.to_datetime(sample_df[step_cols[-1]].values[0])
    start_time_T0 = pd.to_datetime(sample_df['start_time_T0'].values[0])

    # Earliest end_date shown: 52 weeks minus 28 days before the prediction day.
    window_start = day_of_prediction - timedelta(days=(52*7)-28)

    mask = ((raw_data['user_id']==user)
            & (raw_data['day_pod']==day_pod)
            & (raw_data['end_date']<=last_start_time)
            & (raw_data['end_date']>=window_start)
           )
    _df = raw_data[mask].copy()
    _df['label'] = _df.apply(get_label, args=(day_of_prediction, start_time_T0), axis=1)

    chart1 = _consumption_bar_chart(
        _df, 'end_date:T', 'total_consumption',
        legend=None,
        title=f"Actual Energy Consumption ({user}, {pod_desc})")

    # _df is already restricted to this user, day_pod and window_start, so only
    # the upper bound has to be tightened for the historical layer.
    before_T0 = _df['end_date'] < start_time_T0
    predicted_title = f"Predicted Energy Consumption ({user}, {pod_desc})"

    chart2_historical = _consumption_bar_chart(
        _df[before_T0], 'end_date:T', 'total_consumption',
        legend=alt.Legend(title='', orient='bottom', labelFontSize=15),
        title=predicted_title)

    chart2_pred = _consumption_bar_chart(
        prediction_df, 'pred_dates:T', 'preds',
        legend=alt.Legend(title='', orient='bottom', labelFontSize=15),
        title=predicted_title)

    # `+` layers the two Altair charts on shared axes.
    return chart1, chart2_historical+chart2_pred
    

In [40]:
# Endpoint IDs; only dnn_normalized_fc_endpoint_id is queried in this cell.
dnn_sigtest_endpoint_id = '2077637171845529600'
dnn_normalized_fc_endpoint_id = '2620320926943674368'

# Model input columns: the string columns followed by the numeric columns.
col_dict = data_utils.get_column_input_dict(eval_df)
keep_cols = col_dict['STRING_COLS'] + col_dict['NUMERIC_COLS']

# Sample one row of the z-scored evaluation frame and build both charts.
c1, c2 = get_test_sample(
    eval_df_z_scored,
    raw_data,
    keep_cols,
    endpoint_id=dnn_normalized_fc_endpoint_id,
)

c1.display()
c2.display()

response
 deployed_model_id: 4995934546009522176
