## Setup

In [None]:
# Ask gcloud for the active project. An IPython `!` capture returns the
# command's output lines as a list, so PROJECT_ID is not a plain string yet.
PROJECT_ID = !gcloud config get-value project

In [None]:
# Reduce the captured gcloud output (a list of lines) to its first line.
# The isinstance guard keeps this cell safe to re-run: without it, a second
# execution would index into the string and keep only its first character.
if not isinstance(PROJECT_ID, str):
    PROJECT_ID = PROJECT_ID[0]
PROJECT_ID

'st-data-project'

In [3]:
# Location passed to the Vertex AI SDK.
REGION = "us-central1"

# BigQuery dataset, source table, and the table that receives exported results.
DATASET = "fx_data"
TABLE = "eurusd_hourly"
RESULT_TABLE = "eurusd_hourly_forecast"

# Identifiers used for display names and resource labels.
NOTEBOOK = "02_autoforecast_with_client"
EXPERIMENT = "auto_ml_forecast"
SERIES = "vertexai_explore"

In [4]:
# Presumably the machine type for a later deployment step; no cell visible
# here references it.
# NOTE(review): confirm "e2-micro" is a machine type Vertex AI accepts before use.
DEPLOY_COMPUTE = "e2-micro"

**Packages**

In [5]:
from google.cloud import aiplatform
from google.cloud import bigquery

import pandas as pd
import numpy as np
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

**Clients**

In [6]:
# Point the Vertex AI SDK at this project and region, then open a BigQuery
# client bound to the same project.
aiplatform.init(location=REGION, project=PROJECT_ID)
bq = bigquery.Client(project=PROJECT_ID)

In [7]:
# Run identifier (YYYYMMDDHHMMSS, local time) appended to resource display names.
TIMESTAMP = f"{datetime.now():%Y%m%d%H%M%S}"

## Create Dataset

In [16]:
# Register the BigQuery table as a Vertex AI time-series dataset. The display
# name carries the run timestamp; the labels tag the resource with the
# notebook, series and experiment names.
dataset = aiplatform.TimeSeriesDataset.create(
    bq_source=f'bq://{PROJECT_ID}.{DATASET}.{TABLE}',
    display_name=f'{NOTEBOOK}_{DATASET}_{TABLE}_{TIMESTAMP}',
    labels=dict(
        notebook=f'{NOTEBOOK}',
        series=f'{SERIES}',
        experiment=f'{EXPERIMENT}',
    ),
)

In [17]:
# Inspect the column names the dataset reports for the source table.
dataset.column_names

['close', 'open', 'high', 'low', 'cross_rate', 'ts']

In [18]:
# Confirm the labels supplied at creation are attached to the dataset.
dataset.labels

{'notebook': '02_autoforecast_with_client',
 'experiment': 'auto_ml_forecast',
 'series': 'vertexai_explore'}

In [19]:
# Show which metadata schema the dataset was created with.
dataset.metadata_schema_uri

'gs://google-cloud-aiplatform/schema/dataset/metadata/time_series_1.0.0.yaml'

## Training Job

In [20]:
# Give the 'auto' spec to each column handed to the training job: the
# timestamp column and the target column.
column_specs = {column: 'auto' for column in ('ts', 'close')}
column_specs

{'ts': 'auto', 'close': 'auto'}

In [21]:
# Define the AutoML forecasting job. Training itself only starts when
# `forecasting_job.run(...)` is called in a later cell.
forecasting_job = aiplatform.AutoMLForecastingTrainingJob(
    column_specs=column_specs,
    optimization_objective="minimize-rmse",
    display_name=f'{SERIES}_{EXPERIMENT}_{TIMESTAMP}',
    labels=dict(series=f'{SERIES}', experiment=f'{EXPERIMENT}'),
)

In [22]:
# Parameters consumed by `forecasting_job.run(...)`.

# Roles of the source-table columns.
time_column = 'ts'
time_series_identifier_column = 'cross_rate'
target_column = 'close'

# Granularity unit of the series and the number of future steps to forecast.
data_granularity_unit = 'hour'
forecast_horizon = 12

# Train / validation / test fractions.
training_fraction_split = 0.8
validation_fraction_split = 0.1
test_fraction_split = 0.1

# Training budget in milli node hours (1000 = 1 node hour). The SDK documents
# 1000 as the minimum for this argument, so a smaller value is not a valid budget.
budget_milli_node_hours = 1000

# BigQuery source reference; not used by any cell visible in this section.
data_source = {'big_query_data_source': f'bq://{PROJECT_ID}.{DATASET}.{TABLE}'}

# Destination table for the exported evaluated data items. It is colon-separated,
# unlike the dot-separated source URI.
# NOTE(review): confirm this is the form the SDK expects for this argument.
export_evaluated_data_items_bigquery_destination_uri = f'bq://{PROJECT_ID}:{DATASET}:{RESULT_TABLE}'

In [23]:
# Show the resolved destination URI for the exported evaluated data items.
export_evaluated_data_items_bigquery_destination_uri

'bq://st-data-project:fx_data:eurusd_hourly_forecast'

In [24]:
# Launch the AutoML forecasting training run using the parameters defined above.
#
# NOTE(review): the output recorded for this cell is
#   RuntimeError: Training failed with: code: 3, message: "INVALID_ARGUMENT"
# The error does not name the offending argument. Things to check (none
# confirmed): context_window relative to forecast_horizon, the export
# destination URI format, and whether column_specs covers every required column.
forecasting_job.run(
    # data
    dataset=dataset,
    target_column=target_column,
    time_column=time_column,  # use the configured name rather than a repeated literal
    time_series_identifier_column=time_series_identifier_column,
    unavailable_at_forecast_columns=[target_column],
    available_at_forecast_columns=[time_column],
    # forecast
    forecast_horizon=forecast_horizon,
    data_granularity_unit=data_granularity_unit,
    data_granularity_count=1,
    training_fraction_split=training_fraction_split,
    validation_fraction_split=validation_fraction_split,
    test_fraction_split=test_fraction_split,
    context_window=400,
    # output
    export_evaluated_data_items=True,
    export_evaluated_data_items_bigquery_destination_uri=export_evaluated_data_items_bigquery_destination_uri,
    export_evaluated_data_items_override_destination=True,
    # running parameters
    validation_options="fail-pipeline",
    # Honour the configured budget, but never go below the 1000 milli node hour
    # minimum the SDK documents for this argument.
    budget_milli_node_hours=max(budget_milli_node_hours, 1000),
    # model parameters
    model_display_name=f"{SERIES}_{EXPERIMENT}",
    model_labels={'series': f'{SERIES}', 'experiment': f'{EXPERIMENT}'},
    model_id=f"model_{SERIES}_{EXPERIMENT}",
    # parent_model = parent,
    is_default_version=True,

    # session parameters: False means continue in local session, True waits and logs progress
    sync=True,
)
    
    

RuntimeError: Training failed with:
code: 3
message: "INVALID_ARGUMENT"


In [None]:
# NOTE(review): looks like a leftover scratch cell — consider removing it.
print('s')