## Machine Learning on Google Cloud Asset Creation 
This notebook automatically performs the long-running steps of the labs in this course so that you, as an instructor, can show/demo the environment of a successful lab run. By doing this ahead of time you can get familiar with the end results and avoid spending hours of class time waiting for resources and assets to be created (managed datasets, AutoML models, etc.).

In [None]:
# Setup your dependencies
import os

# The Google Cloud Notebook product has specific requirements
IS_GOOGLE_CLOUD_NOTEBOOK = os.path.exists("/opt/deeplearning/metadata/env_version")

# Google Cloud Notebook requires dependencies to be installed with '--user'
USER_FLAG = ""
if IS_GOOGLE_CLOUD_NOTEBOOK:
    USER_FLAG = "--user"

# Upgrade the specified package to the newest available version
! pip install {USER_FLAG} --upgrade google-cloud-aiplatform google-cloud-storage jsonlines

#### Restart your kernel before moving forward so the newly installed packages are picked up

### Creates Vertex AI Managed Datasets
* Image Dataset of flowers for single-label classification
* Video Dataset of actions for video-classification
* Tabular Dataset for pet adoption classification 

In [None]:
# Set project id and initialize Vertex AI via Python SDK 
import os 
from google.cloud import aiplatform
from datetime import datetime
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

# Get your Google Cloud project ID from gcloud
if not os.getenv("IS_TESTING"):
    shell_output=!gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    
aiplatform.init(project=PROJECT_ID, location="us-central1")

# Flower Image Dataset - Lab 1
flower_src = "gs://cloud-samples-data/ai-platform/flowers/flowers.csv"

flower_dataset = aiplatform.ImageDataset.create(
    display_name="flowers"
)

flower_dataset.import_data(
    gcs_source=flower_src,
    import_schema_uri="gs://google-cloud-aiplatform/schema/dataset/ioformat/image_classification_single_label_io_format_1.0.0.yaml",
    sync=False
)

# Video Action Dataset - Lab 2
video_src = "gs://automl-video-demo-data/hmdb_split1_5classes_all.csv"

video_dataset = aiplatform.VideoDataset.create(
    display_name="action_clips"
)

video_dataset.import_data(
    gcs_source=video_src,
    import_schema_uri="gs://google-cloud-aiplatform/schema/dataset/ioformat/video_classification_io_format_1.0.0.yaml",
    sync=False
)

# Tabular Dataset - Lab 3
!gsutil mb -l us-central1 gs://$PROJECT_ID
IMPORT_FILE = "petfinder-tabular-classification.csv"
!gsutil cp gs://cloud-samples-data/ai-platform-unified/datasets/tabular/{IMPORT_FILE} gs://{PROJECT_ID}/data/

tabular_src = f"gs://{PROJECT_ID}/data/{IMPORT_FILE}"

aiplatform.TabularDataset.create(
    display_name="petfinder-tabular-dataset",
    gcs_source=tabular_src,
    sync=False
)

#### WAIT. Do not go on until datasets have finished importing the data. ~15-20 minutes.

### Trains AutoML Models
* Flower Image Classification (from Image Dataset)
* Video Classification (from Video Dataset)
* Pet Adoption Classification (from Tabular Dataset)

In [None]:
# Set project id and initialize Vertex AI via Python SDK 
import os 
from google.cloud import aiplatform
from datetime import datetime
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

# Get your Google Cloud project ID from gcloud
if not os.getenv("IS_TESTING"):
    shell_output=!gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    
aiplatform.init(project=PROJECT_ID, location="us-central1")

import os 
from google.cloud import aiplatform
from datetime import datetime
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

# Get your Google Cloud project ID from gcloud
if not os.getenv("IS_TESTING"):
    shell_output=!gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    
aiplatform.init(project=PROJECT_ID, location="us-central1")

# Flower model - Lab 1
flower_id = aiplatform.ImageDataset.list(filter='display_name="flowers"')[0].resource_name
flower_dataset = aiplatform.ImageDataset(flower_id)

flowers_job = aiplatform.AutoMLImageTrainingJob(
    display_name=f'flowers-training-job-{TIMESTAMP}'
)

flowers_job.run(
    dataset=flower_dataset,
    sync=False
)

# Video classification model - Lab 2
video_id = aiplatform.VideoDataset.list(filter='display_name="action_clips"')[0].resource_name
video_dataset = aiplatform.VideoDataset(video_id)

video_job = aiplatform.AutoMLVideoTrainingJob(
    display_name=f'video-training-job-{TIMESTAMP}'
)

video_job.run(
    dataset=video_dataset,
    sync=False
)

# Pet adoption model (tabular) - Lab 3
tabular_id =aiplatform.TabularDataset.list(filter='display_name="petfinder-tabular-dataset"')[0].resource_name
tabular_dataset = aiplatform.TabularDataset(tabular_id)

tabular_job = aiplatform.AutoMLTabularTrainingJob(
    display_name=f'petfinder-training-job-{TIMESTAMP}',
    optimization_prediction_type='classification'
)

tabular_job.run(
    dataset=tabular_dataset,
    target_column='Adopted',
    sync=False
)

### BigQuery ML Models
* Penguin Body Mass Model - Lab 8 
* Taxi Tip Prediction w/ Hyperparameter Tuning - Lab 9

In [None]:
# Create the BigQuery dataset `bqml_tutorial` (location: us) that the BigQuery ML cells below write their tables and models into
!bq --location=us mk bqml_tutorial 

#### Train Penguin Model (~5 minutes)

In [None]:
%%bigquery
-- Train (or retrain) a linear regression that predicts penguin body mass.
-- Rows with no recorded body mass are left out of the training data.
CREATE OR REPLACE MODEL `bqml_tutorial.penguins_model`
OPTIONS (
  model_type = 'linear_reg',
  input_label_cols = ['body_mass_g']
) AS
SELECT *
FROM `bigquery-public-data.ml_datasets.penguins`
WHERE body_mass_g IS NOT NULL

#### Train Taxi Tip Model 

In [None]:
%%bigquery
-- Build the training table: up to 100,000 yellow-taxi trips from 2018 that have a
-- tip amount, with `tip_amount` renamed to `label`.
-- IF NOT EXISTS keeps this cell re-runnable: a plain CREATE TABLE fails with
-- "Already Exists" the second time the cell is executed.
CREATE TABLE IF NOT EXISTS `bqml_tutorial.taxi_tip_input` AS
SELECT
  * EXCEPT(tip_amount), tip_amount AS label
FROM
  `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2018`
WHERE
  tip_amount IS NOT NULL
LIMIT 100000;

-- Train a linear regression with hyperparameter tuning (20 trials, 2 at a time).
-- No input_label_cols is given, so BigQuery ML uses the column named `label`.
-- IF NOT EXISTS makes a re-run a no-op instead of an error or a repeat of the
-- whole tuning job.
CREATE MODEL IF NOT EXISTS `bqml_tutorial.hp_taxi_tip_model`
OPTIONS
  (model_type='linear_reg',
   num_trials=20,
   max_parallel_trials=2) AS
SELECT
  *
FROM
  `bqml_tutorial.taxi_tip_input`