In [None]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vision Workshop - Environment Setup

## Overview

[Vision Workshop](https://github.com/mblanc/vision-workshop) is a series of labs on how to build an image classification system on Google Cloud. Throughout the Vision Workshop labs, you will learn how to read image data stored in a data lake, perform exploratory data analysis (EDA), train a model, register your model in a model registry, evaluate your model, deploy your model to an endpoint, and run real-time inference against your model.

### Objective

Before you run this notebook, make sure that you have completed the steps in [README](README.md).

In this notebook, you will set up your environment for the Vision Workshop so that it can be used in subsequent labs.

This lab uses the following Google Cloud services and resources:

- [Vertex AI](https://cloud.google.com/vertex-ai/)
- [Google Cloud Storage](https://cloud.google.com/storage)

Steps performed in this notebook:

- Set up your environment.
- Load image data into Cloud Storage.
- Read data from Cloud Storage.

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI
pricing](https://cloud.google.com/vertex-ai/pricing) and [Cloud Storage
pricing](https://cloud.google.com/storage/pricing)
to generate a cost estimate based on your projected usage.

### Install additional packages

Install the following packages required to execute this notebook.

In [None]:
import os

# The Vertex AI Workbench Notebook product has specific requirements
IS_WORKBENCH_NOTEBOOK = os.getenv("DL_ANACONDA_HOME")
IS_USER_MANAGED_WORKBENCH_NOTEBOOK = os.path.exists(
    "/opt/deeplearning/metadata/env_version"
)

# Vertex AI Notebook requires dependencies to be installed with '--user'
USER_FLAG = ""
if IS_WORKBENCH_NOTEBOOK:
    USER_FLAG = "--user"

!pip install --upgrade --no-warn-conflicts '{USER_FLAG}' -q \
    google-cloud-pubsub==2.13.6 \
    google-api-core==2.8.2 \
    google-apitools==0.5.32 \
    plotly==5.10.0 \
    itables==1.2.0 \
    apache_beam==2.40.0 \
    google-cloud-pipeline-components \
    kfp \
    tensorflow==2.8.3 \
    tensorflow_datasets \
    tensorflow_hub \
    scikit-learn

After you install the additional packages, you need to restart the notebook kernel so it can find the packages.

In [None]:
# Restart the kernel so that the packages installed above can be imported.
# The restart is skipped when the IS_TESTING environment variable is set.
import os

if not os.getenv("IS_TESTING"):
    import IPython

    # do_shutdown(True) asks the running kernel to shut down and restart.
    IPython.Application.instance().kernel.do_shutdown(True)

### Setup your environment

Run the next cells to import libraries used in this notebook and configure some options.

Run the next cell to set your project ID and some of the other constants used in the lab.  

In [None]:
import random
import string

# Generate unique ID to help w/ unique naming of certain pieces
ID = "".join(random.choices(string.ascii_lowercase + string.digits, k=5))

GCP_PROJECTS = !gcloud config get-value project
PROJECT_ID = GCP_PROJECTS[0]
BUCKET_NAME = f"{PROJECT_ID}-vision-workshop"
REGION = "europe-west4"

### Create a Google Cloud Storage bucket and save the config data.

Next, we will create a Google Cloud Storage bucket and will save the config data in this bucket. After the cell operation finishes, you can navigate to [Google Cloud Storage](https://console.cloud.google.com/storage/) to see the GCS bucket. 

In [None]:
config = f"""
BUCKET_NAME          = \"{BUCKET_NAME}\"
PROJECT              = \"{PROJECT_ID}\"
REGION               = \"{REGION}\"
ID                   = \"{ID}\"
MODEL_NAME           = \"vision_workshop_model\"
ENDPOINT_NAME        = \"vision_workshop_endpoint\"
"""

!gsutil mb -l {REGION} gs://{BUCKET_NAME}

!echo '{config}' | gsutil cp - gs://{BUCKET_NAME}/config/notebook_env.py

### Copy the data into Google Cloud Storage

Now we will copy the image dataset and ingest it into Google Cloud Storage.

In [None]:
# Download the flowers sample archive from the public cloud-samples-data bucket
# into the notebook's current working directory.
!gsutil -m cp -r gs://cloud-samples-data/ai-platform/flowers/flowers_200_folders.zip .

In [None]:
# Optional: uncomment the line below to list the contents of the public flowers
# dataset folder. It is disabled by default and is not required by the lab.
#!gsutil ls gs://cloud-samples-data/ai-platform/flowers/

In [None]:
!mkdir sample

In [None]:
!unzip flowers_200_folders.zip -d sample

In [None]:
!gsutil -m cp -r sample gs://{BUCKET_NAME}/sample

In [None]:
%%capture
# Delete everything under gs://{BUCKET_NAME}/flowers before the dataset is
# copied there in the next cell. Output is hidden by %%capture.
!gsutil -m rm -rf gs://{BUCKET_NAME}/flowers

In [None]:
%%capture

# Copy the public flowers dataset folder into this project's bucket under
# gs://{BUCKET_NAME}/flowers. Output is hidden by %%capture.
!gsutil -m cp -r gs://cloud-samples-data/ai-platform/flowers/ gs://{BUCKET_NAME}/flowers

In [None]:
%%capture

# remove unused files
!gsutil -m rm -rf gs://{BUCKET_NAME}/flowers/flowers gs://{BUCKET_NAME}/flowers/*.txt gs://{BUCKET_NAME}/flowers/*.zip gs://{BUCKET_NAME}/flowers/*.csv gs://{BUCKET_NAME}/flowers/*.ckpt

### Check data in Google Cloud Storage

After ingesting our data into GCS, let's check your dataset in your Google Cloud Storage bucket.

In [None]:
# Print a Cloud Console link to the uploaded dataset (heading and URL on
# separate lines).
print(
    "Image dataset in the Cloud Storage bucket:",
    f"https://console.cloud.google.com/storage/browser/{BUCKET_NAME}/flowers",
    sep="\n",
)

### END

Now you can go to the next notebook `01_exploratory_data_analysis.ipynb`