# Vertex AI - Playing around with GCS buckets

In [None]:
# Import global modules
import os
import re
import sys
import time
import json
import pickle
import numpy as np
import pandas as pd
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import storage
from google.cloud import bigquery
from google.cloud.exceptions import NotFound


# Set global vars
# NOTE(review): `Path.cwd()` is the working *directory*, so `.open()` on it raises
# IsADirectoryError (PermissionError on Windows). The YAML config file name appears to be
# missing from this path - append it (e.g. `Path.cwd() / "<config>.yaml"`) once confirmed.
with Path.cwd().open() as config_file:
    # Context manager so the file handle is closed as soon as the YAML is parsed
    d_project_config = safe_load(config_file)

# Import local modules
from gcp import connect_bq_services, connect_storage_services

# Connect to the Google services through the local `gcp` helpers;
# both calls receive the project name read from the config
bq_client = connect_bq_services(
    d_project_config['gcp-project-name'],
)
storage_client = connect_storage_services(
    d_project_config['gcp-project-name'],
)

## GCP bucket walkthrough

### 0. Test util

In [None]:
def list_blobs(bucket_name):
    """Print the name of every blob in a bucket, followed by the total count.

    Relies on the module-level ``storage_client``.

    Args:
        bucket_name: Name of the bucket to list.

    Returns:
        None. One ``"<index> - <blob name>"`` line is printed per blob
        (index starting at 0), then a summary line with the number of objects.
    """
    n_blob = 0  # stays 0 when the bucket is empty
    # List the bucket that was requested, so the summary line below reports
    # the bucket that was actually listed
    for n_blob, blob in enumerate(storage_client.list_blobs(bucket_name), start=1):
        print(f"{n_blob - 1} - {blob.name}")

    print(f"{n_blob} objects contained in bucket {bucket_name}")

# Sample payloads: a dict, a random 10x3 DataFrame and a list of strings
d_obj_1 = dict(foo=0, bar="test")
df_obj_2 = pd.DataFrame(data=np.random.randn(10, 3), columns=['colA', 'colB', 'colC'])
l_object_3 = ['foo', 'bar', 'foobar']

# Display each payload (only the first two rows of the DataFrame)
print(f'obj1:\n {d_obj_1}')
print(f'obj2:\n {df_obj_2.head(2)}')
print(f'obj3:\n {l_object_3}')


### 1. Upload objects to the bucket

In [None]:
# Bucket content before the uploads
list_blobs(d_project_config['bucket'])

# Handle on the target bucket
bucket = storage_client.bucket(d_project_config['bucket'])

# Python object -> pickled bytes, stored under the "dir_test/" prefix
(
    bucket
    .blob('dir_test/obj1.pickle')
    .upload_from_string(data=pickle.dumps(d_obj_1), content_type='application/octet-stream')
)

# DataFrame -> CSV text (index column dropped)
(
    bucket
    .blob('obj2.csv')
    .upload_from_string(data=df_obj_2.to_csv(index=False), content_type='text/csv')
)

# List of strings -> plain text, one item per line
(
    bucket
    .blob('obj3')
    .upload_from_string(data='\n'.join(l_object_3), content_type='text/plain')
)

# Bucket content after the uploads
list_blobs(d_project_config['bucket'])

### 2. Download objects from the bucket

#### Pickle files

In [None]:
# Extract pickle file
path = 'dir_test/obj1.pickle'  # pickled object uploaded in section 1
local_path = '/tmp/bucket_content.pickle'
err_raise = True  # set to False to get `data = None` instead of an exception when the blob is missing

# Download locally
try:
    bucket.blob(path).download_to_filename(local_path)
except (NotFound, FileNotFoundError) as e:
    if err_raise:
        # Keep the original error as the cause so the traceback shows why the download failed
        raise FileNotFoundError(f"Could not download '{path}' to '{local_path}'") from e
    # Nothing was downloaded: do not read a missing or stale local file
    data = None
else:
    # Read from tmp dir
    # NOTE: unpickling executes arbitrary code - only load objects from a bucket you trust
    with open(local_path, 'rb') as handle:
        data = pickle.load(handle)

print(data)

#### Plain text files

In [None]:
# Extract plain text file
path = 'obj3'  # plain text object uploaded in section 1
local_path = '/tmp/bucket_content'
err_raise = True  # set to False to get `data = None` instead of an exception when the blob is missing

# Download locally
try:
    bucket.blob(path).download_to_filename(local_path)
except (NotFound, FileNotFoundError) as e:
    if err_raise:
        # Keep the original error as the cause so the traceback shows why the download failed
        raise FileNotFoundError(f"Could not download '{path}' to '{local_path}'") from e
    # Nothing was downloaded: do not read a missing or stale local file
    data = None
else:
    # Read from tmp dir as text: `upload_from_string` encodes str payloads as UTF-8,
    # so decode with the same encoding instead of printing a bytes repr
    with open(local_path, encoding='utf-8') as handle:
        data = handle.read()

print(data)


#### CSV x Pandas

In [None]:
# Read the CSV object straight from the bucket into a DataFrame via its gs:// URI
bucket_name = d_project_config['bucket']
path = 'obj2.csv'
df = pd.read_csv(
    f'gs://{bucket_name}/{path}',
)
df.head()