# Connect to Google Cloud Bucket

In [1]:
from google.cloud import storage
import pandas as pd
import os

In [3]:
import getpass

In [4]:
# Ask for the GCP project id with hidden input so it is not echoed into the notebook.
# getpass() already hands back a str, so it is used directly.
project_id = getpass.getpass("project id: ")

project id: ········


In [None]:
# This is a must-do step before the next client step: the project id has to be
# present in the environment when the Google Cloud clients are created.
#
# Plain assignment is used instead of os.environ.setdefault() for two reasons:
#   * setdefault() keeps any value that is already set (for example from an earlier
#     run of this cell), so a newly entered project id would be silently ignored;
#   * setdefault() returns the value, and as the last expression of the cell it would
#     be displayed in the output, revealing the id that getpass was used to hide.
os.environ["GCLOUD_PROJECT"] = project_id  # 'i-destiny-******'

In [6]:
# Create the Cloud Storage client. No project is passed explicitly, so this relies on
# the GCLOUD_PROJECT environment variable set in the previous step.
client = storage.Client()

In [7]:
# OpenTelemetry -> Cloud Trace wiring. All imports for this cell are grouped first.
from opentelemetry import trace
from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter
from opentelemetry.instrumentation.requests import RequestsInstrumentor
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

# Build a tracer provider whose spans go through a batching processor into the
# Cloud Trace exporter, then register it as the global tracer provider.
tracer_provider = TracerProvider()
cloud_trace_exporter = CloudTraceSpanExporter()
span_processor = BatchSpanProcessor(cloud_trace_exporter)
tracer_provider.add_span_processor(span_processor)
trace.set_tracer_provider(tracer_provider)

# Optional yet recommended: instrument the requests HTTP library with the same provider.
RequestsInstrumentor().instrument(tracer_provider=tracer_provider)

In [8]:
# Ask for the bucket name with hidden input; getpass() already returns a str.
bucket_name = getpass.getpass("bucket name:")

bucket name:········


In [10]:
# Look up the bucket by the name entered above; `bucket` is the handle used for
# every upload further down.
bucket = client.get_bucket(bucket_name)

In [11]:
# Show which class get_bucket() returned.
type(bucket)

google.cloud.storage.bucket.Bucket

In [31]:
# `blobs` is otherwise first assigned much further down the notebook, so on a fresh
# kernel (Restart & Run All) this cell would fail with NameError. Create the listing
# iterator here so the cell works in top-to-bottom order.
blobs = client.list_blobs(bucket_name)
# Request path the listing iterator uses for this bucket's objects.
blobs.path

'/b/data_bucket_scraped/o'

In [34]:
# Bucket the listing iterator is bound to.
# NOTE(review): `blobs` must already hold the iterator returned by
# client.list_blobs(bucket_name) when this cell runs.
blobs.bucket

<Bucket: data_bucket_scraped>

# Upload local file to Google Cloud Bucket

In [48]:
# Ask for the local path of the file to upload (input is hidden).
# The previous form nested single quotes inside a single-quoted f-string, which is a
# SyntaxError on Python versions before 3.12; getpass() already returns a str, so the
# f-string wrapper is dropped altogether.
file_to_upload = getpass.getpass('Local file path: ')

Local file path: ········


In [50]:
# 'catalog/bulletin_catalog' is the name the uploaded file will have inside the bucket.
catalog_blob = bucket.blob(blob_name='catalog/bulletin_catalog')
# Send the local file chosen in the previous cell to that object.
catalog_blob.upload_from_filename(filename=file_to_upload)

In [41]:
# Show which class bucket.blob() returned.
type(catalog_blob)

google.cloud.storage.blob.Blob

In [52]:
# Object name the file gets inside the bucket.
data_blob = bucket.blob(blob_name='data/visa_bulletin_all_time.csv')
# Local source file; note its name ('alltime') differs from the object name ('all_time').
data_blob.upload_from_filename(filename='visa_bulletin_alltime.csv')

In [57]:
# List every object in the bucket and print its name. A blob's name is the unique
# path of the object inside the bucket.
# `blobs` is an iterator (see the type check in the next cell); this loop walks through it.
blobs=client.list_blobs(bucket_name)
for blob in blobs:
    print(blob.name)

catalog/
catalog/bulletin_catalog
data/
data/visa_bulletin_all_time.csv


In [56]:
# Show which class list_blobs() returned.
type(blobs)

google.api_core.page_iterator.HTTPIterator

# Upload pandas DataFrame to Google Cloud Bucket

`blob.upload_from_string(df.to_csv(), 'text/csv')`

In [67]:
# Load the local CSV into the DataFrame that the next cell uploads.
upload_df = pd.read_csv("iv_catalog.csv")

In [69]:
# Destination object for the immigrant-visa catalog.
upload_blob = bucket.blob(blob_name="catalog/immigrant_visa_catalog.csv")
# Serialise the frame to CSV text (without the index column) and upload it as text/csv.
iv_csv_text = upload_df.to_csv(index=False)
upload_blob.upload_from_string(iv_csv_text, content_type='text/csv')

In [71]:
# Same upload flow for the non-immigrant-visa catalog: read the local CSV, ...
upload_df2 = pd.read_csv("niv_catalog.csv")
# ... pick the destination object, ...
upload_blob2 = bucket.blob(blob_name="catalog/non_immigrant_visa_catalog.csv")
# ... and upload the frame as CSV text (without the index column).
niv_csv_text = upload_df2.to_csv(index=False)
upload_blob2.upload_from_string(niv_csv_text, content_type='text/csv')

# Read File from Google Cloud Bucket

`pd.read_csv(io.BytesIO(blob.download_as_bytes()))`

In [78]:
# Name (path inside the bucket) of the object that is downloaded below.
catalog_blob.name

'catalog/bulletin_catalog'

In [58]:
# Download the object's content into memory as bytes.
file_contents = catalog_blob.download_as_bytes()

In [60]:
import io

In [61]:
# Wrap the downloaded bytes in an in-memory buffer so pandas can parse them as CSV.
# NOTE(review): the resulting frame shows an 'Unnamed: 0' column, which suggests the
# uploaded CSV was written with its index; consider index_col=0 here if that column
# is not wanted.
df = pd.read_csv(io.BytesIO(file_contents))

In [62]:
# Preview the first rows of the downloaded catalog.
df.head()

Unnamed: 0,url,year,month,stamp
0,https://travel.state.gov/content/travel/en/leg...,2001,DECEMBER,2001-12-01
1,https://travel.state.gov/content/travel/en/leg...,2002,JANUARY,2002-01-01
2,https://travel.state.gov/content/travel/en/leg...,2002,FEBRUARY,2002-02-01
3,https://travel.state.gov/content/travel/en/leg...,2002,MARCH,2002-03-01
4,https://travel.state.gov/content/travel/en/leg...,2002,APRIL,2002-04-01


# List Objects in Google Cloud Bucket

In [76]:
# Collect the names of all objects whose name starts with 'catalog/' ...
bucket_objects = [
    blob.name for blob in client.list_blobs(bucket_name, prefix='catalog/')
]
# ... and print them one per line.
for object_name in bucket_objects:
    print(object_name)

catalog/
catalog/bulletin_catalog
catalog/immigrant_visa_catalog.csv
catalog/non_immigrant_visa_catalog.csv


In [77]:
# Display the collected object names as a list.
bucket_objects

['catalog/',
 'catalog/bulletin_catalog',
 'catalog/immigrant_visa_catalog.csv',
 'catalog/non_immigrant_visa_catalog.csv']