# access data bucket

Use the Google Cloud Storage client library for Python (`google-cloud-storage`) to access the MPR data in a GCS bucket.

## initializations and connect client

In [1]:
from google.cloud import storage
import os

# Remote object to fetch, and the local path its copy will be written to.
bucketName = 'mwrite-data-bucket-2'
blobName = 'comments_all_W19.tsv'
dataFileName = os.path.join('data', blobName)

# No project is passed, so the client uses the current project by default.
client = storage.Client()

print(f'Client created for project: {client.project}')

Client created for project: mwrite-a835


## get specific bucket

In [2]:
# Look up the configured bucket through the client created earlier.
bucket = client.get_bucket(bucketName)

# Report the bucket's basic metadata, one attribute per output line.
print(
    f'Bucket name: {bucket.name}',
    f'Bucket location: {bucket.location}',
    f'Bucket storage class: {bucket.storage_class}',
    sep='\n',
)

Bucket name: mwrite-data-bucket-2
Bucket location: US
Bucket storage class: STANDARD


## get specific blob

In [3]:
# Fetch the blob's metadata from the bucket. get_blob() returns None when the
# object does not exist, so fail here with a clear message instead of an
# AttributeError on the first attribute access below.
blob = bucket.get_blob(blobName)
if blob is None:
    raise FileNotFoundError(
        f'Blob "{blobName}" not found in bucket "{bucket.name}".'
    )

# NOTE(review): the first line is labelled "Name" but prints blob.id, which in
# the recorded output is longer than blob.name (bucket/object/number). The
# label is left unchanged so the recorded output stays valid; consider
# relabelling it "ID" on the next re-run.
print(f'Name: {blob.id}')
print(f'Size: {blob.size} bytes')
print(f'Content type: {blob.content_type}')
print(f'Public URL: {blob.public_url}')

Name: mwrite-data-bucket-2/comments_all_W19.tsv/1649780699517269
Size: 86395767 bytes
Content type: text/tab-separated-values
Public URL: https://storage.googleapis.com/mwrite-data-bucket-2/comments_all_W19.tsv


## download blob to kernel directory

In [4]:
# Make sure the local target directory exists before downloading; on a fresh
# checkout it may be missing, and the download would then fail when opening
# the destination file.
targetDir = os.path.dirname(dataFileName)
if targetDir:
    os.makedirs(targetDir, exist_ok=True)

# Write the blob's contents to the local data file.
blob.download_to_filename(dataFileName)

print(f'Downloaded blob "{blob.name}" to "{dataFileName}".')

Downloaded blob "comments_all_W19.tsv" to "data/comments_all_W19.tsv".


## miscellaneous

Some other GCS bucket operations that may be useful.

### list buckets

In [5]:
# Ask the client for the buckets it can list.
buckets = client.list_buckets()

# Heading first, then one tab-indented bucket name per line.
print(f'Buckets in {client.project}:')
for item in buckets:
    print('\t', item.name, sep='')

Buckets in mwrite-a835:
	mwrite-data-bucket-1
	mwrite-data-bucket-2


### list blobs

In [6]:
# Ask the bucket for the blobs it contains.
blobs = bucket.list_blobs()

# Heading first, then one tab-indented blob name per line.
print(f'Blobs in {bucket.name}:')
for item in blobs:
    print('\t', item.name, sep='')

Blobs in mwrite-data-bucket-2:
	comments_all_W19.tsv


# 🏁