# access data bucket

Use the Google Cloud Storage client library for Python (`google.cloud.storage`) to access the MPR data in a GCS bucket.

## initialize and connect client

In [24]:
from google.cloud import storage
import os

# GCS coordinates of the data file: the object (blob) and the bucket holding it.
blobName = 'comments_all_W19.tsv'
bucketName = 'mpr-research-data-uploads'

# The local copy keeps the blob's name and goes under `../data`,
# relative to the kernel's working directory.
dataFileName = os.path.join('..', 'data', blobName)

# No project is passed, so the client uses the current project by default.
client = storage.Client()
print(f'Client created for project: {client.project}')

Client created for project: mwrite-a835


## get specific bucket

In [15]:
# Look up the bucket by name, then report a few of its properties.
bucket = client.get_bucket(bucketName)

# One print call, one property per output line.
print(
    f'Bucket name: {bucket.name}',
    f'Bucket location: {bucket.location}',
    f'Bucket storage class: {bucket.storage_class}',
    sep='\n',
)

Bucket name: mpr-research-data-uploads
Bucket location: US
Bucket storage class: STANDARD


## get specific blob

In [22]:
# Look up the blob's metadata in the bucket.
blob = bucket.get_blob(blobName)

# `get_blob()` returns None (rather than raising) when the object does not
# exist, so fail here with a clear message instead of an AttributeError on
# the first attribute access below.
if blob is None:
    raise FileNotFoundError(
        f'Blob "{blobName}" not found in bucket "{bucket.name}".'
    )

# NOTE: `blob.id` is the full object ID (bucket name, object name, and a
# numeric suffix), not the bare object name; use `blob.name` for that.
print(f'Name: {blob.id}')
print(f'Size: {blob.size} bytes')
print(f'Content type: {blob.content_type}')
print(f'Public URL: {blob.public_url}')

Name: mpr-research-data-uploads/comments_all_W19.tsv/1651756615245369
Size: 86395767 bytes
Content type: text/tab-separated-values
Public URL: https://storage.googleapis.com/mpr-research-data-uploads/comments_all_W19.tsv


## download blob to local data directory

In [25]:
# The download call writes straight to `dataFileName`, so make sure the
# destination directory exists first (it may not on a fresh checkout).
os.makedirs(os.path.dirname(dataFileName), exist_ok=True)

print(f'Downloading to "{dataFileName}"…')

# Write the blob's contents to the local file.
blob.download_to_filename(dataFileName)

print(f'Downloaded blob "{blob.name}" to "{dataFileName}".')

Downloading to "../data/comments_all_W19.tsv"…
Downloaded blob "comments_all_W19.tsv" to "../data/comments_all_W19.tsv".


## miscellaneous

Some other GCS bucket operations that may be useful.

### list buckets

In [5]:
# Ask the client for the buckets it can list.
buckets = client.list_buckets()

# Heading line, then one tab-indented bucket name per line.
print(f'Buckets in {client.project}:')
for gcsBucket in buckets:
    print('\t' + gcsBucket.name)

Buckets in mwrite-a835:
	mpr-research-data-uploads
	mwrite-data-bucket-1


### list blobs

In [6]:
# Ask the bucket for the blobs it holds.
blobs = bucket.list_blobs()

# Heading line, then one tab-indented blob name per line.
print(f'Blobs in {bucket.name}:')
for gcsBlob in blobs:
    print('\t' + gcsBlob.name)

Blobs in mpr-research-data-uploads:
	495677 - Math 216 WN 2022.tsv
	496007 - CLIMATE 102 001 WN 2022.tsv
	496915 - MOVESCI 110 WN 2022.tsv
	498928 - CHEM 216 100 WN 2022.tsv
	506914 - MATSCIE 250 100 WN 2022.tsv
	508768 - ECON 101 300 WN 2022.tsv
	516081 - STATS 250 ROMERO WN 22.tsv
	comments_all_W19.tsv


# 🏁