In [2]:
from google.cloud import storage

def list_blobs_and_dirs(project_id, bucket_name, prefix=None):
    """Print the blobs of a Cloud Storage bucket grouped under simulated directories.

    Directory names are derived from the "/" separators in each blob name:
    everything up to and including the last "/" is treated as the directory,
    and the remainder as the file name. Blobs whose name ends with "/" only
    produce a directory header. Blobs with no "/" in their name are printed
    unindented as top-level entries.

    Args:
        project_id: Project ID passed to ``storage.Client``.
        bucket_name: Name of the bucket to list.
        prefix: Optional blob-name prefix (e.g. ``"my-folder/"``) forwarded to
            ``bucket.list_blobs``. ``None`` (the default) applies no filter.

    Returns:
        None. The listing is written to stdout. Any exception raised while
        listing or printing is caught and reported as
        ``An error occurred: ...`` instead of being propagated.
    """

    storage_client = storage.Client(project=project_id)  # Specify the project
    bucket = storage_client.bucket(bucket_name)

    try:
        blobs = bucket.list_blobs(prefix=prefix)

        if prefix:
            print(f"Contents of gs://{bucket_name}/{prefix} (Project: {project_id}):")
        else:
            print(f"Contents of gs://{bucket_name} (Project: {project_id}):")

        # Header that was printed most recently. Comparing against this
        # (rather than a set of every header ever printed) re-prints a parent
        # directory's header when the listing returns to it after a nested
        # subdirectory, so each file always appears under its own directory.
        current_dir = None

        for blob in blobs:
            # Split on the LAST "/": `sep` is empty when the name has no "/",
            # and `file_name` is empty when the name ends with "/".
            dir_part, sep, file_name = blob.name.rpartition("/")

            if sep:
                dir_name = dir_part + "/"

                if dir_name != current_dir:
                    print(f"  {dir_name}")
                    current_dir = dir_name

                # A name ending in "/" is a directory placeholder: header only.
                if file_name:
                    print(f"    - {file_name} (Size: {blob.size} bytes)")
            else:
                # Top-level blob: it belongs to no directory, so forget the
                # current header to force a fresh one for whatever follows.
                current_dir = None
                print(f"- {blob.name} (Size: {blob.size} bytes)")

    except Exception as e:
        print(f"An error occurred: {e}")


# Example usage -- substitute your own project ID and bucket name below
project_id, bucket_name = "ou-dsa5900", "mmm-spring2025"

# Print the listing for the entire bucket (no prefix filter)
list_blobs_and_dirs(project_id=project_id, bucket_name=bucket_name)

# To list only one "directory", pass its prefix as the third argument:
# list_blobs_and_dirs(project_id, bucket_name, "my-folder/")  # Replace with your prefix

Contents of gs://mmm-spring2025 (Project: ou-dsa5900):
  bronze/
    - Adobe Cleaned.csv (Size: 25786 bytes)
    - Google Cleaned.csv (Size: 3277278 bytes)
    - Meta Cleaned.csv (Size: 1738839 bytes)
  raw-files/
    - Adobe Data Jan 23 - Jan 25.xlsx (Size: 71681 bytes)
    - Google Data Jan 23 - Jan 25.xlsx (Size: 1626890 bytes)
    - Meta Data Jan 23 - Jan 25.xlsx (Size: 1255659 bytes)


__BigQuery Access with User ID__
The next cell queries a public BigQuery dataset using the signed-in Google user's credentials instead of a service account.

Start by logging in as the Google user from a Terminal session using:

gcloud auth application-default login

gcloud config set project PROJECT-ID

Replace PROJECT-ID with the ID of your project, such as ou-dsa5900.  The first command logs the user of the workstation in with their Google user ID and stores the authentication credentials locally (e.g., in ~/.config/gcloud/application_default_credentials.json).  The second command sets the default project for gcloud.


In [3]:
from google.cloud import bigquery

# Build the BigQuery client; no credentials are passed, so it uses whatever
# credentials it finds in the environment
client = bigquery.Client()

# SQL for a 10-row sample of a table in a public dataset
query = (
    "SELECT * "
    "FROM `bigquery-public-data.austin_bikeshare.bikeshare_trips` "
    "LIMIT 10"
)

# Submit the query
query_job = client.query(query)

# Show every row of the result, one per line
for row in query_job.result():
    print(row)


Row(('26599763', 'Pay-as-you-ride', '21707', 'electric', datetime.datetime(2022, 5, 6, 14, 19, 39, tzinfo=datetime.timezone.utc), 4051, '10th/Red River', '4051', '10th/Red River', 195), {'trip_id': 0, 'subscriber_type': 1, 'bike_id': 2, 'bike_type': 3, 'start_time': 4, 'start_station_id': 5, 'start_station_name': 6, 'end_station_id': 7, 'end_station_name': 8, 'duration_minutes': 9})
Row(('26742903', '3-Day Weekender', '17460', 'electric', datetime.datetime(2022, 5, 23, 16, 24, 46, tzinfo=datetime.timezone.utc), 4051, '10th/Red River', '4051', '10th/Red River', 2), {'trip_id': 0, 'subscriber_type': 1, 'bike_id': 2, 'bike_type': 3, 'start_time': 4, 'start_station_id': 5, 'start_station_name': 6, 'end_station_id': 7, 'end_station_name': 8, 'duration_minutes': 9})
Row(('26599923', 'Pay-as-you-ride', '19453', 'electric', datetime.datetime(2022, 5, 6, 14, 37, 41, tzinfo=datetime.timezone.utc), 4051, '10th/Red River', '4051', '10th/Red River', 178), {'trip_id': 0, 'subscriber_type': 1, 'bike_

In [4]:
from google.cloud import bigquery

# Build the BigQuery client; no credentials are passed, so it uses whatever
# credentials it finds in the environment
client = bigquery.Client()

# SQL for a 10-row sample of the `adobe` table in this project's own
# `mmm_spring2025` dataset (not a public dataset).
# NOTE(review): the saved output of this cell is a 404 NotFound for this table
# in location us-central1 -- verify the dataset/table name and the dataset's
# location before relying on this cell.
query = (
    "SELECT * "
    "FROM `ou-dsa5900.mmm_spring2025.adobe` "
    "LIMIT 10"
)

# Submit the query
query_job = client.query(query)

# Show every row of the result, one per line
for row in query_job.result():
    print(row)


NotFound: 404 Not found: Table ou-dsa5900:mmm_spring2025.adobe was not found in location us-central1; reason: notFound, message: Not found: Table ou-dsa5900:mmm_spring2025.adobe was not found in location us-central1

Location: us-central1
Job ID: 0d7136af-5d64-4946-98e5-24a525eb71e8
