In [8]:
from google.cloud import storage

def list_blobs_and_dirs(project_id, bucket_name, prefix=None):
    """Print the blobs of a Cloud Storage bucket under simulated directory headers.

    Object names are flat strings; everything up to the last "/" in a name is
    treated as that object's directory. Each blob is printed beneath a header
    line for its directory, and blobs without a "/" are printed unindented.

    Args:
        project_id: Project passed to ``storage.Client``.
        bucket_name: Name of the bucket to list.
        prefix: Optional name prefix (e.g. ``"my-folder/"``) forwarded to
            ``list_blobs``; ``None`` or an empty string lists the whole bucket.

    Returns:
        None. The listing is written to stdout.

    Notes:
        Any exception raised while listing or printing is caught and reported
        on stdout instead of propagating. Client and bucket-handle creation
        happen before the ``try`` block, so failures there are raised to the
        caller.
    """

    storage_client = storage.Client(project=project_id)  # Specify the project
    bucket = storage_client.bucket(bucket_name)

    try:
        blobs = bucket.list_blobs(prefix=prefix)

        location = f"gs://{bucket_name}/{prefix}" if prefix else f"gs://{bucket_name}"
        print(f"Contents of {location} (Project: {project_id}):")

        # Header that the next indented file line will visually sit under.
        # Tracking only the most recent header (rather than every header ever
        # printed) means a directory is announced again when the listing
        # returns to it after a nested subdirectory, e.g. a/b.txt, a/c/d.txt,
        # a/e.txt. Otherwise e.txt would appear to live in a/c/.
        current_dir = None

        for blob in blobs:
            dir_path, slash, file_name = blob.name.rpartition("/")

            if not slash:
                # Top-level object: it belongs under no directory header.
                current_dir = None
                print(f"- {blob.name} (Size: {blob.size} bytes)")
                continue

            dir_name = dir_path + "/"
            if dir_name != current_dir:
                print(f"  {dir_name}")
                current_dir = dir_name

            # A name ending in "/" is a directory placeholder object: the
            # header above is all it contributes.
            if file_name:
                print(f"    - {file_name} (Size: {blob.size} bytes)")

    except Exception as e:
        # Best-effort listing for interactive use: report and carry on.
        print(f"An error occurred: {e}")


# Example usage -- substitute your own project ID and bucket name below.
project_id = "ou-dsa5900"
bucket_name = "mmm-spring2025"

# Walk the entire bucket.
list_blobs_and_dirs(project_id=project_id, bucket_name=bucket_name)

# To restrict the listing to a single "directory", pass its prefix as well:
# list_blobs_and_dirs(project_id, bucket_name, "my-folder/")  # Replace with your prefix

Contents of gs://mmm-spring2025 (Project: ou-dsa5900):
  agent-test/
    - DSA5900 Practicum Syllabus.pdf (Size: 192221 bytes)
  bronze/
    - adobe_reformat_raw_update_utf8.csv (Size: 51030 bytes)
    - google_reformat_raw_update_utf8.csv (Size: 3551073 bytes)
    - meta_reformat_raw_update_utf8.csv (Size: 1815599 bytes)
  raw-files/
    - Adobe Ecom Data 100122 - 093023.xlsx (Size: 193540 bytes)
    - Adobe Ecom Data 100123 - 093024.xlsx (Size: 204830 bytes)
    - Google Data 100122 - 110324.xlsx (Size: 1769255 bytes)
    - Google Data 100123 - 093024.xlsx (Size: 3456257 bytes)
    - Meta Data 100123 - 093024.xlsx (Size: 685841 bytes)
    - Meta Data 100522 - 110324.xlsx (Size: 1266301 bytes)


__BigQuery Access with User ID__
The next cells query BigQuery using the signed-in Google user's credentials instead of a service account: first against a public dataset, then against a table in the project.

Start by logging in as the Google user from a Terminal session using:

```bash
gcloud auth application-default login
gcloud config set project <PROJECT-ID>
```

Replace `<PROJECT-ID>` with the ID of your project, such as ou-dsa5900.  The `gcloud auth application-default login` command logs the workstation user in with their Google user ID and stores the resulting authentication credentials locally (e.g., in ~/.config/gcloud/application_default_credentials.json).


In [2]:
from google.cloud import bigquery

# Initialize BigQuery client (automatically picks up credentials).
# No project or credentials are passed explicitly; presumably these come from
# the `gcloud auth application-default login` / `gcloud config set project`
# steps described above -- confirm on your workstation.
client = bigquery.Client()

# Define a public dataset query: any 10 rows from the Austin bikeshare trips
# table hosted in the `bigquery-public-data` project.
query = "SELECT * FROM `bigquery-public-data.austin_bikeshare.bikeshare_trips` LIMIT 10"

# Run the query (submits a query job and returns a handle to it)
query_job = client.query(query)

# Print results: result() yields the rows, and each row is printed with its
# default repr (values plus a field-name -> position mapping).
for row in query_job.result():
    print(row)


Row(('19309516', 'Single Trip', '647', 'classic', datetime.datetime(2019, 2, 24, 9, 32, 17, tzinfo=datetime.timezone.utc), 4051, '10th & Red River', '4051', '10th & Red River', 12), {'trip_id': 0, 'subscriber_type': 1, 'bike_id': 2, 'bike_type': 3, 'start_time': 4, 'start_station_id': 5, 'start_station_name': 6, 'end_station_id': 7, 'end_station_name': 8, 'duration_minutes': 9})
Row(('19309511', 'Single Trip', '12802', 'classic', datetime.datetime(2019, 2, 24, 9, 31, 3, tzinfo=datetime.timezone.utc), 4051, '10th & Red River', '4051', '10th & Red River', 8), {'trip_id': 0, 'subscriber_type': 1, 'bike_id': 2, 'bike_type': 3, 'start_time': 4, 'start_station_id': 5, 'start_station_name': 6, 'end_station_id': 7, 'end_station_name': 8, 'duration_minutes': 9})
Row(('19309522', 'Single Trip', '046G', 'classic', datetime.datetime(2019, 2, 24, 9, 34, 16, tzinfo=datetime.timezone.utc), 4051, '10th & Red River', '4051', '10th & Red River', 11), {'trip_id': 0, 'subscriber_type': 1, 'bike_id': 2, 'b

In [4]:
from google.cloud import bigquery

# Initialize BigQuery client (automatically picks up credentials).
# Re-created here so this cell does not depend on the previous one.
client = bigquery.Client()

# Define a query against the project's own table: any 10 rows from
# `adobe` in dataset `mmm_spring2025` of project `ou-dsa5900`
# (this is NOT one of the `bigquery-public-data` datasets).
query = "SELECT * FROM `ou-dsa5900.mmm_spring2025.adobe` LIMIT 10"

# Run the query (submits a query job and returns a handle to it)
query_job = client.query(query)

# Print results: result() yields the rows, and each row is printed with its
# default repr (values plus a field-name -> position mapping).
for row in query_job.result():
    print(row)


Row((datetime.date(2022, 12, 30), 3607, 85, 12628.97, 4585, 105, 28812.78, 7868, 204, 23481.0, 7310, 128, 18168.6, 2749, 48, 7869.88, 24807, 390, 63216.56, 'adobe'), {'visitdate': 0, 'sportswear_visits': 1, 'sportswear_orders': 2, 'sportswear_revenue': 3, 'baseball_visits': 4, 'baseball_orders': 5, 'baseball_revenue': 6, 'inflates_visits': 7, 'inflates_orders': 8, 'inflates_revenue': 9, 'racquet_visits': 10, 'racquet_orders': 11, 'racquet_revenue': 12, 'golf_visits': 13, 'golf_orders': 14, 'golf_revenue': 15, 'unspecified_visits': 16, 'unspecified_orders': 17, 'unspecified_revenue': 18, 'channel': 19})
Row((datetime.date(2022, 10, 14), 5512, 100, 17983.24, 6076, 108, 31862.43, 7010, 185, 34031.78, 6972, 156, 33433.52, 4014, 54, 10266.46, 30944, 409, 88769.96, 'adobe'), {'visitdate': 0, 'sportswear_visits': 1, 'sportswear_orders': 2, 'sportswear_revenue': 3, 'baseball_visits': 4, 'baseball_orders': 5, 'baseball_revenue': 6, 'inflates_visits': 7, 'inflates_orders': 8, 'inflates_revenue':