In [1]:
# this will be used to access the bigquery client
from google.cloud import bigquery
# this will be used to access the service account modules - passwords and access tokens
from google.oauth2 import service_account
# this will be used for returning data, increases the speed
from google.cloud import bigquery_storage

#general dataframe usage
import pandas as pd
# required for certain returns
import pandas_gbq
# exception for a short script
from google.api_core.exceptions import NotFound

In [2]:
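# One-time environment checks and package installs (kept commented out; run manually if a dependency is missing)
# !python --version
# !python -m pip --version
# pip install seaborn
# pip install google-cloud-bigquery db-dtypes
# pip install google-cloud-bigquery-storage
# pip install pandas_gbq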

In [3]:
# Service-account key file (a relative path, so it is looked up from the working directory)
SERVICE_ACCOUNT_JSON = r"portfolio2026-485323-70c4d609b156.json"

# Build the credentials object from the key file; both clients below share it
credentials = service_account.Credentials.from_service_account_file(
    SERVICE_ACCOUNT_JSON
)

# BigQuery client, bound to the project recorded in the service-account credentials
client = bigquery.Client(
    project=credentials.project_id,
    credentials=credentials,
)

# BigQuery Storage read client, passed to to_dataframe() below to download results
bq_storage_client = bigquery_storage.BigQueryReadClient(credentials=credentials)

# Example query: the 10 names with the largest summed `number` for state = 'TX'
query = """
SELECT name, SUM(number) as total
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state = 'TX'
GROUP BY name
ORDER BY total DESC
LIMIT 10
"""

# Submit the query, then materialise its result as a DataFrame via the storage client
query_job = client.query(query)
df = query_job.to_dataframe(bqstorage_client=bq_storage_client)

In [4]:
# Preview the first five rows of the query result
df.head(n=5)

Unnamed: 0,name,total
0,James,272793
1,John,235139
2,Michael,225320
3,Robert,220399
4,David,219028


# Write to BigQuery

A dataset acts as a large container for data. Each dataset holds multiple SQL tables, which are used to store the data and to generate Looker dashboards. These tables are highly optimized for large datasets and make data visualizations faster than other options, such as Google Sheets.

In [5]:
# Materialise the dataset listing into a list so it can be tested for emptiness and iterated
datasets = [*client.list_datasets()]

In [6]:
# Print the ID of every dataset in the list, or a notice when the list is empty
if not datasets:
    print("No datasets found.")
else:
    print("Datasets in project:")
    for dataset in datasets:
        print(dataset.dataset_id)


# function to see if table already exists in big query data set
def table_exists(client, full_table_id):
    """Report whether a table can be fetched through ``client``.

    Args:
        client: Object exposing ``get_table(table_id)``; the notebook passes
            its BigQuery client.
        full_table_id: Table identifier handed to ``client.get_table``; the
            notebook passes a ``project.dataset.table`` string.

    Returns:
        True when ``client.get_table`` returns without raising, False when it
        raises ``NotFound``. Any other exception propagates to the caller.
    """
    try:
        client.get_table(full_table_id)
    except NotFound:
        # The lookup failed specifically because the table is missing
        return False
    else:
        return True

Datasets in project:
portfolio1


In [7]:
# Normalise column labels: lower-case, trim surrounding whitespace, then collapse
# every run of non-word characters into a single underscore
df.columns = (
    df.columns
    .str.lower()
    .str.strip()
    .str.replace(r"[^\w]+", "_", regex=True)
)

In [8]:
# Destination table: <service-account project>.portfolio1.sample_data
full_table_id = f"{credentials.project_id}.portfolio1.sample_data"

# Load settings: WRITE_TRUNCATE disposition (an existing table is overwritten) with autodetect on
job_config = bigquery.LoadJobConfig(
    autodetect=True,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

# Tell the reader whether this run creates the table or overwrites an existing one
if not table_exists(client, full_table_id):
    print("Creating new table...")
else:
    print(f"Table {full_table_id} already exists.")
    print("Overwriting existing table...")

# Start the load job from the DataFrame, then call result() on it
# (kept as the last expression so the cell still displays its return value)
load_job = client.load_table_from_dataframe(
    df,
    full_table_id,
    job_config=job_config,
)
load_job.result()

Table portfolio2026-485323.portfolio1.sample_data already exists.
Overwriting existing table...


LoadJob<project=portfolio2026-485323, location=US, id=ba32ef7e-584f-4f83-92a7-196752dca94e>