# Create BigQuery Tables from CSVs

In [None]:
# Install (or upgrade to the newest release of) the google-cloud-bigquery client library
# into the environment used by this kernel.
# NOTE(review): the version is not pinned, so the installed release can change between runs — consider pinning.
%pip install --upgrade google-cloud-bigquery

In [3]:
# Log in with gcloud to set up Application Default Credentials
# (presumably what bigquery.Client() picks up in the next cell — confirm for your environment).
# Both stdout and stderr are redirected to /dev/null, so any prompt or error printed by gcloud is not shown here.
!gcloud auth application-default login > /dev/null 2>&1

In [1]:
import datetime

from google.cloud import bigquery
import pandas
import pytz

# Fully-qualified dataset ID in "project.dataset" form; change this to your own BigQuery dataset.
bq_data_id = "neo4jeventdemos.bomv2"

# BigQuery client used by every cell below (created with default settings).
client = bigquery.Client()

In [2]:
# Build the Dataset object (from the "project.dataset" ID) to send to the API.
dataset = bigquery.Dataset(bq_data_id)

# TODO(developer): Specify the geographic location where the dataset should reside.
dataset.location = "US"

# Send the dataset to the API for creation, with an explicit timeout.
# Because exists_ok=True, an "already exists" error is ignored instead of raising
# google.api_core.exceptions.Conflict, so this cell can safely be re-run.
dataset = client.create_dataset(dataset, exists_ok=True, timeout=30)  # Make an API request.

# Report the project the dataset belongs to (dataset.project, taken from bq_data_id).
# client.project is only the client's default project and may differ from it.
# Note: this message is printed whether the dataset was just created or already existed.
print("Created dataset {}.{}".format(dataset.project, dataset.dataset_id))

Created dataset neo4jeventdemos.bomv2


In [15]:
# Suppliers: load suppliers.csv from Cloud Storage into the `suppliers` table.
table_id = f"{bq_data_id}.suppliers"
uri = "gs://neo4j-workshop-data/genai-bom/suppliers.csv"

# `code` is the only mandatory column; the remaining columns keep SchemaField's default mode.
supplier_schema = [bigquery.SchemaField("code", "STRING", mode="REQUIRED")]
supplier_schema.extend(
    bigquery.SchemaField(column_name, column_type)
    for column_name, column_type in [
        ("annual_spend", "FLOAT"),
        ("tier", "STRING"),
        ("sub_type", "STRING"),
        ("category", "STRING"),
        ("latitude", "FLOAT"),
        ("longitude", "FLOAT"),
    ]
)

job_config = bigquery.LoadJobConfig()
job_config.schema = supplier_schema
job_config.skip_leading_rows = 1  # Skip the first row of the file (presumably the CSV header).
job_config.write_disposition = "WRITE_TRUNCATE"  # Overwrite the table's existing data, if any.

# Start the load job (API request), then block until it has completed.
load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
load_job.result()

LoadJob<project=neo4jeventdemos, location=US, id=d56601e7-1cd5-40f4-86be-7d7472a6068f>

In [16]:
# SupplierItems: load supplier-items.csv from Cloud Storage into the `supplier_items` table.
table_id = f"{bq_data_id}.supplier_items"
uri = "gs://neo4j-workshop-data/genai-bom/supplier-items.csv"

# Both columns are mandatory strings.
supplier_item_columns = [("supplier_code", "STRING"), ("sku_id", "STRING")]

job_config = bigquery.LoadJobConfig()
job_config.schema = [
    bigquery.SchemaField(column_name, column_type, mode="REQUIRED")
    for column_name, column_type in supplier_item_columns
]
job_config.skip_leading_rows = 1  # Skip the first row of the file (presumably the CSV header).
job_config.write_disposition = "WRITE_TRUNCATE"  # Overwrite the table's existing data, if any.

# Start the load job (API request), then block until it has completed.
load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
load_job.result()

LoadJob<project=neo4jeventdemos, location=US, id=40fe84ec-e0cc-4049-aea8-07060533ac0a>

In [3]:
# Items: load items.csv from Cloud Storage into the `items` table.
table_id = f"{bq_data_id}.items"
uri = "gs://neo4j-workshop-data/genai-bom/items.csv"

# Every column is mandatory; (name, type) pairs are listed in file column order.
item_columns = [
    ("sku_id", "STRING"),
    ("name", "STRING"),
    ("family", "STRING"),
    ("is_finished_product", "BOOLEAN"),
]

job_config = bigquery.LoadJobConfig()
job_config.schema = [
    bigquery.SchemaField(column_name, column_type, mode="REQUIRED")
    for column_name, column_type in item_columns
]
job_config.skip_leading_rows = 1  # Skip the first row of the file (presumably the CSV header).
job_config.write_disposition = "WRITE_TRUNCATE"  # Overwrite the table's existing data, if any.

# Start the load job (API request), then block until it has completed.
load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
load_job.result()

LoadJob<project=neo4jeventdemos, location=US, id=a75bb5e9-f1d6-4e4c-a9f0-78417bbab71c>

In [18]:
# BOMTable: load bom-table.csv from Cloud Storage into the `bom_table` table.
table_id = f"{bq_data_id}.bom_table"
uri = "gs://neo4j-workshop-data/genai-bom/bom-table.csv"

# Two mandatory string columns: the parent SKU and the child SKU.
bom_column_names = ["parent_sku_id", "child_sku_id"]

job_config = bigquery.LoadJobConfig()
job_config.schema = [
    bigquery.SchemaField(column_name, "STRING", mode="REQUIRED")
    for column_name in bom_column_names
]
job_config.skip_leading_rows = 1  # Skip the first row of the file (presumably the CSV header).
job_config.write_disposition = "WRITE_TRUNCATE"  # Overwrite the table's existing data, if any.

# Start the load job (API request), then block until it has completed.
load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
load_job.result()

LoadJob<project=neo4jeventdemos, location=US, id=a4bfb6c8-408e-43cc-832f-f6180f289efb>

In [19]:
# Customers: load customers.csv from Cloud Storage into the `customers` table.
table_id = f"{bq_data_id}.customers"
uri = "gs://neo4j-workshop-data/genai-bom/customers.csv"

# `code` is a mandatory integer; the remaining columns keep SchemaField's default mode.
customer_schema = [bigquery.SchemaField("code", "INTEGER", mode="REQUIRED")]
customer_schema.extend(
    bigquery.SchemaField(column_name, column_type)
    for column_name, column_type in [
        ("cust_family_code", "STRING"),
        ("annual_revenue", "FLOAT"),
        ("latitude", "FLOAT"),
        ("longitude", "FLOAT"),
    ]
)

job_config = bigquery.LoadJobConfig()
job_config.schema = customer_schema
job_config.skip_leading_rows = 1  # Skip the first row of the file (presumably the CSV header).
job_config.write_disposition = "WRITE_TRUNCATE"  # Overwrite the table's existing data, if any.

# Start the load job (API request), then block until it has completed.
load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
load_job.result()

LoadJob<project=neo4jeventdemos, location=US, id=91a6268d-28f7-4dc8-97c6-e8b670f01d7d>

In [21]:
# CustomerItems: load customer-items.csv from Cloud Storage into the `customer_items` table.
table_id = f"{bq_data_id}.customer_items"
uri = "gs://neo4j-workshop-data/genai-bom/customer-items.csv"

# Both columns are mandatory: an integer customer code and a string SKU ID.
customer_item_columns = [("customer_code", "INTEGER"), ("sku_id", "STRING")]

job_config = bigquery.LoadJobConfig()
job_config.schema = [
    bigquery.SchemaField(column_name, column_type, mode="REQUIRED")
    for column_name, column_type in customer_item_columns
]
job_config.skip_leading_rows = 1  # Skip the first row of the file (presumably the CSV header).
job_config.write_disposition = "WRITE_TRUNCATE"  # Overwrite the table's existing data, if any.

# Start the load job (API request), then block until it has completed.
load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
load_job.result()

LoadJob<project=neo4jeventdemos, location=US, id=19a516dc-36af-4cac-81b1-435efc513c3b>