## Set-up

In [9]:
# Upgrade pip
!pip install --upgrade pip

Collecting pip
  Downloading pip-23.2.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m29.3 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.1.2
    Uninstalling pip-23.1.2:
      Successfully uninstalled pip-23.1.2
[0mSuccessfully installed pip-23.2.1


In [16]:
# Installing the latest version of the package
!pip install --user google-cloud-bigquery==3.4.1
!pip install --user pandas
!pip install google-cloud-bigquery



In [1]:
from google.cloud import bigquery

## Load data to GCS

In [3]:
# List all Cloud Storage buckets within the current gcloud project.
!gcloud storage ls

gs://default-credit-clients-2023/
gs://terraform_state_bucket_bqml_sandbox_2023_09/


In [5]:
# Copy the local CSV file into the gs://default-credit-clients-2023 bucket.
!gsutil cp default-of-credit-card-clients.csv gs://default-credit-clients-2023

Copying file://default-of-credit-card-clients.csv [Content-Type=text/csv]...
- [1 files][  2.8 MiB/  2.8 MiB]                                                
Operation completed over 1 objects/2.8 MiB.                                      


## Load data to BigQuery

In [2]:
# Define Constans
shell_output=!gcloud config list --format 'value(core.project)' 2>/dev/null # to supress any error messages
PROJECT_ID = shell_output[0]

LOCATION = "EU"

In [4]:
# Initialize the BigQuery client for the configured project.
# LOCATION is passed as the client's default location so that jobs started
# through this client run in the region declared in the constants cell.
bq_client = bigquery.Client(project=PROJECT_ID, location=LOCATION)

In [5]:
# Define the table schema.
# Each entry is a (column name, BigQuery type, mode) triple; the triples are
# turned into bigquery.SchemaField objects in the same order they are listed.
column_specs = [
    ("ID", "INTEGER", "REQUIRED"),
    ("LIMIT_BAL", "INTEGER", "NULLABLE"),
    ("SEX", "STRING", "NULLABLE"),
    ("EDUCATION", "STRING", "NULLABLE"),
    ("MARRIAGE", "STRING", "NULLABLE"),
    ("AGE", "INTEGER", "NULLABLE"),
    # Repayment-status style columns (the names go from PAY_0 straight to PAY_2).
    ("PAY_0", "INTEGER", "NULLABLE"),
    ("PAY_2", "INTEGER", "NULLABLE"),
    ("PAY_3", "INTEGER", "NULLABLE"),
    ("PAY_4", "INTEGER", "NULLABLE"),
    ("PAY_5", "INTEGER", "NULLABLE"),
    ("PAY_6", "INTEGER", "NULLABLE"),
    # BILL_AMT1..6
    ("BILL_AMT1", "INTEGER", "NULLABLE"),
    ("BILL_AMT2", "INTEGER", "NULLABLE"),
    ("BILL_AMT3", "INTEGER", "NULLABLE"),
    ("BILL_AMT4", "INTEGER", "NULLABLE"),
    ("BILL_AMT5", "INTEGER", "NULLABLE"),
    ("BILL_AMT6", "INTEGER", "NULLABLE"),
    # PAY_AMT1..6
    ("PAY_AMT1", "INTEGER", "NULLABLE"),
    ("PAY_AMT2", "INTEGER", "NULLABLE"),
    ("PAY_AMT3", "INTEGER", "NULLABLE"),
    ("PAY_AMT4", "INTEGER", "NULLABLE"),
    ("PAY_AMT5", "INTEGER", "NULLABLE"),
    ("PAY_AMT6", "INTEGER", "NULLABLE"),
    # Final column; its name contains spaces.
    ("default payment next month", "INTEGER", "REQUIRED"),
]

schema = [
    bigquery.SchemaField(column_name, column_type, mode=column_mode)
    for column_name, column_type, column_mode in column_specs
]

In [7]:
# Create the destination table (the BQ_ML_ID dataset must already exist).
# The project part of the ID comes from PROJECT_ID instead of a hard-coded
# literal, so the notebook is not tied to one specific project.
dataset = bigquery.Dataset(dataset_ref=f"{PROJECT_ID}.BQ_ML_ID")
table_ref = dataset.table("CUSTOMER_DEFAULT_DATA")
table = bigquery.Table(table_ref=table_ref,
                       schema=schema)
# exists_ok=True keeps this cell re-runnable: if the table is already there,
# the existing table is returned instead of a Conflict error being raised.
table = bq_client.create_table(table, exists_ok=True)  # Make an API request.
print(
    "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id)
)

Created table bqml-sandbox-396011.BQ_ML_ID.CUSTOMER_DEFAULT_DATA


In [8]:
# Load the CSV from Cloud Storage into the BigQuery table.
# The project part of the table ID comes from PROJECT_ID instead of a
# hard-coded literal.
table_id = f"{PROJECT_ID}.BQ_ML_ID.CUSTOMER_DEFAULT_DATA"
job_config = bigquery.LoadJobConfig(
    schema=schema,
    skip_leading_rows=1,  # the first row of the file is skipped (header row)
    source_format=bigquery.SourceFormat.CSV,
    field_delimiter=";",  # the file is parsed as semicolon-separated
    # WRITE_TRUNCATE replaces the table contents on every run, so re-running
    # this cell does not append a second copy of the same rows.
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

uri = "gs://default-credit-clients-2023/default-of-credit-card-clients.csv"

load_job = bq_client.load_table_from_uri(
    uri, table_id, job_config=job_config
)  # Make an API request.

load_job.result()  # Waits for the job to complete.

LoadJob<project=bqml-sandbox-396011, location=EU, id=38b4f3e0-ac74-40fd-9d94-00f05f6c5482>