In [5]:
import os
import sys
import logging
import pandas as pd
from google.cloud import bigquery
from hashlib import md5
from typing import List
import json


# **** SETUP ****

# location of the exported cards CSV on the local file system
CSV_DATA = "../data/standard_cards.csv"
# BigQuery project and dataset that own the destination table
PROJECT_NAME = "mtg-capstone-414921"
DATASET_NAME = "mtg_cards"

# Per-table metadata, keyed by a short table key. Each entry carries the
# BigQuery table name and the ordered list of SchemaField column definitions.
TABLE_METADATA = {
    'standard_format_2024': {
        'table_name': 'standard_format_2024',
        # one (column name, BigQuery type, mode) triple per column, in table order
        'schema': [
            bigquery.SchemaField(column, dtype, mode=mode)
            for column, dtype, mode in (
                ('uuid', 'string', 'REQUIRED'),
                ('number', 'string', 'REQUIRED'),
                ('name', 'string', 'REQUIRED'),
                ('setCode', 'string', 'REQUIRED'),
                ('text', 'string', 'NULLABLE'),
                ('manaCost', 'string', 'NULLABLE'),
                ('manaValue', 'float64', 'NULLABLE'),
                ('colors', 'string', 'NULLABLE'),
                ('colorIdentity', 'string', 'NULLABLE'),
                ('power', 'string', 'NULLABLE'),
                ('toughness', 'string', 'NULLABLE'),
                ('type', 'string', 'NULLABLE'),
                ('types', 'string', 'NULLABLE'),
                ('subtypes', 'string', 'NULLABLE'),
                ('supertypes', 'string', 'NULLABLE'),
                ('rarity', 'string', 'NULLABLE'),
                ('artist', 'string', 'NULLABLE'),
                ('printings', 'string', 'NULLABLE'),
                ('language', 'string', 'NULLABLE'),
            )
        ],
    },
}

# Configure the root logger: one stdout handler with a
# [LEVEL][timestamp][module:line] prefix on every message.
logging.basicConfig(
    format='[%(levelname)-5s][%(asctime)s][%(module)s:%(lineno)04d] : %(message)s',
    level=logging.INFO,
    stream=sys.stdout
)

# Alias the root logger as `logger`. Calling getLogger() with no name is the
# portable way to get it: getLogger('root') only returns the real root logger
# on Python 3.9+, and creates a separate logger named "root" on older versions.
logger: logging.Logger = logging.getLogger()

# Programmatically lower the threshold to DEBUG. basicConfig() does nothing
# when the root logger already has handlers (e.g. when this cell is re-run),
# so the explicit setLevel() is what guarantees the level.
# NOTE: because this is the root logger, loggers that inherit their level from
# it will also emit DEBUG records.
logger.setLevel(logging.DEBUG)

In [6]:
# Load the standard-format cards CSV into a pandas dataframe.
# NOTE(review): this path differs from the CSV_DATA constant ("../data/...")
# defined in the setup cell — confirm which one is correct and use a single source.
standard_format_df = pd.read_csv('./data/standard_cards.csv')

# Preview the first rows; leaving the frame as the cell's last expression gives
# the notebook's rich table display instead of wrapped plain text from print().
standard_format_df.head(5)

                                   uuid number                name setCode  \
0  6ff98307-b89c-5a43-bc3c-3f81d803617d      1    Aeronaut Cavalry     BRO   
1  e4d66ec1-7ba2-5c80-a9c1-e33500cfdbb5      2    Airlift Chaplain     BRO   
2  8d065ec5-4e7f-50f6-92c8-277673a6fb19      3  Ambush Paratrooper     BRO   
3  45e16536-c429-54c9-906e-43b9f9ee83b2      4     Calamity's Wake     BRO   
4  d184bcd7-1c35-558f-8fd4-c2065a349018      5      Deadly Riposte     BRO   

                                                text manaCost  manaValue  \
0  Flying\nWhen Aeronaut Cavalry enters the battl...   {4}{W}        5.0   
1  Flying\nWhen Airlift Chaplain enters the battl...   {2}{W}        3.0   
2  Flash\nFlying\n{5}: Creatures you control get ...   {1}{W}        2.0   
3  Exile all graveyards. Players can't cast noncr...   {1}{W}        2.0   
4  Deadly Riposte deals 3 damage to target tapped...   {1}{W}        2.0   

  colors colorIdentity power toughness                      type     types

In [7]:
# Point google-auth at the service-account key BEFORE the client is created.
# The client looks up default credentials when it is constructed, so exporting
# the variable afterwards only works if it is left over from an earlier run of
# this cell (hidden kernel state). setdefault() lets a value already exported in
# the environment take precedence over this machine-specific fallback path.
os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", "/Users/kairo/.creds/mtg-capstone-key.json")

# Fully-qualified destination table: <project>.<dataset>.<table>
standard_table_name = f"{PROJECT_NAME}.{DATASET_NAME}.{TABLE_METADATA['standard_format_2024']['table_name']}"
# Column definitions for the destination table
standard_schema = TABLE_METADATA['standard_format_2024']['schema']
client = bigquery.Client(project=PROJECT_NAME)

def load_table(
    df: pd.DataFrame,
    client: bigquery.Client,
    table_name: str,
    schema: List[bigquery.SchemaField],
    create_disposition: str = 'CREATE_IF_NEEDED',
    write_disposition: str = 'WRITE_TRUNCATE'
) -> None:
    """Upload a dataframe to a BigQuery table and log the resulting row count.

    Args:
        df (pd.DataFrame): rows to upload
        client (bigquery.Client): client used to run the load job and look up the table
        table_name (str): fully-qualified table name, i.e. three dot-separated parts
            (project, dataset, table)
        schema (List[bigquery.SchemaField]): column definitions passed to the load job
        create_disposition (str, optional): table creation behaviour passed to the
            load job. Defaults to 'CREATE_IF_NEEDED'.
        write_disposition (str, optional): write behaviour passed to the load job.
            Defaults to 'WRITE_TRUNCATE'.

    Raises:
        AssertionError: if `table_name` does not split into exactly three
            dot-separated parts.
    """

    # three dot-separated parts means exactly two dots in the name
    dot_count = table_name.count('.')
    assert dot_count == 2, f"Table name must be a full bigquery table name including project and dataset id: '{table_name}'"

    # describe the load job: schema plus the create/write dispositions
    load_options = bigquery.LoadJobConfig(
        schema=schema,
        write_disposition=write_disposition,
        create_disposition=create_disposition,
    )

    logger.info(f"loading table: '{table_name}'")
    load_job = client.load_table_from_dataframe(
        df,
        destination=table_name,
        job_config=load_options,
    )
    # wait on the job's result before inspecting the table
    load_job.result()

    # look the table up again to report its row count and full id
    loaded_table = client.get_table(table_name)
    logger.info(f"loaded {loaded_table.num_rows} rows into {loaded_table.full_table_id}")

# Upload the cards dataframe to the standard-format table, using the
# function's default create/write dispositions.
load_table(
    df=standard_format_df,
    client=client,
    table_name=standard_table_name,
    schema=standard_schema,
)

[DEBUG][2024-02-25 14:23:16,176][_default:0255] : Checking /Users/kairo/.creds/mtg-capstone-key.json for explicit credentials as part of auth process...
[INFO ][2024-02-25 14:23:16,179][1510672391:0035] : loading table: 'mtg-capstone-414921.mtg_cards.standard_format_2024'
[DEBUG][2024-02-25 14:23:16,189][retry:0282] : Converted retries value: 3 -> Retry(total=3, connect=None, read=None, redirect=None, status=None)
[DEBUG][2024-02-25 14:23:16,225][requests:0185] : Making request: POST https://oauth2.googleapis.com/token
[DEBUG][2024-02-25 14:23:16,267][connectionpool:1055] : Starting new HTTPS connection (1): oauth2.googleapis.com:443
[DEBUG][2024-02-25 14:23:16,376][connectionpool:0549] : https://oauth2.googleapis.com:443 "POST /token HTTP/1.1" 200 None
[DEBUG][2024-02-25 14:23:16,379][connectionpool:1055] : Starting new HTTPS connection (1): bigquery.googleapis.com:443
[DEBUG][2024-02-25 14:23:17,409][connectionpool:0549] : https://bigquery.googleapis.com:443 "POST /upload/bigquery/v2