# In [1]:
import os
import sys
import pandas as pd
import logging
from google.cloud import bigquery
from hashlib import md5
from typing import List

# SETUP

# Project / dataset identifiers (presumably the BigQuery targets -- they are
# not referenced in this cell, so confirm against the cells that use them).
PROJECT_NAME = "deb-01-372120"
DATASET_NAME = "air_travel"

# Local input data; change DATA_DIR to match your filesystem.
DATA_DIR = "data/air_travel/"
DEFAULT_TICKETS_FILE = os.path.join(DATA_DIR, "tickets.json")


# **** TABLE SCHEMAS ****

# Per-table metadata keyed by table name. Each entry carries the table's name
# and its BigQuery schema; every column is declared with mode='REQUIRED'.
# The schemas are built from (column name, column type) pairs, in column order.
TABLE_METADATA = {
    'airlines': {
        'table_name': 'airlines',
        'schema': [
            bigquery.SchemaField(column, column_type, mode='REQUIRED')
            for column, column_type in (
                ('name', 'string'),
                ('iata', 'string'),
                ('icao', 'string'),
                ('callsign', 'string'),
                ('country', 'string'),
            )
        ],
    },
    'airports': {
        'table_name': 'airports',
        # NOTE(review): 'lattitude' is spelled with a double "t". It is a
        # column name, so it is kept as-is -- confirm against the source data
        # and any existing table before renaming.
        # NOTE(review): 'lattitude'/'longitude' are declared int64; geographic
        # coordinates are usually fractional -- confirm whether float64 is
        # what the data actually needs.
        'schema': [
            bigquery.SchemaField(column, column_type, mode='REQUIRED')
            for column, column_type in (
                ('name', 'string'),
                ('city', 'string'),
                ('country', 'string'),
                ('icao', 'string'),
                ('lattitude', 'int64'),
                ('longitude', 'int64'),
                ('altitude', 'int64'),
                ('tz_timezone', 'string'),
            )
        ],
    },
    'passengers': {
        'table_name': 'passengers',
        'schema': [
            bigquery.SchemaField(column, column_type, mode='REQUIRED')
            for column, column_type in (
                ('record_id', 'string'),
                ('first_name', 'string'),
                ('last_name', 'string'),
                ('gender', 'string'),
                ('birth_date', 'datetime'),
                ('email', 'string'),
                ('street', 'string'),
                ('city', 'string'),
                ('state', 'string'),
                ('zip', 'string'),
                ('start_date', 'datetime'),
                ('end_date', 'datetime'),
            )
        ],
    },
}


# **** SETUP LOGGING ****
# Configure the root logger: records go to stdout, formatted as
# [LEVEL][timestamp][module:line] : message (level name left-justified to a
# width of 5, line number zero-padded to 4 digits).
logging.basicConfig(
    format='[%(levelname)-5s][%(asctime)s][%(module)s:%(lineno)04d] : %(message)s',
    level=logging.INFO,
    stream=sys.stdout,
)

# Alias the root logger as `logger`. getLogger() with no name always returns
# the root logger; getLogger('root') only does so on Python 3.9+, while older
# versions create a separate (non-root) logger that merely happens to be
# named "root".
logger: logging.Logger = logging.getLogger()

# Programmatically reassign the level: this overrides the INFO level passed to
# basicConfig above, so DEBUG records are emitted as well.
logger.setLevel(logging.DEBUG)


# **** BIGQUERY CLIENT ****
# Create the module-level BigQuery client used by the rest of the notebook.
# NOTE(review): the client is built with no arguments, so PROJECT_NAME is not
# passed to it here -- confirm the environment's default project is the
# intended one.
logger.debug("Creating bigquery client")
client = bigquery.Client()

logger.info("Setup Completed")


# Output of the cell above:
# [DEBUG][2023-01-16 08:29:09,167][3941486437:0076] : Creating bigquery client
# [INFO ][2023-01-16 08:29:09,234][3941486437:0079] : Setup Completed
