In [1]:
import os
import sys
import pandas as pd
import logging
from google.cloud import bigquery
from hashlib import md5
from typing import List
    
# **** SETUP Configuration Parameters****

###### Declare variables #######

# Project and dataset names.
PROJECT_NAME: str = "debweek7-nb"
DATASET_NAME: str = "trip_data"

# Data folder, and the default tickets file inside it
# (folder and file name are joined with os.path.join).
FILE_DIR: str = "../air_travel/"
DEFAULT_TICKETS_FILE: str = os.path.join(FILE_DIR, "tickets.json")

# **** Define Dimension TABLE SCHEMAS ****
def _build_schema(columns):
    """Return a new list with one SchemaField per (name, type, mode) triple."""
    return [
        bigquery.SchemaField(column, field_type, mode=mode)
        for column, field_type, mode in columns
    ]


# Column specs as (name, type, mode) triples, listed in schema order.
_AIRLINE_COLUMNS = (
    ('IATA_code', 'string', 'REQUIRED'),
    ('ICAO_code', 'string', 'REQUIRED'),
    ('airline_name', 'string', 'NULLABLE'),
    ('callsign', 'string', 'NULLABLE'),
    ('country', 'string', 'REQUIRED'),
)

# The origin and destination airport tables use the same column layout.
_AIRPORT_COLUMNS = (
    ('ICAO_code', 'string', 'REQUIRED'),
    ('IATA_code', 'string', 'REQUIRED'),
    ('airport_name', 'string', 'NULLABLE'),
    ('city', 'string', 'REQUIRED'),
    ('country', 'string', 'REQUIRED'),
    ('latitude', 'float', 'REQUIRED'),
    ('longitude', 'float', 'REQUIRED'),
    ('altitude', 'int64', 'REQUIRED'),
    ('tz_timezone', 'string', 'REQUIRED'),
)

_PASSENGER_COLUMNS = (
    ('passenger_id', 'int64', 'REQUIRED'),
    ('latest_email', 'string', 'NULLABLE'),
    ('email', 'string', 'REQUIRED'),
    ('first_name', 'string', 'REQUIRED'),
    ('last_name', 'string', 'REQUIRED'),
    ('gender', 'string', 'REQUIRED'),
    ('birth_date', 'date', 'REQUIRED'),
    ('street', 'string', 'REQUIRED'),
    ('city', 'string', 'REQUIRED'),
    ('state', 'string', 'REQUIRED'),
    ('zip', 'int64', 'REQUIRED'),
    ('email_start_dte', 'date', 'REQUIRED'),
    ('email_end_dte', 'date', 'REQUIRED'),
)

# One entry per dimension table, keyed by table name; every table gets its
# own freshly built schema list (nothing is shared between entries).
# Note: indexes are written only if they are named in the schema.
TABLE_METADATA = {
    table: {'table_name': table, 'schema': _build_schema(columns)}
    for table, columns in (
        ('airlines', _AIRLINE_COLUMNS),
        ('origin_airports', _AIRPORT_COLUMNS),
        ('dest_airports', _AIRPORT_COLUMNS),
        ('passengers', _PASSENGER_COLUMNS),
    )
}
# **** SETUP Python LOGGING ****

# Configure the root logger: records are written to stdout, formatted as
# [LEVEL][timestamp][module:line] : message.
logging.basicConfig(
    format='[%(levelname)-5s][%(asctime)s][%(module)s:%(lineno)04d] : %(message)s',
    level=logging.INFO,
    stream=sys.stdout,
)
# Alias the root logger as `logger`.  getLogger() with no name returns the
# root logger on every Python version, whereas getLogger('root') only does so
# on Python 3.9+ (older versions create a separate logger named "root").
logger: logging.Logger = logging.getLogger()
# Programmatically override the INFO level configured above so that DEBUG
# records are emitted as well.
logger.setLevel(logging.DEBUG)
	
# **** BIGQUERY CLIENT ****
logger.debug("Creating bigquery client")
# Use the PROJECT_NAME constant rather than repeating the project id literal,
# so the client and the rest of the configuration cannot drift apart.
client = bigquery.Client(project=PROJECT_NAME)
logger.info("Setup Completed")



[DEBUG][2023-12-23 16:50:49,258][131581134:0094] : Creating bigquery client
[DEBUG][2023-12-23 16:50:49,259][_default:0255] : Checking /home/marvel/.creds/dsa-deb-sa.json for explicit credentials as part of auth process...
[INFO ][2023-12-23 16:50:49,261][131581134:0096] : Setup Completed
