In [68]:
import os
import sys
import pandas as pd
import logging
import json
import uuid
from datetime import date
from google.cloud import bigquery
from hashlib import md5
from typing import List


# SETUP

DATA_DIR = "data/air_travel/"
DEFAULT_TICKETS_FILE = os.path.join(DATA_DIR, "tickets.json") 
PROJECT_NAME = "deb-01-372120"
DATASET_NAME = "air_travel"


# **** TABLE SCHEMAS ****

TABLE_METADATA = {
    'airlines': {
        'table_name': 'airlines',
        'schema': [
            bigquery.SchemaField('name', 'string', mode='REQUIRED'),
            bigquery.SchemaField('iata', 'string', mode='REQUIRED'),
            bigquery.SchemaField('icao', 'string', mode='REQUIRED'),
            bigquery.SchemaField('callsign', 'string', mode='REQUIRED'),
            bigquery.SchemaField('country', 'string', mode='REQUIRED')
        ]
    },
    'airports': {
        'table_name': 'airports',
        'schema': [
            bigquery.SchemaField('name', 'string', mode='REQUIRED'),
            bigquery.SchemaField('city', 'string', mode='REQUIRED'),
            bigquery.SchemaField('country', 'string', mode='REQUIRED'),
            bigquery.SchemaField('iata', 'string', mode='REQUIRED'),
            bigquery.SchemaField('icao', 'string', mode='REQUIRED'),
            bigquery.SchemaField('latitude', 'float64', mode='REQUIRED'),
            bigquery.SchemaField('longitude', 'float64', mode='REQUIRED'),
            bigquery.SchemaField('altitude', 'float64', mode='REQUIRED'),
            bigquery.SchemaField('tz_timezone', 'string', mode='REQUIRED')
        ]
    },
    'passengers': {
        'table_name': 'passengers',
        'schema': [
            bigquery.SchemaField('first_name', 'string', mode='REQUIRED'),
            bigquery.SchemaField('last_name', 'string', mode='REQUIRED'),
            bigquery.SchemaField('gender', 'string', mode='REQUIRED'),
            bigquery.SchemaField('birth_date', 'date', mode='REQUIRED'),
            bigquery.SchemaField('email', 'string', mode='REQUIRED'),
            bigquery.SchemaField('street','string', mode='REQUIRED'),
            bigquery.SchemaField('city', 'string', mode='REQUIRED'),
            bigquery.SchemaField('state', 'string', mode='REQUIRED'),
            bigquery.SchemaField('zip', 'string', mode='REQUIRED'),
            bigquery.SchemaField('start_date', 'date', mode='REQUIRED'),
            bigquery.SchemaField('end_date', 'date', mode='NULLABLE'),
            bigquery.SchemaField('record_id', 'string', mode='REQUIRED')
        ]
    },
    'tickets': {
        'table_name': 'tickets',
        'schema': [
            bigquery.SchemaField('eticket_num', 'string', mode='REQUIRED'),
            bigquery.SchemaField('confirmation', 'string', mode='REQUIRED'),
            bigquery.SchemaField('ticket_date', 'date', mode='REQUIRED'),
            bigquery.SchemaField('price', 'float64', mode='REQUIRED'),
            bigquery.SchemaField('seat', 'string', mode='REQUIRED'),
            bigquery.SchemaField('status','string', mode='REQUIRED'),
            bigquery.SchemaField('airline', 'string', mode='REQUIRED'), 
            bigquery.SchemaField('origin', 'string', mode='REQUIRED'),
            bigquery.SchemaField('destination', 'string', mode='REQUIRED'),
            # bigquery.SchemaField('passenger', 'string', mode='REQUIRED')
        ]
    }
}


# **** SETUP LOGGING ****
# setup logging and logger
logging.basicConfig(            # setting up the root logger
    format='[%(levelname)-5s][%(asctime)s][%(module)s:%(lineno)04d] : %(message)s',
    level=logging.INFO,
    stream=sys.stdout
)
logger: logging.Logger = logging.getLogger('root')      # alias the root logger as `logger`
logger.setLevel(logging.DEBUG)                          # programmatically reassign the logging level


# **** BIGQUERY CLIENT ****
logger.debug(f"Creating bigquery client")
client = bigquery.Client()

logger.info(f"Setup Completed")


[DEBUG][2023-01-16 17:26:26,661][4126403714:0095] : Creating bigquery client
[INFO ][2023-01-16 17:26:26,736][4126403714:0098] : Setup Completed


In [38]:
# create dataset 
dataset_id = f"{PROJECT_NAME}.{DATASET_NAME}"
dataset = bigquery.Dataset(dataset_id)
dataset.location = "US"
dataset = client.create_dataset(dataset, exists_ok=True)

logger.info(f"Created air travel dataset: {dataset.full_dataset_id}")

[INFO ][2023-01-16 15:26:49,560][2632008059:0007] : Created air travel dataset: deb-01-372120:air_travel


In [39]:
# air travel data file name
filename = DEFAULT_TICKETS_FILE
logger.debug(f"attempting to process: {filename}")

# check if the file exists
assert os.path.exists(filename), f"Data file does not exists: '{filename}'"
# check if the file contains any data 
assert os.path.getsize(filename) > 78, f"Data file size incorrect; does not seem to contain data: '{filename}'"

# convert json into an array and then load into dataframe
data = [json.loads(line) for line in open(filename, 'r')]

df = pd.json_normalize(data)
logger.info(f"loaded {len(df.index)} rows from: {filename}")

# check schema: contains all expected columns?
expected_columns = ['eticket_num', 'confirmation', 'ticket_date', 'price', 'seat', 'status', 'airline.name', 'airline.iata', 'airline.icao', 'airline.callsign', 'airline.country', 'origin.name', 'origin.city', 'origin.country', 'origin.iata', 'origin.icao', 'origin.latitude', 'origin.longitude', 'origin.altitude', 'origin.tz_timezone', 'destination.name', 'destination.city', 'destination.country', 'destination.iata', 'destination.icao', 'destination.latitude', 'destination.longitude', 'destination.altitude', 'destination.tz_timezone', 'passenger.first_name', 'passenger.last_name', 'passenger.gender', 'passenger.birth_date', 'passenger.email', 'passenger.street', 'passenger.city', 'passenger.state', 'passenger.zip']
for col in expected_columns:
    assert col in list(df.columns), f"Data file missing required column: {col}"

# assign & remember receipts dataframe
tickets_df = df
display(tickets_df.head(n=10))

[DEBUG][2023-01-16 15:26:50,058][829190179:0003] : attempting to process: data/air_travel/tickets.json
[INFO ][2023-01-16 15:26:51,348][829190179:0014] : loaded 4096 rows from: data/air_travel/tickets.json


Unnamed: 0,eticket_num,confirmation,ticket_date,price,seat,status,airline.name,airline.iata,airline.icao,airline.callsign,...,passenger.last_name,passenger.gender,passenger.birth_date,passenger.email,passenger.street,passenger.city,passenger.state,passenger.zip,origin,destination
0,498-938211-0795,ZVFDC4,2022-03-23,723.42,31I,active,China Eastern Airlines,MU,CES,CHINA EASTERN,...,Brown,M,1969-02-17,robert.brown.69@hotmail.com,5007 Thomas Way,Lake Hollystad,DC,20027,,
1,482-850738-6048,IL5GUI,2022-03-23,765.18,29B,active,Hawaiian Airlines,HA,HAL,HAWAIIAN,...,Kent,F,1998-08-05,laura.kent.98@hotmail.com,13991 Davis Village,North Catherineborough,PA,16516,,
2,275-207321-8092,CYEFBC,2022-03-21,753.89,26I,active,Wizz Air,W6,WZZ,WIZZ AIR,...,Tucker,F,1965-01-22,lisa.tucker.65@hotmail.com,04135 Marvin Via,North Kristabury,MA,1093,,
3,246-793315-3102,ZNGPC2,2022-03-22,793.89,15A,active,AirAsia,AK,AXM,ASIAN EXPRESS,...,Yates,NB,1975-03-31,matthew.yates.75@yahoo.com,76045 Samantha Road Suite 111,Lake Jeffrey,DE,19898,,
4,091-128904-1226,MGSBD9,2022-03-24,820.25,17F,active,Xiamen Airlines,MF,CXA,XIAMEN AIR,...,Villanueva,NB,1945-08-14,megan.villanueva.45@hotmail.com,848 Melissa Springs Suite 947,Kellerstad,TX,76177,,
5,115-196069-8963,XFYQC0,2022-03-23,892.69,18C,active,Air New Zealand,NZ,ANZ,NEW ZEALAND,...,Hall,NB,1944-08-31,sarah.hall.44@gmail.com,75420 Michael Mountains Suite 485,New Victoria,HI,96727,,
6,396-673460-1326,N5UOOZ,2022-03-23,889.53,3C,active,Jeju Air,7C,JJA,JEJU AIR,...,Thompson,M,1968-05-02,seth.thompson.68@yahoo.com,22455 Higgins Junction Apt. 042,New Keith,OR,97405,,
7,380-894599-8109,PAA19Y,2022-03-22,706.78,7D,active,American Airlines,AA,AAL,AMERICAN,...,Garcia,F,1950-02-12,jennifer.garcia.50@gmail.com,6607 Sharp Common,Chadstad,VA,22121,,
8,614-960971-2686,EF4BHJ,2022-03-23,486.4,24J,active,Juneyao Airlines,HO,DKH,JUNEYAO AIRLINES,...,Clark,F,1991-11-09,becky.clark.91@gmail.com,691 Jones Cliffs,Michaelburgh,TX,76003,,
9,481-321233-0702,FVM9EE,2022-03-23,855.93,16A,active,Royal Air Maroc,AT,RAM,ROYALAIR MAROC,...,Cook,M,1976-07-29,ronald.cook.76@hotmail.com,93328 Davis Island,Rodriguezside,MD,21408,,


In [40]:
# Gather unique airlines 
# start from the tickets_df
df = tickets_df

logger.debug(f"getting unique airlines...")

# set of unique columns to return
cols = ['airline.name', 'airline.iata', 'airline.icao', 'airline.callsign','airline.country']
# group by unique columns and only select them
df = df.groupby(cols).all()
df = df.reset_index().loc[:, cols]
# rename columns
df = df.rename(columns={'airline.name': 'name', 'airline.iata':'iata', 'airline.icao':'icao', 'airline.callsign':'callsign', 'airline.country':'country'})

# Set index to IATA
df = df.set_index(keys='iata')

logger.info(f"airlines dim - found {len(df.index)} rows")
df.head(10)

[DEBUG][2023-01-16 15:26:51,991][641296019:0005] : getting unique airlines...
[INFO ][2023-01-16 15:26:52,414][641296019:0018] : airlines dim - found 48 rows


Unnamed: 0_level_0,name,icao,callsign,country
iata,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AC,Air Canada,ACA,AIR CANADA,Canada
CA,Air China,CCA,AIR CHINA,China
AF,Air France,AFR,AIRFRANS,France
NZ,Air New Zealand,ANZ,NEW ZEALAND,New Zealand
AK,AirAsia,AXM,ASIAN EXPRESS,Malaysia
AS,Alaska Airlines,ASA,Inc.,ALASKA
G4,Allegiant Air,AAY,ALLEGIANT,United States
AA,American Airlines,AAL,AMERICAN,United States
BA,British Airways,BAW,SPEEDBIRD,United Kingdom
9K,Cape Air,KAP,CAIR,United States


In [48]:
# Create load_table function that will deal loading airline table to BigQuery

def load_table(
    df: pd.DataFrame, 
    client: bigquery.Client, 
    table_name: str, 
    schema: List[bigquery.SchemaField], 
    create_disposition: str = 'CREATE_IF_NEEDED', 
    write_disposition: str = 'WRITE_TRUNCATE'
    ) -> None:
    """load dataframe into bigquery table

    Args:
        df (pd.DataFrame): dataframe to load
        client (bigquery.Client): bigquery client
        table_name (str): full table name including project and dataset id
        schema (List[bigquery.SchemaField]): table schema with data types
        create_disposition (str, optional): create table disposition. Defaults to 'CREATE_IF_NEEDED'.
        write_disposition (str, optional): overwrite table disposition. Defaults to 'WRITE_TRUNCATE'.
    """
    # *** run some checks ***
    # test table name to be full table name including project and dataset name. It must contain to dots
    assert len(table_name.split('.')) == 3, f"Table name must be a full bigquery table name including project and dataset id: '{table_name}'"
    # setup bigquery load job:
    #  create table if needed, replace rows, define the table schema
    job_config = bigquery.LoadJobConfig(
        create_disposition=create_disposition,
        write_disposition=write_disposition,
        schema=schema
    )
    logger.info(f"loading table: '{table_name}'")
    job = client.load_table_from_dataframe(df, destination=table_name, job_config=job_config)
    job.result()        # wait for the job to finish
    # get the resulting table
    table = client.get_table(table_name)
    logger.info(f"loaded {table.num_rows} rows into {table.full_table_id}")

In [43]:
# Load airline table to BigQuery
# get table name and schema from our TABLE_METADATA config param
table_name = f"{PROJECT_NAME}.{DATASET_NAME}.{TABLE_METADATA['airlines']['table_name']}"
schema = schema=TABLE_METADATA['airlines']['schema']
# load dataframe
load_table(df, client, table_name, schema)

logger.info(f"loaded airlines dim")

[INFO ][2023-01-16 15:27:29,360][3201137408:0032] : loading table: 'deb-01-372120.air_travel.airlines'
[INFO ][2023-01-16 15:27:34,662][3201137408:0037] : loaded 48 rows into deb-01-372120:air_travel.airlines
[INFO ][2023-01-16 15:27:34,664][1629074712:0008] : loaded airlines dim


In [44]:
# Gather unique aiports 
# start from the tickets_df
df = tickets_df

logger.debug(f"getting unique airports...")

# set of unique columns to return
cols = ['origin.name', 'origin.city', 'origin.country', 'origin.iata', 'origin.icao', 'origin.latitude', 'origin.longitude', 'origin.altitude', 'origin.tz_timezone']
# group by unique columns and only select them
df = df.groupby(cols).all()
df = df.reset_index().loc[:, cols]
# rename columns
df = df.rename(columns={'origin.name':'name', 'origin.city':'city', 'origin.country':'country', 'origin.iata':'iata', 'origin.icao':'icao', 'origin.latitude':'latitude', 'origin.longitude':'longitude', 'origin.altitude':'altitude', 'origin.tz_timezone':'tz_timezone'})

# Set index to IATA
df = df.set_index(keys='iata')

logger.info(f"airports dim - found {len(df.index)} rows")
df.head(10)

[DEBUG][2023-01-16 15:27:38,036][2483278288:0005] : getting unique airports...
[INFO ][2023-01-16 15:27:38,466][2483278288:0018] : airports dim - found 386 rows


Unnamed: 0_level_0,name,city,country,icao,latitude,longitude,altitude,tz_timezone
iata,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AUH,Abu Dhabi International Airport,Abu Dhabi,United Arab Emirates,OMAA,24.43,54.65,88.0,Asia/Dubai
MAD,Adolfo Suárez Madrid–Barajas Airport,Madrid,Spain,LEMD,40.47,-3.56,1998.0,Europe/Madrid
CWB,Afonso Pena Airport,Curitiba,Brazil,SBCT,-25.53,-49.18,2988.0,America/Sao_Paulo
MCP,Alberto Alcolumbre Airport,Macapa,Brazil,SBMQ,0.05,-51.07,56.0,America/Fortaleza
ABQ,Albuquerque International Sunport,Albuquerque,United States,KABQ,35.04,-106.61,5355.0,America/Denver
CUZ,Alejandro Velasco Astete International Airport,Cuzco,Peru,SPZO,-13.54,-71.94,10860.0,America/Lima
CLO,Alfonso Bonilla Aragon International Airport,Cali,Colombia,SKCL,3.54,-76.38,3162.0,America/Bogota
ALC,Alicante International Airport,Alicante,Spain,LEAL,38.28,-0.56,142.0,Europe/Madrid
REL,Almirante Marco Andres Zar Airport,Trelew,Argentina,SAVT,-43.21,-65.27,141.0,America/Catamarca
AMS,Amsterdam Airport Schiphol,Amsterdam,Netherlands,EHAM,52.31,4.76,-11.0,Europe/Amsterdam


In [45]:
# Load airport dim into BigQuery
# get table name and schema from our TABLE_METADATA config param
table_name = f"{PROJECT_NAME}.{DATASET_NAME}.{TABLE_METADATA['airports']['table_name']}"
schema = schema=TABLE_METADATA['airports']['schema']
# load dataframe
load_table(df, client, table_name, schema)

logger.info(f"loaded airports dim")

[INFO ][2023-01-16 15:27:45,098][3201137408:0032] : loading table: 'deb-01-372120.air_travel.airports'
[INFO ][2023-01-16 15:27:49,568][3201137408:0037] : loaded 386 rows into deb-01-372120:air_travel.airports
[INFO ][2023-01-16 15:27:49,570][3846598676:0008] : loaded airports dim


In [60]:
# Gather unique passengers 
# start from the tickets_df
df = tickets_df

logger.debug(f"getting unique passengers...")

# set of unique columns to return
cols = ['passenger.first_name', 'passenger.last_name', 'passenger.gender', 'passenger.birth_date', 'passenger.email', 'passenger.street', 'passenger.city', 'passenger.state', 'passenger.zip']
# group by unique columns and only select them
df = df.groupby(cols).all()
df = df.reset_index().loc[:, cols]
# rename columns
df = df.rename(columns={'passenger.first_name':'first_name', 'passenger.last_name':'last_name', 'passenger.gender':'gender', 'passenger.birth_date':'birth_date', 'passenger.email':'email', 'passenger.street':'street', 'passenger.city':'city', 'passenger.state':'state', 'passenger.zip':'zip'})

# Set index to email
df = df.set_index(keys='email')

# Add start and end date columns
df['start_date'] = date.today()
df['end_date'] = None

# Generate UUID for passengers df
df['uuid'] = uuid.uuid4()
logger.info(f"generated passengers uuids")

logger.info(f"passengers dim - found {len(df.index)} rows")
df.head(10)

[DEBUG][2023-01-16 17:04:22,511][4118144891:0005] : getting unique passengers...
[INFO ][2023-01-16 17:04:23,817][4118144891:0024] : generated passengers uuids
[INFO ][2023-01-16 17:04:23,819][4118144891:0026] : passengers dim - found 32 rows


Unnamed: 0_level_0,first_name,last_name,gender,birth_date,street,city,state,zip,start_date,end_date,uuid
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
annette.hawkins.43@yahoo.com,Annette,Hawkins,F,1943-07-11,361 Robinson Green Apt. 635,North Lynntown,NV,89825,2023-01-16,,1c47ba6b-6006-4d1c-b809-901c607d1619
autumn.morse.60@hotmail.com,Autumn,Morse,F,1960-01-18,6984 Price Shoals,Erictown,HI,96818,2023-01-16,,1c47ba6b-6006-4d1c-b809-901c607d1619
becky.clark.91@gmail.com,Becky,Clark,F,1991-11-09,691 Jones Cliffs,Michaelburgh,TX,76003,2023-01-16,,1c47ba6b-6006-4d1c-b809-901c607d1619
belinda.cook.91@hotmail.com,Belinda,Cook,F,1991-01-26,1965 Kelly Field Apt. 094,Jonesberg,IL,60613,2023-01-16,,1c47ba6b-6006-4d1c-b809-901c607d1619
carl.wilson.80@hotmail.com,Carl,Wilson,M,1980-04-24,2814 Houston Hills,Rodriguezside,IA,51971,2023-01-16,,1c47ba6b-6006-4d1c-b809-901c607d1619
cheryl.hughes.45@gmail.com,Cheryl,Hughes,NB,1945-05-20,00992 Garcia Plaza Suite 367,North Chelseamouth,CT,6315,2023-01-16,,1c47ba6b-6006-4d1c-b809-901c607d1619
christian.stevenson.93@hotmail.com,Christian,Stevenson,NB,1993-06-14,75945 Jennifer Loaf,Pooleland,KY,40009,2023-01-16,,1c47ba6b-6006-4d1c-b809-901c607d1619
corey.cook.83@gmail.com,Corey,Cook,M,1983-06-14,9606 Barton Station Apt. 271,Jacquelinemouth,IN,47081,2023-01-16,,1c47ba6b-6006-4d1c-b809-901c607d1619
danielle.henderson.70@hotmail.com,Danielle,Henderson,NB,1970-08-11,7389 Alec Squares Suite 508,Port Jonathan,NM,87320,2023-01-16,,1c47ba6b-6006-4d1c-b809-901c607d1619
hannah.smith.66@gmail.com,Hannah,Smith,F,1966-07-01,230 Donna Street,Lake Adrianstad,MN,56413,2023-01-16,,1c47ba6b-6006-4d1c-b809-901c607d1619


In [54]:
# Load passengers dim into BigQuery
# get table name and schema from our TABLE_METADATA config param
table_name = f"{PROJECT_NAME}.{DATASET_NAME}.{TABLE_METADATA['passengers']['table_name']}"
schema = schema=TABLE_METADATA['passengers']['schema']
# load dataframe
load_table(df, client, table_name, schema)

logger.info(f"loaded passengers dim")

[INFO ][2023-01-16 15:41:16,221][1459098983:0031] : loading table: 'deb-01-372120.air_travel.passengers'


ArrowTypeError: object of type <class 'str'> cannot be converted to int

In [70]:
# Set Tickets Fact table with all columns except passenger
df = tickets_df
df = df[['eticket_num', 'confirmation', 'price', 'ticket_date', 'seat', 'status', 'airline.iata', 'origin.iata', 'destination.iata']]

# Set index
df = df.set_index(keys='eticket_num')

# Rename columns
df = df.rename(columns={'airline.iata':'airline', 'origin.iata':'origin', 'destination.iata':'destination'})

logger.info(f"tickets fact - found {len(df.index)} rows")
df.head(10)

[INFO ][2023-01-16 17:26:56,465][1337487685:0011] : tickets fact - found 4096 rows


Unnamed: 0_level_0,confirmation,price,ticket_date,seat,status,airline,origin,destination
eticket_num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
498-938211-0795,ZVFDC4,723.42,2022-03-23,31I,active,MU,YUL,MDW
482-850738-6048,IL5GUI,765.18,2022-03-23,29B,active,HA,KWE,MNL
275-207321-8092,CYEFBC,753.89,2022-03-21,26I,active,W6,PVR,IBZ
246-793315-3102,ZNGPC2,793.89,2022-03-22,15A,active,AK,PMC,GDN
091-128904-1226,MGSBD9,820.25,2022-03-24,17F,active,MF,BWI,LGW
115-196069-8963,XFYQC0,892.69,2022-03-23,18C,active,NZ,ROV,CVG
396-673460-1326,N5UOOZ,889.53,2022-03-23,3C,active,7C,YWG,OKA
380-894599-8109,PAA19Y,706.78,2022-03-22,7D,active,AA,ONT,RGN
614-960971-2686,EF4BHJ,486.4,2022-03-23,24J,active,HO,SJU,SVX
481-321233-0702,FVM9EE,855.93,2022-03-23,16A,active,AT,YEG,TYN


In [61]:
# Lookup passenger UUID and join it to the tickets df
query = f"""
  SELECT
    uuid
  FROM
    `{PROJECT_NAME}.{DATASET_NAME}.passengers`
"""

pass_df = client.query(query).to_dataframe()

df = df.drop(columns='uuid', errors='ignore')

df = df.join(pass_df, how='right-join')

df.head()

NotFound: 404 Not found: Table deb-01-372120:air_travel.passengers was not found in location US

Location: US
Job ID: cc37c5ff-d471-4520-b4b3-d558cc16287c


In [71]:
# Load tickets fact table to BigQuery
# get table name and schema from our TABLE_METADATA config param
table_name = f"{PROJECT_NAME}.{DATASET_NAME}.{TABLE_METADATA['tickets']['table_name']}"
schema = schema=TABLE_METADATA['tickets']['schema']
# load dataframe
load_table(df, client, table_name, schema)

logger.info(f"loaded tickets dim")

[INFO ][2023-01-16 17:27:06,709][1459098983:0031] : loading table: 'deb-01-372120.air_travel.tickets'


ArrowTypeError: object of type <class 'str'> cannot be converted to int