In [1]:
## Optional
# Format output of Jupyter Notebook
from IPython.core.display import HTML
display(HTML("<style>pre { white-space: pre !important; }</style>"))

# Hide python warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, StringType
from pyspark.sql.functions import isnull, when, count, col, regexp_extract

# TODO - parameterize endpoints/ports such as minio:9000 with envs

# Define config for SparkSession,
# such as Iceberg catalog that utilizes minio, an S3-compatible local object storage
spark_configs = {
    'spark.master': 'spark://spark-iceberg:7077',
    'spark.sql.catalog.airline': 'org.apache.iceberg.spark.SparkCatalog',
    # 'spark.sql.catalog.airline': 'org.apache.iceberg.spark.SparkSessionCatalog',
    'spark.sql.catalog.airline.io-impl': 'org.apache.iceberg.aws.s3.S3FileIO',
    'spark.sql.catalog.airline.s3.endpoint': 'http://minio:9000',
    'spark.sql.catalog.airline.type': 'rest',
    'spark.sql.catalog.airline.uri': 'http://rest:8181',
    'spark.sql.catalog.airline.warehouse': 's3://warehouse',
    'spark.sql.defaultCatalog': 'airline'
}

# Initialize SparkSession
spark = (
    SparkSession
    .builder
    .appName('EDA')
    .config(map=spark_configs)
    .getOrCreate()
)

spark.conf



filename = '/home/iceberg/data/flights.csv'

24/12/19 19:55:35 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


In [3]:
import pprint as pp

# print(spark.sparkContext.uiWebUrl)

print(spark.conf.get('spark.app.id'))
print(spark.conf.get('spark.app.name'))
print(spark.conf.get('spark.app.startTime'))
print(spark.conf.get('spark.master'))
print()
print(spark.conf.get('spark.sql.catalog.airline'))
print(spark.conf.get('spark.sql.catalog.airline.io-impl'))
print(spark.conf.get('spark.sql.catalog.airline.s3.endpoint'))
print(spark.conf.get('spark.sql.catalog.airline.type'))
print(spark.conf.get('spark.sql.catalog.airline.uri'))
print(spark.conf.get('spark.sql.catalog.airline.warehouse'))
print()
                     
# pp.pp(dict(sc.getConf().getAll()), sort_dicts=True, )

local-1734638114350
EDA
1734638113495
spark://spark-iceberg:7077

org.apache.iceberg.spark.SparkCatalog
org.apache.iceberg.aws.s3.S3FileIO
http://minio:9000
rest
http://rest:8181
s3://warehouse



# Ingest raw csv data

In [37]:
# Check for alphabetic characters in flight_number, this is useless...
raw_data = spark.read.option('header', True).csv(filename)
raw_data.where(regexp_extract('flight_number', '^\d+$', 0) == '').show()

[Stage 34:>                                                         (0 + 7) / 7]

+----+-----+---+-----------+-------+-------------+-----------+--------------+-------------------+-------------------+--------------+---------------+--------+----------+--------------+------------+--------+--------+---------+-------+-----------------+------------+-------------+--------+---------+-------------------+----------------+--------------+-------------+-------------------+-------------+
|YEAR|MONTH|DAY|DAY_OF_WEEK|AIRLINE|FLIGHT_NUMBER|TAIL_NUMBER|ORIGIN_AIRPORT|DESTINATION_AIRPORT|SCHEDULED_DEPARTURE|DEPARTURE_TIME|DEPARTURE_DELAY|TAXI_OUT|WHEELS_OFF|SCHEDULED_TIME|ELAPSED_TIME|AIR_TIME|DISTANCE|WHEELS_ON|TAXI_IN|SCHEDULED_ARRIVAL|ARRIVAL_TIME|ARRIVAL_DELAY|DIVERTED|CANCELLED|CANCELLATION_REASON|AIR_SYSTEM_DELAY|SECURITY_DELAY|AIRLINE_DELAY|LATE_AIRCRAFT_DELAY|WEATHER_DELAY|
+----+-----+---+-----------+-------+-------------+-----------+--------------+-------------------+-------------------+--------------+---------------+--------+----------+--------------+------------+--------+-

                                                                                

In [8]:
# TODO: Fix nullables column, cant set column as Not-Nullable for some reason...

# Define schema
flights_schema = StructType([
    StructField("year", IntegerType(), False),   # Non-Nullable
    StructField("month", IntegerType(), False),  # Non-Nullable
    StructField("day", IntegerType(), False),    # Non-Nullable
    StructField("day_of_week", IntegerType(), False),    # Non-Nullable
    StructField("airline", StringType(), False),         # Non-Nullable
    StructField("flight_number", IntegerType(), False),  # Non-Nullable
    StructField("tail_number", StringType(), True),
    StructField("origin_airport", StringType(), False),  # Non-Nullable
    StructField("destination_airport", StringType(), False),   # Non-Nullable
    StructField("scheduled_departure", IntegerType(), False),  # Non-Nullable
    StructField("departure_time", IntegerType(), True),
    StructField("departure_delay", IntegerType(), True),
    StructField("taxi_out", IntegerType(), True),
    StructField("wheels_off", IntegerType(), True),
    StructField("scheduled_time", IntegerType(), True),
    StructField("elapsed_time", IntegerType(), True),
    StructField("air_time", IntegerType(), True),
    StructField("distance", IntegerType(), False),  # Non-Nullable
    StructField("wheels_on", IntegerType(), True),
    StructField("taxi_in", IntegerType(), True),
    StructField("scheduled_arrival", IntegerType(), False),  # Non-Nullable
    StructField("arrival_time", IntegerType(), True),
    StructField("arrival_delay", IntegerType(), True),
    StructField("diverted", IntegerType(), False),   # Non-Nullable
    StructField("cancelled", IntegerType(), False),  # Non-Nullable
    StructField("cancellation_reason", StringType(), True),
    StructField("air_system_delay", IntegerType(), True),
    StructField("security_delay", IntegerType(), True),
    StructField("airline_delay", IntegerType(), True),
    StructField("late_aircraft_delay", IntegerType(), True),
    StructField("weather_delay", IntegerType(), True)
])

# Read flights.csv
flights_df = spark.read.schema(flights_schema).csv(
    '/home/iceberg/data/flights.csv',
    schema=flights_schema,
    enforceSchema=True,
    header = True
).cache()

flights_df.show()
flights_df.printSchema()
flights_df.explain()

24/12/19 20:20:22 WARN CacheManager: Asked to cache already cached data.


+----+-----+---+-----------+-------+-------------+-----------+--------------+-------------------+-------------------+--------------+---------------+--------+----------+--------------+------------+--------+--------+---------+-------+-----------------+------------+-------------+--------+---------+-------------------+----------------+--------------+-------------+-------------------+-------------+
|year|month|day|day_of_week|airline|flight_number|tail_number|origin_airport|destination_airport|scheduled_departure|departure_time|departure_delay|taxi_out|wheels_off|scheduled_time|elapsed_time|air_time|distance|wheels_on|taxi_in|scheduled_arrival|arrival_time|arrival_delay|diverted|cancelled|cancellation_reason|air_system_delay|security_delay|airline_delay|late_aircraft_delay|weather_delay|
+----+-----+---+-----------+-------+-------------+-----------+--------------+-------------------+-------------------+--------------+---------------+--------+----------+--------------+------------+--------+-

In [None]:
flights.filter('year')

# Explore data!
TODO:
- Check statistics of important columns, like what are the percentiles for departure/arrival delay?
- Check max values for delay

## Total number of flight records: 5,819,079

In [32]:
flights_df.count()

                                                                                

5819079

## Total number of cancelled flights: 89,884, Total number of diverted flights: 15,187
These will cause nulls in other columns.

In [37]:
flights_df.where('cancelled = 1').count()

                                                                                

89884

In [35]:
flights_df.where('diverted = 1').count()

                                                                                

15187

## Get Null counts for each column

In [31]:
flights_df.select([count(when(isnull(c), c)).alias(c) for c in data.columns]).show()



+----+-----+---+-----------+-------+-------------+-----------+--------------+-------------------+-------------------+--------------+---------------+--------+----------+--------------+------------+--------+--------+---------+-------+-----------------+------------+-------------+--------+---------+-------------------+----------------+--------------+-------------+-------------------+-------------+
|YEAR|MONTH|DAY|DAY_OF_WEEK|AIRLINE|FLIGHT_NUMBER|TAIL_NUMBER|ORIGIN_AIRPORT|DESTINATION_AIRPORT|SCHEDULED_DEPARTURE|DEPARTURE_TIME|DEPARTURE_DELAY|TAXI_OUT|WHEELS_OFF|SCHEDULED_TIME|ELAPSED_TIME|AIR_TIME|DISTANCE|WHEELS_ON|TAXI_IN|SCHEDULED_ARRIVAL|ARRIVAL_TIME|ARRIVAL_DELAY|DIVERTED|CANCELLED|CANCELLATION_REASON|AIR_SYSTEM_DELAY|SECURITY_DELAY|AIRLINE_DELAY|LATE_AIRCRAFT_DELAY|WEATHER_DELAY|
+----+-----+---+-----------+-------+-------------+-----------+--------------+-------------------+-------------------+--------------+---------------+--------+----------+--------------+------------+--------+-

                                                                                

## Investigate some causes for nulls in each column

In [48]:
# List of columns with null values
null_columns = [col for col in flights_df.columns if flights_df.filter(f"{col} IS NULL").count() > 0]

# Iterate through columns to investigate null causes
for col in null_columns:
    print(f"Investigating nulls in column: {col}")
    # Count rows where column is null and cancelled == 1
    count = flights_df.where(f"`{col}` IS NULL AND cancelled = 1").count()
    print(f"Nulls in {col} where flight is cancelled: {count}\n")

                                                                                

Investigating nulls in column: tail_number


                                                                                

Nulls in tail_number where flight is cancelled: 14721

Investigating nulls in column: departure_time


                                                                                

Nulls in departure_time where flight is cancelled: 86153

Investigating nulls in column: departure_delay


                                                                                

Nulls in departure_delay where flight is cancelled: 86153

Investigating nulls in column: taxi_out


                                                                                

Nulls in taxi_out where flight is cancelled: 89047

Investigating nulls in column: wheels_off


                                                                                

Nulls in wheels_off where flight is cancelled: 89047

Investigating nulls in column: scheduled_time


                                                                                

Nulls in scheduled_time where flight is cancelled: 5

Investigating nulls in column: elapsed_time


                                                                                

Nulls in elapsed_time where flight is cancelled: 89884

Investigating nulls in column: air_time


                                                                                

Nulls in air_time where flight is cancelled: 89884

Investigating nulls in column: wheels_on


                                                                                

Nulls in wheels_on where flight is cancelled: 89884

Investigating nulls in column: taxi_in


                                                                                

Nulls in taxi_in where flight is cancelled: 89884

Investigating nulls in column: arrival_time


                                                                                

Nulls in arrival_time where flight is cancelled: 89884

Investigating nulls in column: arrival_delay


                                                                                

Nulls in arrival_delay where flight is cancelled: 89884

Investigating nulls in column: cancellation_reason


                                                                                

Nulls in cancellation_reason where flight is cancelled: 0

Investigating nulls in column: air_system_delay


                                                                                

Nulls in air_system_delay where flight is cancelled: 89884

Investigating nulls in column: security_delay


                                                                                

Nulls in security_delay where flight is cancelled: 89884

Investigating nulls in column: airline_delay


                                                                                

Nulls in airline_delay where flight is cancelled: 89884

Investigating nulls in column: late_aircraft_delay


                                                                                

Nulls in late_aircraft_delay where flight is cancelled: 89884

Investigating nulls in column: weather_delay


[Stage 290:>                                                      (0 + 12) / 12]

Nulls in weather_delay where flight is cancelled: 89884



                                                                                

#### Tail_number - all Nulls are from cancelled flights
Although, not all cancelled flights have Nulls in tail_number. This might be explained by flights that are cancelled well in advance.
**TODO: Check cancellation reasons**

In [40]:
flights_df.where('tail_number IS NULL AND cancelled = 1').count()

                                                                                

14721

In [44]:
flights_df.where('departure_time IS NULL and cancelled = 1').count()

                                                                                

86153

#### Scheduled_time - 6 Nulls, 5 out of 6 flight were cancelled, 1 flight was diverted
Scheduled_time is the estimated time in air?

In [28]:
flights_df.where('scheduled_time IS NULL').show()

[Stage 19:>                                                         (0 + 7) / 7]

+----+-----+---+-----------+-------+-------------+-----------+--------------+-------------------+-------------------+--------------+---------------+--------+----------+--------------+------------+--------+--------+---------+-------+-----------------+------------+-------------+--------+---------+-------------------+----------------+--------------+-------------+-------------------+-------------+
|year|month|day|day_of_week|airline|flight_number|tail_number|origin_airport|destination_airport|scheduled_departure|departure_time|departure_delay|taxi_out|wheels_off|scheduled_time|elapsed_time|air_time|distance|wheels_on|taxi_in|scheduled_arrival|arrival_time|arrival_delay|diverted|cancelled|cancellation_reason|air_system_delay|security_delay|airline_delay|late_aircraft_delay|weather_delay|
+----+-----+---+-----------+-------+-------------+-----------+--------------+-------------------+-------------------+--------------+---------------+--------+----------+--------------+------------+--------+-

                                                                                

# Write to staging Iceberg Table

In [26]:
# Drop flights table
spark.sql('DROP TABLE airline.db.flights PURGE')

DataFrame[]

In [27]:
# Create flights table if not exists
spark.sql("""
CREATE TABLE IF NOT EXISTS airline.db.flights (
    year INT,
    month INT,
    day INT,
    day_of_week INT,
    airline STRING,
    flight_number INT,
    tail_number STRING,
    origin_airport STRING,
    destination_airport STRING,
    scheduled_departure INT,
    departure_time INT,
    departure_delay INT,
    taxi_out INT,
    wheels_off INT,
    scheduled_time INT,
    elapsed_time INT,
    air_time INT,
    distance INT,
    wheels_on INT,
    taxi_in INT,
    scheduled_arrival INT,
    arrival_time INT,
    arrival_delay INT,
    diverted INT,
    cancelled INT,
    cancellation_reason STRING,
    air_system_delay INT,
    security_delay INT,
    airline_delay INT,
    late_aircraft_delay INT,
    weather_delay INT
)
USING iceberg
PARTITIONED BY (month)
""")

DataFrame[]

In [24]:
# # Create database if not exists
# spark.sql('CREATE DATABASE IF NOT EXISTS db')

# print(spark.catalog.tableExists('airline.db.flights'))

# # Create flights table if not exists
# if not spark.catalog.tableExists('airline.db.flights'):
#     spark.catalog.createTable(
#         'airline.db.flights',
#         schema=flights_schema,
#         source='iceberg',
#         partitioning='month'
#     )

# spark.catalog.getTable('airline.db.flights')

False


Table(name='flights', catalog='airline', namespace=['db'], description=None, tableType='MANAGED', isTemporary=False)

In [28]:
# Write flights data
flights_df \
    .writeTo('airline.db.flights') \
    .append()
    # .sort('day_of_week') \

                                                                                

In [29]:
spark.sql('DESCRIBE EXTENDED airline.db.flights').show()

+-------------------+---------+-------+
|           col_name|data_type|comment|
+-------------------+---------+-------+
|               year|      int|   NULL|
|              month|      int|   NULL|
|                day|      int|   NULL|
|        day_of_week|      int|   NULL|
|            airline|   string|   NULL|
|      flight_number|      int|   NULL|
|        tail_number|   string|   NULL|
|     origin_airport|   string|   NULL|
|destination_airport|   string|   NULL|
|scheduled_departure|      int|   NULL|
|     departure_time|      int|   NULL|
|    departure_delay|      int|   NULL|
|           taxi_out|      int|   NULL|
|         wheels_off|      int|   NULL|
|     scheduled_time|      int|   NULL|
|       elapsed_time|      int|   NULL|
|           air_time|      int|   NULL|
|           distance|      int|   NULL|
|          wheels_on|      int|   NULL|
|            taxi_in|      int|   NULL|
+-------------------+---------+-------+
only showing top 20 rows



#### HIDDEN PARTITIONING!?
When partitioning on year, still getting 12 files, but still 1 partition.

Dataset came sorted by date, so hidden partitioning was automatically splitting into 12 parquet files?

When partitioning on month, what is the behavior?

In [24]:
metrics = spark.sql('select readable_metrics from airline.db.flights.files').collect()

for metric in metrics:
    temp = metric.asDict()['readable_metrics'].asDict()['month']
    print(temp)

Row(column_size=2658, value_count=865543, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=12)
Row(column_size=2585, value_count=844600, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=12)
Row(column_size=2603, value_count=855897, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=12)
Row(column_size=2647, value_count=872521, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=12)
Row(column_size=2643, value_count=862209, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=12)
Row(column_size=2128, value_count=700545, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=12)
Row(column_size=2456, value_count=817764, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=12)


#### Checking table metadata

In [29]:
%%sql
SELECT
    *
FROM airline.db.flights.partitions

record_count,file_count,total_data_file_size_in_bytes,position_delete_record_count,position_delete_file_count,equality_delete_record_count,equality_delete_file_count,last_updated_at,last_updated_snapshot_id
5819079,7,132410345,0,0,0,0,2024-12-19 05:27:21.064000,2043671014194954771


In [28]:
%%sql
SELECT
    *
FROM airline.db.flights.files

content,file_path,file_format,spec_id,record_count,file_size_in_bytes,column_sizes,value_counts,null_value_counts,nan_value_counts,lower_bounds,upper_bounds,key_metadata,split_offsets,equality_ids,sort_order_id,readable_metrics
0,s3://warehouse/db/flights/data/00000-69-dc2c4f34-0586-4421-94d3-1124ed1b0086-0-00001.parquet,PARQUET,0,865543,19788655,"{1: 1702, 2: 2658, 3: 3393, 4: 1702, 5: 312677, 6: 1424769, 7: 1432183, 8: 929096, 9: 903891, 10: 481052, 11: 1215732, 12: 872911, 13: 572179, 14: 1216038, 15: 999944, 16: 1051864, 17: 1046375, 18: 1196367, 19: 1216524, 20: 490227, 21: 1196506, 22: 1216527, 23: 939979, 24: 9772, 25: 31761, 26: 36382, 27: 222731, 28: 102995, 29: 228256, 30: 251468, 31: 139256}","{1: 865543, 2: 865543, 3: 865543, 4: 865543, 5: 865543, 6: 865543, 7: 865543, 8: 865543, 9: 865543, 10: 865543, 11: 865543, 12: 865543, 13: 865543, 14: 865543, 15: 865543, 16: 865543, 17: 865543, 18: 865543, 19: 865543, 20: 865543, 21: 865543, 22: 865543, 23: 865543, 24: 865543, 25: 865543, 26: 865543, 27: 865543, 28: 865543, 29: 865543, 30: 865543, 31: 865543}","{1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 3777, 8: 0, 9: 0, 10: 0, 11: 20255, 12: 20255, 13: 20913, 14: 20913, 15: 1, 16: 23749, 17: 23749, 18: 0, 19: 21643, 20: 21643, 21: 0, 22: 21643, 23: 23749, 24: 0, 25: 0, 26: 844470, 27: 699277, 28: 699277, 29: 699277, 30: 699277, 31: 699277}",{},"{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x01\x00\x00\x00'), 3: bytearray(b'\x01\x00\x00\x00'), 4: bytearray(b'\x01\x00\x00\x00'), 5: bytearray(b'AA'), 6: bytearray(b'\x01\x00\x00\x00'), 7: bytearray(b'7819A'), 8: bytearray(b'10135'), 9: bytearray(b'10135'), 10: bytearray(b'\x01\x00\x00\x00'), 11: bytearray(b'\x01\x00\x00\x00'), 12: bytearray(b'\xc3\xff\xff\xff'), 13: bytearray(b'\x01\x00\x00\x00'), 14: bytearray(b'\x01\x00\x00\x00'), 15: bytearray(b'\x12\x00\x00\x00'), 16: bytearray(b'\x0e\x00\x00\x00'), 17: bytearray(b'\x08\x00\x00\x00'), 18: bytearray(b'\x15\x00\x00\x00'), 19: bytearray(b'\x01\x00\x00\x00'), 20: bytearray(b'\x01\x00\x00\x00'), 21: bytearray(b'\x01\x00\x00\x00'), 22: bytearray(b'\x01\x00\x00\x00'), 23: bytearray(b'\xaf\xff\xff\xff'), 24: bytearray(b'\x00\x00\x00\x00'), 25: bytearray(b'\x00\x00\x00\x00'), 26: bytearray(b'A'), 27: bytearray(b'\x00\x00\x00\x00'), 28: bytearray(b'\x00\x00\x00\x00'), 29: bytearray(b'\x00\x00\x00\x00'), 30: bytearray(b'\x00\x00\x00\x00'), 31: bytearray(b'\x00\x00\x00\x00')}","{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x0c\x00\x00\x00'), 3: bytearray(b'\x1f\x00\x00\x00'), 4: bytearray(b'\x01\x00\x00\x00'), 5: bytearray(b'WN'), 6: bytearray(b'h$\x00\x00'), 7: bytearray(b'N9EAMQ'), 8: bytearray(b'YUM'), 9: bytearray(b'YUM'), 10: bytearray(b'7\t\x00\x00'), 11: bytearray(b'`\t\x00\x00'), 12: bytearray(b'Y\x06\x00\x00'), 13: bytearray(b'\xc8\x00\x00\x00'), 14: bytearray(b'`\t\x00\x00'), 15: bytearray(b'\xce\x02\x00\x00'), 16: bytearray(b'\xcb\x02\x00\x00'), 17: bytearray(b'\xaf\x02\x00\x00'), 18: bytearray(b'w\x13\x00\x00'), 19: bytearray(b'`\t\x00\x00'), 20: bytearray(b'\xf8\x00\x00\x00'), 21: bytearray(b'7\t\x00\x00'), 22: bytearray(b'`\t\x00\x00'), 23: bytearray(b'd\x06\x00\x00'), 24: bytearray(b'\x01\x00\x00\x00'), 25: bytearray(b'\x01\x00\x00\x00'), 26: bytearray(b'D'), 27: bytearray(b'\xdf\x03\x00\x00'), 28: bytearray(b'\xdd\x00\x00\x00'), 29: bytearray(b'Y\x06\x00\x00'), 30: bytearray(b'\xde\x03\x00\x00'), 31: bytearray(b'\\\x04\x00\x00')}",,[4],,0,"Row(air_system_delay=Row(column_size=222731, value_count=865543, null_value_count=699277, nan_value_count=None, lower_bound=0, upper_bound=991), air_time=Row(column_size=1046375, value_count=865543, null_value_count=23749, nan_value_count=None, lower_bound=8, upper_bound=687), airline=Row(column_size=312677, value_count=865543, null_value_count=0, nan_value_count=None, lower_bound='AA', upper_bound='WN'), airline_delay=Row(column_size=228256, value_count=865543, null_value_count=699277, nan_value_count=None, lower_bound=0, upper_bound=1625), arrival_delay=Row(column_size=939979, value_count=865543, null_value_count=23749, nan_value_count=None, lower_bound=-81, upper_bound=1636), arrival_time=Row(column_size=1216527, value_count=865543, null_value_count=21643, nan_value_count=None, lower_bound=1, upper_bound=2400), cancellation_reason=Row(column_size=36382, value_count=865543, null_value_count=844470, nan_value_count=None, lower_bound='A', upper_bound='D'), cancelled=Row(column_size=31761, value_count=865543, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), day=Row(column_size=3393, value_count=865543, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=31), day_of_week=Row(column_size=1702, value_count=865543, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=1), departure_delay=Row(column_size=872911, value_count=865543, null_value_count=20255, nan_value_count=None, lower_bound=-61, upper_bound=1625), departure_time=Row(column_size=1215732, value_count=865543, null_value_count=20255, nan_value_count=None, lower_bound=1, upper_bound=2400), destination_airport=Row(column_size=903891, value_count=865543, null_value_count=0, nan_value_count=None, lower_bound='10135', upper_bound='YUM'), distance=Row(column_size=1196367, value_count=865543, null_value_count=0, nan_value_count=None, lower_bound=21, upper_bound=4983), diverted=Row(column_size=9772, value_count=865543, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), elapsed_time=Row(column_size=1051864, value_count=865543, null_value_count=23749, nan_value_count=None, lower_bound=14, upper_bound=715), flight_number=Row(column_size=1424769, value_count=865543, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=9320), late_aircraft_delay=Row(column_size=251468, value_count=865543, null_value_count=699277, nan_value_count=None, lower_bound=0, upper_bound=990), month=Row(column_size=2658, value_count=865543, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=12), origin_airport=Row(column_size=929096, value_count=865543, null_value_count=0, nan_value_count=None, lower_bound='10135', upper_bound='YUM'), scheduled_arrival=Row(column_size=1196506, value_count=865543, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_departure=Row(column_size=481052, value_count=865543, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_time=Row(column_size=999944, value_count=865543, null_value_count=1, nan_value_count=None, lower_bound=18, upper_bound=718), security_delay=Row(column_size=102995, value_count=865543, null_value_count=699277, nan_value_count=None, lower_bound=0, upper_bound=221), tail_number=Row(column_size=1432183, value_count=865543, null_value_count=3777, nan_value_count=None, lower_bound='7819A', upper_bound='N9EAMQ'), taxi_in=Row(column_size=490227, value_count=865543, null_value_count=21643, nan_value_count=None, lower_bound=1, upper_bound=248), taxi_out=Row(column_size=572179, value_count=865543, null_value_count=20913, nan_value_count=None, lower_bound=1, upper_bound=200), weather_delay=Row(column_size=139256, value_count=865543, null_value_count=699277, nan_value_count=None, lower_bound=0, upper_bound=1116), wheels_off=Row(column_size=1216038, value_count=865543, null_value_count=20913, nan_value_count=None, lower_bound=1, upper_bound=2400), wheels_on=Row(column_size=1216524, value_count=865543, null_value_count=21643, nan_value_count=None, lower_bound=1, upper_bound=2400), year=Row(column_size=1702, value_count=865543, null_value_count=0, nan_value_count=None, lower_bound=2015, upper_bound=2015))"
0,s3://warehouse/db/flights/data/00001-70-dc2c4f34-0586-4421-94d3-1124ed1b0086-0-00001.parquet,PARQUET,0,844600,19204359,"{1: 1664, 2: 2585, 3: 3348, 4: 1664, 5: 304370, 6: 1390440, 7: 1396127, 8: 889416, 9: 872181, 10: 474746, 11: 1188282, 12: 811925, 13: 541951, 14: 1188714, 15: 954937, 16: 1021068, 17: 1016790, 18: 1167465, 19: 1188795, 20: 539687, 21: 1167580, 22: 1188796, 23: 916552, 24: 8265, 25: 26430, 26: 30086, 27: 221740, 28: 97691, 29: 212559, 30: 212690, 31: 124873}","{1: 844600, 2: 844600, 3: 844600, 4: 844600, 5: 844600, 6: 844600, 7: 844600, 8: 844600, 9: 844600, 10: 844600, 11: 844600, 12: 844600, 13: 844600, 14: 844600, 15: 844600, 16: 844600, 17: 844600, 18: 844600, 19: 844600, 20: 844600, 21: 844600, 22: 844600, 23: 844600, 24: 844600, 25: 844600, 26: 844600, 27: 844600, 28: 844600, 29: 844600, 30: 844600, 31: 844600}","{1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 3046, 8: 0, 9: 0, 10: 0, 11: 14609, 12: 14609, 13: 14953, 14: 14953, 15: 1, 16: 17201, 17: 17201, 18: 0, 19: 15373, 20: 15373, 21: 0, 22: 15373, 23: 17201, 24: 0, 25: 0, 26: 829528, 27: 693417, 28: 693417, 29: 693417, 30: 693417, 31: 693417}",{},"{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x01\x00\x00\x00'), 3: bytearray(b'\x01\x00\x00\x00'), 4: bytearray(b'\x02\x00\x00\x00'), 5: bytearray(b'AA'), 6: bytearray(b'\x01\x00\x00\x00'), 7: bytearray(b'7819A'), 8: bytearray(b'10135'), 9: bytearray(b'10135'), 10: bytearray(b'\x01\x00\x00\x00'), 11: bytearray(b'\x01\x00\x00\x00'), 12: bytearray(b'\xd0\xff\xff\xff'), 13: bytearray(b'\x01\x00\x00\x00'), 14: bytearray(b'\x01\x00\x00\x00'), 15: bytearray(b'\x12\x00\x00\x00'), 16: bytearray(b'\x0f\x00\x00\x00'), 17: bytearray(b'\x07\x00\x00\x00'), 18: bytearray(b'\x1f\x00\x00\x00'), 19: bytearray(b'\x01\x00\x00\x00'), 20: bytearray(b'\x01\x00\x00\x00'), 21: bytearray(b'\x01\x00\x00\x00'), 22: bytearray(b'\x01\x00\x00\x00'), 23: bytearray(b'\xb0\xff\xff\xff'), 24: bytearray(b'\x00\x00\x00\x00'), 25: bytearray(b'\x00\x00\x00\x00'), 26: bytearray(b'A'), 27: bytearray(b'\x00\x00\x00\x00'), 28: bytearray(b'\x00\x00\x00\x00'), 29: bytearray(b'\x00\x00\x00\x00'), 30: bytearray(b'\x00\x00\x00\x00'), 31: bytearray(b'\x00\x00\x00\x00')}","{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x0c\x00\x00\x00'), 3: bytearray(b'\x1f\x00\x00\x00'), 4: bytearray(b'\x02\x00\x00\x00'), 5: bytearray(b'WN'), 6: bytearray(b'\x0e\x1d\x00\x00'), 7: bytearray(b'N9EAMQ'), 8: bytearray(b'YUM'), 9: bytearray(b'YUM'), 10: bytearray(b'7\t\x00\x00'), 11: bytearray(b'`\t\x00\x00'), 12: bytearray(b'I\x06\x00\x00'), 13: bytearray(b'\xad\x00\x00\x00'), 14: bytearray(b'`\t\x00\x00'), 15: bytearray(b'\xce\x02\x00\x00'), 16: bytearray(b'\xd4\x02\x00\x00'), 17: bytearray(b'\xa2\x02\x00\x00'), 18: bytearray(b'w\x13\x00\x00'), 19: bytearray(b'`\t\x00\x00'), 20: bytearray(b'\xae\x00\x00\x00'), 21: bytearray(b'7\t\x00\x00'), 22: bytearray(b'`\t\x00\x00'), 23: bytearray(b'>\x06\x00\x00'), 24: bytearray(b'\x01\x00\x00\x00'), 25: bytearray(b'\x01\x00\x00\x00'), 26: bytearray(b'D'), 27: bytearray(b'>\x03\x00\x00'), 28: bytearray(b'\x00\x01\x00\x00'), 29: bytearray(b'\x1b\x06\x00\x00'), 30: bytearray(b'\xd7\x03\x00\x00'), 31: bytearray(b'\x0b\x04\x00\x00')}",,[4],,0,"Row(air_system_delay=Row(column_size=221740, value_count=844600, null_value_count=693417, nan_value_count=None, lower_bound=0, upper_bound=830), air_time=Row(column_size=1016790, value_count=844600, null_value_count=17201, nan_value_count=None, lower_bound=7, upper_bound=674), airline=Row(column_size=304370, value_count=844600, null_value_count=0, nan_value_count=None, lower_bound='AA', upper_bound='WN'), airline_delay=Row(column_size=212559, value_count=844600, null_value_count=693417, nan_value_count=None, lower_bound=0, upper_bound=1563), arrival_delay=Row(column_size=916552, value_count=844600, null_value_count=17201, nan_value_count=None, lower_bound=-80, upper_bound=1598), arrival_time=Row(column_size=1188796, value_count=844600, null_value_count=15373, nan_value_count=None, lower_bound=1, upper_bound=2400), cancellation_reason=Row(column_size=30086, value_count=844600, null_value_count=829528, nan_value_count=None, lower_bound='A', upper_bound='D'), cancelled=Row(column_size=26430, value_count=844600, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), day=Row(column_size=3348, value_count=844600, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=31), day_of_week=Row(column_size=1664, value_count=844600, null_value_count=0, nan_value_count=None, lower_bound=2, upper_bound=2), departure_delay=Row(column_size=811925, value_count=844600, null_value_count=14609, nan_value_count=None, lower_bound=-48, upper_bound=1609), departure_time=Row(column_size=1188282, value_count=844600, null_value_count=14609, nan_value_count=None, lower_bound=1, upper_bound=2400), destination_airport=Row(column_size=872181, value_count=844600, null_value_count=0, nan_value_count=None, lower_bound='10135', upper_bound='YUM'), distance=Row(column_size=1167465, value_count=844600, null_value_count=0, nan_value_count=None, lower_bound=31, upper_bound=4983), diverted=Row(column_size=8265, value_count=844600, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), elapsed_time=Row(column_size=1021068, value_count=844600, null_value_count=17201, nan_value_count=None, lower_bound=15, upper_bound=724), flight_number=Row(column_size=1390440, value_count=844600, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=7438), late_aircraft_delay=Row(column_size=212690, value_count=844600, null_value_count=693417, nan_value_count=None, lower_bound=0, upper_bound=983), month=Row(column_size=2585, value_count=844600, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=12), origin_airport=Row(column_size=889416, value_count=844600, null_value_count=0, nan_value_count=None, lower_bound='10135', upper_bound='YUM'), scheduled_arrival=Row(column_size=1167580, value_count=844600, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_departure=Row(column_size=474746, value_count=844600, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_time=Row(column_size=954937, value_count=844600, null_value_count=1, nan_value_count=None, lower_bound=18, upper_bound=718), security_delay=Row(column_size=97691, value_count=844600, null_value_count=693417, nan_value_count=None, lower_bound=0, upper_bound=256), tail_number=Row(column_size=1396127, value_count=844600, null_value_count=3046, nan_value_count=None, lower_bound='7819A', upper_bound='N9EAMQ'), taxi_in=Row(column_size=539687, value_count=844600, null_value_count=15373, nan_value_count=None, lower_bound=1, upper_bound=174), taxi_out=Row(column_size=541951, value_count=844600, null_value_count=14953, nan_value_count=None, lower_bound=1, upper_bound=173), weather_delay=Row(column_size=124873, value_count=844600, null_value_count=693417, nan_value_count=None, lower_bound=0, upper_bound=1035), wheels_off=Row(column_size=1188714, value_count=844600, null_value_count=14953, nan_value_count=None, lower_bound=1, upper_bound=2400), wheels_on=Row(column_size=1188795, value_count=844600, null_value_count=15373, nan_value_count=None, lower_bound=1, upper_bound=2400), year=Row(column_size=1664, value_count=844600, null_value_count=0, nan_value_count=None, lower_bound=2015, upper_bound=2015))"
0,s3://warehouse/db/flights/data/00002-71-dc2c4f34-0586-4421-94d3-1124ed1b0086-0-00001.parquet,PARQUET,0,855897,19411983,"{1: 1666, 2: 2603, 3: 3325, 4: 1665, 5: 306658, 6: 1408664, 7: 1414097, 8: 919819, 9: 895152, 10: 478785, 11: 1202356, 12: 841793, 13: 547089, 14: 1202811, 15: 951468, 16: 1034900, 17: 1031087, 18: 1183041, 19: 1203475, 20: 494205, 21: 1183140, 22: 1203473, 23: 907473, 24: 7937, 25: 22187, 26: 24926, 27: 209035, 28: 100416, 29: 228371, 30: 233573, 31: 125823}","{1: 855897, 2: 855897, 3: 855897, 4: 855897, 5: 855897, 6: 855897, 7: 855897, 8: 855897, 9: 855897, 10: 855897, 11: 855897, 12: 855897, 13: 855897, 14: 855897, 15: 855897, 16: 855897, 17: 855897, 18: 855897, 19: 855897, 20: 855897, 21: 855897, 22: 855897, 23: 855897, 24: 855897, 25: 855897, 26: 855897, 27: 855897, 28: 855897, 29: 855897, 30: 855897, 31: 855897}","{1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 1956, 8: 0, 9: 0, 10: 0, 11: 10314, 12: 10314, 13: 10619, 14: 10619, 15: 0, 16: 12655, 17: 12655, 18: 0, 19: 11051, 20: 11051, 21: 0, 22: 11051, 23: 12655, 24: 0, 25: 0, 26: 845168, 27: 702293, 28: 702293, 29: 702293, 30: 702293, 31: 702293}",{},"{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x01\x00\x00\x00'), 3: bytearray(b'\x01\x00\x00\x00'), 4: bytearray(b'\x03\x00\x00\x00'), 5: bytearray(b'AA'), 6: bytearray(b'\x01\x00\x00\x00'), 7: bytearray(b'7819A'), 8: bytearray(b'10135'), 9: bytearray(b'10135'), 10: bytearray(b'\x03\x00\x00\x00'), 11: bytearray(b'\x01\x00\x00\x00'), 12: bytearray(b'\xc8\xff\xff\xff'), 13: bytearray(b'\x01\x00\x00\x00'), 14: bytearray(b'\x01\x00\x00\x00'), 15: bytearray(b'\x12\x00\x00\x00'), 16: bytearray(b'\x0e\x00\x00\x00'), 17: bytearray(b'\x07\x00\x00\x00'), 18: bytearray(b'\x1f\x00\x00\x00'), 19: bytearray(b'\x01\x00\x00\x00'), 20: bytearray(b'\x01\x00\x00\x00'), 21: bytearray(b'\x01\x00\x00\x00'), 22: bytearray(b'\x01\x00\x00\x00'), 23: bytearray(b'\xae\xff\xff\xff'), 24: bytearray(b'\x00\x00\x00\x00'), 25: bytearray(b'\x00\x00\x00\x00'), 26: bytearray(b'A'), 27: bytearray(b'\x00\x00\x00\x00'), 28: bytearray(b'\x00\x00\x00\x00'), 29: bytearray(b'\x00\x00\x00\x00'), 30: bytearray(b'\x00\x00\x00\x00'), 31: bytearray(b'\x00\x00\x00\x00')}","{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x0c\x00\x00\x00'), 3: bytearray(b'\x1e\x00\x00\x00'), 4: bytearray(b'\x03\x00\x00\x00'), 5: bytearray(b'WN'), 6: bytearray(b'\xfa \x00\x00'), 7: bytearray(b'N9EAMQ'), 8: bytearray(b'YUM'), 9: bytearray(b'YUM'), 10: bytearray(b'7\t\x00\x00'), 11: bytearray(b'`\t\x00\x00'), 12: bytearray(b'5\x06\x00\x00'), 13: bytearray(b'\xb1\x00\x00\x00'), 14: bytearray(b'`\t\x00\x00'), 15: bytearray(b'\xce\x02\x00\x00'), 16: bytearray(b'\xda\x02\x00\x00'), 17: bytearray(b'\xb2\x02\x00\x00'), 18: bytearray(b'w\x13\x00\x00'), 19: bytearray(b'`\t\x00\x00'), 20: bytearray(b'\x9d\x00\x00\x00'), 21: bytearray(b'7\t\x00\x00'), 22: bytearray(b'`\t\x00\x00'), 23: bytearray(b'(\x06\x00\x00'), 24: bytearray(b'\x01\x00\x00\x00'), 25: bytearray(b'\x01\x00\x00\x00'), 26: bytearray(b'D'), 27: bytearray(b'n\x04\x00\x00'), 28: bytearray(b'=\x02\x00\x00'), 29: bytearray(b'(\x06\x00\x00'), 30: bytearray(b'\x0e\x05\x00\x00'), 31: bytearray(b'`\x04\x00\x00')}",,[4],,0,"Row(air_system_delay=Row(column_size=209035, value_count=855897, null_value_count=702293, nan_value_count=None, lower_bound=0, upper_bound=1134), air_time=Row(column_size=1031087, value_count=855897, null_value_count=12655, nan_value_count=None, lower_bound=7, upper_bound=690), airline=Row(column_size=306658, value_count=855897, null_value_count=0, nan_value_count=None, lower_bound='AA', upper_bound='WN'), airline_delay=Row(column_size=228371, value_count=855897, null_value_count=702293, nan_value_count=None, lower_bound=0, upper_bound=1576), arrival_delay=Row(column_size=907473, value_count=855897, null_value_count=12655, nan_value_count=None, lower_bound=-82, upper_bound=1576), arrival_time=Row(column_size=1203473, value_count=855897, null_value_count=11051, nan_value_count=None, lower_bound=1, upper_bound=2400), cancellation_reason=Row(column_size=24926, value_count=855897, null_value_count=845168, nan_value_count=None, lower_bound='A', upper_bound='D'), cancelled=Row(column_size=22187, value_count=855897, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), day=Row(column_size=3325, value_count=855897, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=30), day_of_week=Row(column_size=1665, value_count=855897, null_value_count=0, nan_value_count=None, lower_bound=3, upper_bound=3), departure_delay=Row(column_size=841793, value_count=855897, null_value_count=10314, nan_value_count=None, lower_bound=-56, upper_bound=1589), departure_time=Row(column_size=1202356, value_count=855897, null_value_count=10314, nan_value_count=None, lower_bound=1, upper_bound=2400), destination_airport=Row(column_size=895152, value_count=855897, null_value_count=0, nan_value_count=None, lower_bound='10135', upper_bound='YUM'), distance=Row(column_size=1183041, value_count=855897, null_value_count=0, nan_value_count=None, lower_bound=31, upper_bound=4983), diverted=Row(column_size=7937, value_count=855897, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), elapsed_time=Row(column_size=1034900, value_count=855897, null_value_count=12655, nan_value_count=None, lower_bound=14, upper_bound=730), flight_number=Row(column_size=1408664, value_count=855897, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=8442), late_aircraft_delay=Row(column_size=233573, value_count=855897, null_value_count=702293, nan_value_count=None, lower_bound=0, upper_bound=1294), month=Row(column_size=2603, value_count=855897, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=12), origin_airport=Row(column_size=919819, value_count=855897, null_value_count=0, nan_value_count=None, lower_bound='10135', upper_bound='YUM'), scheduled_arrival=Row(column_size=1183140, value_count=855897, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_departure=Row(column_size=478785, value_count=855897, null_value_count=0, nan_value_count=None, lower_bound=3, upper_bound=2359), scheduled_time=Row(column_size=951468, value_count=855897, null_value_count=0, nan_value_count=None, lower_bound=18, upper_bound=718), security_delay=Row(column_size=100416, value_count=855897, null_value_count=702293, nan_value_count=None, lower_bound=0, upper_bound=573), tail_number=Row(column_size=1414097, value_count=855897, null_value_count=1956, nan_value_count=None, lower_bound='7819A', upper_bound='N9EAMQ'), taxi_in=Row(column_size=494205, value_count=855897, null_value_count=11051, nan_value_count=None, lower_bound=1, upper_bound=157), taxi_out=Row(column_size=547089, value_count=855897, null_value_count=10619, nan_value_count=None, lower_bound=1, upper_bound=177), weather_delay=Row(column_size=125823, value_count=855897, null_value_count=702293, nan_value_count=None, lower_bound=0, upper_bound=1120), wheels_off=Row(column_size=1202811, value_count=855897, null_value_count=10619, nan_value_count=None, lower_bound=1, upper_bound=2400), wheels_on=Row(column_size=1203475, value_count=855897, null_value_count=11051, nan_value_count=None, lower_bound=1, upper_bound=2400), year=Row(column_size=1666, value_count=855897, null_value_count=0, nan_value_count=None, lower_bound=2015, upper_bound=2015))"
0,s3://warehouse/db/flights/data/00003-72-dc2c4f34-0586-4421-94d3-1124ed1b0086-0-00001.parquet,PARQUET,0,872521,19891606,"{1: 1704, 2: 2647, 3: 3440, 4: 1704, 5: 312706, 6: 1436485, 7: 1440750, 8: 944899, 9: 913692, 10: 475712, 11: 1221297, 12: 868385, 13: 559815, 14: 1222104, 15: 1011442, 16: 1062496, 17: 1058106, 18: 1205971, 19: 1224786, 20: 473173, 21: 1206114, 22: 1224787, 23: 939075, 24: 8944, 25: 23004, 26: 26249, 27: 245428, 28: 105085, 29: 254678, 30: 240179, 31: 134995}","{1: 872521, 2: 872521, 3: 872521, 4: 872521, 5: 872521, 6: 872521, 7: 872521, 8: 872521, 9: 872521, 10: 872521, 11: 872521, 12: 872521, 13: 872521, 14: 872521, 15: 872521, 16: 872521, 17: 872521, 18: 872521, 19: 872521, 20: 872521, 21: 872521, 22: 872521, 23: 872521, 24: 872521, 25: 872521, 26: 872521, 27: 872521, 28: 872521, 29: 872521, 30: 872521, 31: 872521}","{1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 1906, 8: 0, 9: 0, 10: 0, 11: 11741, 12: 11741, 13: 12182, 14: 12182, 15: 0, 16: 14635, 17: 14635, 18: 0, 19: 12725, 20: 12725, 21: 0, 22: 12725, 23: 14635, 24: 0, 25: 0, 26: 860230, 27: 700829, 28: 700829, 29: 700829, 30: 700829, 31: 700829}",{},"{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x01\x00\x00\x00'), 3: bytearray(b'\x01\x00\x00\x00'), 4: bytearray(b'\x04\x00\x00\x00'), 5: bytearray(b'AA'), 6: bytearray(b'\x01\x00\x00\x00'), 7: bytearray(b'7819A'), 8: bytearray(b'10135'), 9: bytearray(b'10135'), 10: bytearray(b'\x03\x00\x00\x00'), 11: bytearray(b'\x01\x00\x00\x00'), 12: bytearray(b'\xd3\xff\xff\xff'), 13: bytearray(b'\x01\x00\x00\x00'), 14: bytearray(b'\x01\x00\x00\x00'), 15: bytearray(b'\x12\x00\x00\x00'), 16: bytearray(b'\x0f\x00\x00\x00'), 17: bytearray(b'\x08\x00\x00\x00'), 18: bytearray(b'\x1f\x00\x00\x00'), 19: bytearray(b'\x01\x00\x00\x00'), 20: bytearray(b'\x01\x00\x00\x00'), 21: bytearray(b'\x01\x00\x00\x00'), 22: bytearray(b'\x01\x00\x00\x00'), 23: bytearray(b'\xa9\xff\xff\xff'), 24: bytearray(b'\x00\x00\x00\x00'), 25: bytearray(b'\x00\x00\x00\x00'), 26: bytearray(b'A'), 27: bytearray(b'\x00\x00\x00\x00'), 28: bytearray(b'\x00\x00\x00\x00'), 29: bytearray(b'\x00\x00\x00\x00'), 30: bytearray(b'\x00\x00\x00\x00'), 31: bytearray(b'\x00\x00\x00\x00')}","{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x0c\x00\x00\x00'), 3: bytearray(b'\x1f\x00\x00\x00'), 4: bytearray(b'\x04\x00\x00\x00'), 5: bytearray(b'WN'), 6: bytearray(b'\x7f&\x00\x00'), 7: bytearray(b'N9EAMQ'), 8: bytearray(b'YUM'), 9: bytearray(b'YUM'), 10: bytearray(b'7\t\x00\x00'), 11: bytearray(b'`\t\x00\x00'), 12: bytearray(b'q\x06\x00\x00'), 13: bytearray(b'\xb1\x00\x00\x00'), 14: bytearray(b'`\t\x00\x00'), 15: bytearray(b'\xce\x02\x00\x00'), 16: bytearray(b'\xfe\x02\x00\x00'), 17: bytearray(b'\xb2\x02\x00\x00'), 18: bytearray(b'w\x13\x00\x00'), 19: bytearray(b'`\t\x00\x00'), 20: bytearray(b'\xaf\x00\x00\x00'), 21: bytearray(b'7\t\x00\x00'), 22: bytearray(b'`\t\x00\x00'), 23: bytearray(b'd\x06\x00\x00'), 24: bytearray(b'\x01\x00\x00\x00'), 25: bytearray(b'\x01\x00\x00\x00'), 26: bytearray(b'C'), 27: bytearray(b'x\x03\x00\x00'), 28: bytearray(b'l\x01\x00\x00'), 29: bytearray(b'd\x06\x00\x00'), 30: bytearray(b'\t\x04\x00\x00'), 31: bytearray(b',\x04\x00\x00')}",,[4],,0,"Row(air_system_delay=Row(column_size=245428, value_count=872521, null_value_count=700829, nan_value_count=None, lower_bound=0, upper_bound=888), air_time=Row(column_size=1058106, value_count=872521, null_value_count=14635, nan_value_count=None, lower_bound=8, upper_bound=690), airline=Row(column_size=312706, value_count=872521, null_value_count=0, nan_value_count=None, lower_bound='AA', upper_bound='WN'), airline_delay=Row(column_size=254678, value_count=872521, null_value_count=700829, nan_value_count=None, lower_bound=0, upper_bound=1636), arrival_delay=Row(column_size=939075, value_count=872521, null_value_count=14635, nan_value_count=None, lower_bound=-87, upper_bound=1636), arrival_time=Row(column_size=1224787, value_count=872521, null_value_count=12725, nan_value_count=None, lower_bound=1, upper_bound=2400), cancellation_reason=Row(column_size=26249, value_count=872521, null_value_count=860230, nan_value_count=None, lower_bound='A', upper_bound='C'), cancelled=Row(column_size=23004, value_count=872521, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), day=Row(column_size=3440, value_count=872521, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=31), day_of_week=Row(column_size=1704, value_count=872521, null_value_count=0, nan_value_count=None, lower_bound=4, upper_bound=4), departure_delay=Row(column_size=868385, value_count=872521, null_value_count=11741, nan_value_count=None, lower_bound=-45, upper_bound=1649), departure_time=Row(column_size=1221297, value_count=872521, null_value_count=11741, nan_value_count=None, lower_bound=1, upper_bound=2400), destination_airport=Row(column_size=913692, value_count=872521, null_value_count=0, nan_value_count=None, lower_bound='10135', upper_bound='YUM'), distance=Row(column_size=1205971, value_count=872521, null_value_count=0, nan_value_count=None, lower_bound=31, upper_bound=4983), diverted=Row(column_size=8944, value_count=872521, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), elapsed_time=Row(column_size=1062496, value_count=872521, null_value_count=14635, nan_value_count=None, lower_bound=15, upper_bound=766), flight_number=Row(column_size=1436485, value_count=872521, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=9855), late_aircraft_delay=Row(column_size=240179, value_count=872521, null_value_count=700829, nan_value_count=None, lower_bound=0, upper_bound=1033), month=Row(column_size=2647, value_count=872521, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=12), origin_airport=Row(column_size=944899, value_count=872521, null_value_count=0, nan_value_count=None, lower_bound='10135', upper_bound='YUM'), scheduled_arrival=Row(column_size=1206114, value_count=872521, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_departure=Row(column_size=475712, value_count=872521, null_value_count=0, nan_value_count=None, lower_bound=3, upper_bound=2359), scheduled_time=Row(column_size=1011442, value_count=872521, null_value_count=0, nan_value_count=None, lower_bound=18, upper_bound=718), security_delay=Row(column_size=105085, value_count=872521, null_value_count=700829, nan_value_count=None, lower_bound=0, upper_bound=364), tail_number=Row(column_size=1440750, value_count=872521, null_value_count=1906, nan_value_count=None, lower_bound='7819A', upper_bound='N9EAMQ'), taxi_in=Row(column_size=473173, value_count=872521, null_value_count=12725, nan_value_count=None, lower_bound=1, upper_bound=175), taxi_out=Row(column_size=559815, value_count=872521, null_value_count=12182, nan_value_count=None, lower_bound=1, upper_bound=177), weather_delay=Row(column_size=134995, value_count=872521, null_value_count=700829, nan_value_count=None, lower_bound=0, upper_bound=1068), wheels_off=Row(column_size=1222104, value_count=872521, null_value_count=12182, nan_value_count=None, lower_bound=1, upper_bound=2400), wheels_on=Row(column_size=1224786, value_count=872521, null_value_count=12725, nan_value_count=None, lower_bound=1, upper_bound=2400), year=Row(column_size=1704, value_count=872521, null_value_count=0, nan_value_count=None, lower_bound=2015, upper_bound=2015))"
0,s3://warehouse/db/flights/data/00004-73-dc2c4f34-0586-4421-94d3-1124ed1b0086-0-00001.parquet,PARQUET,0,862209,19622735,"{1: 1702, 2: 2643, 3: 3426, 4: 1702, 5: 311025, 6: 1419751, 7: 1423335, 8: 893387, 9: 910771, 10: 472141, 11: 1210384, 12: 837560, 13: 561090, 14: 1211011, 15: 988620, 16: 1048493, 17: 1042800, 18: 1191785, 19: 1211700, 20: 510733, 21: 1191916, 22: 1211701, 23: 921577, 24: 8155, 25: 20262, 26: 22700, 27: 231665, 28: 103797, 29: 244867, 30: 240918, 31: 129396}","{1: 862209, 2: 862209, 3: 862209, 4: 862209, 5: 862209, 6: 862209, 7: 862209, 8: 862209, 9: 862209, 10: 862209, 11: 862209, 12: 862209, 13: 862209, 14: 862209, 15: 862209, 16: 862209, 17: 862209, 18: 862209, 19: 862209, 20: 862209, 21: 862209, 22: 862209, 23: 862209, 24: 862209, 25: 862209, 26: 862209, 27: 862209, 28: 862209, 29: 862209, 30: 862209, 31: 862209}","{1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 1139, 8: 0, 9: 0, 10: 0, 11: 8325, 12: 8325, 13: 8709, 14: 8709, 15: 0, 16: 10822, 17: 10822, 18: 0, 19: 9070, 20: 9070, 21: 0, 22: 9070, 23: 10822, 24: 0, 25: 0, 26: 853404, 27: 698768, 28: 698768, 29: 698768, 30: 698768, 31: 698768}",{},"{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x01\x00\x00\x00'), 3: bytearray(b'\x01\x00\x00\x00'), 4: bytearray(b'\x05\x00\x00\x00'), 5: bytearray(b'AA'), 6: bytearray(b'\x01\x00\x00\x00'), 7: bytearray(b'7819A'), 8: bytearray(b'10135'), 9: bytearray(b'10135'), 10: bytearray(b'\x03\x00\x00\x00'), 11: bytearray(b'\x01\x00\x00\x00'), 12: bytearray(b'\xc9\xff\xff\xff'), 13: bytearray(b'\x01\x00\x00\x00'), 14: bytearray(b'\x01\x00\x00\x00'), 15: bytearray(b'\x12\x00\x00\x00'), 16: bytearray(b'\x10\x00\x00\x00'), 17: bytearray(b'\x07\x00\x00\x00'), 18: bytearray(b'\x1f\x00\x00\x00'), 19: bytearray(b'\x01\x00\x00\x00'), 20: bytearray(b'\x01\x00\x00\x00'), 21: bytearray(b'\x01\x00\x00\x00'), 22: bytearray(b'\x01\x00\x00\x00'), 23: bytearray(b'\xb1\xff\xff\xff'), 24: bytearray(b'\x00\x00\x00\x00'), 25: bytearray(b'\x00\x00\x00\x00'), 26: bytearray(b'A'), 27: bytearray(b'\x00\x00\x00\x00'), 28: bytearray(b'\x00\x00\x00\x00'), 29: bytearray(b'\x00\x00\x00\x00'), 30: bytearray(b'\x00\x00\x00\x00'), 31: bytearray(b'\x00\x00\x00\x00')}","{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x0c\x00\x00\x00'), 3: bytearray(b'\x1f\x00\x00\x00'), 4: bytearray(b'\x05\x00\x00\x00'), 5: bytearray(b'WN'), 6: bytearray(b'\x0e\x1d\x00\x00'), 7: bytearray(b'N9EAMQ'), 8: bytearray(b'YUM'), 9: bytearray(b'YUM'), 10: bytearray(b'7\t\x00\x00'), 11: bytearray(b'`\t\x00\x00'), 12: bytearray(b'\xc4\x07\x00\x00'), 13: bytearray(b'\xe1\x00\x00\x00'), 14: bytearray(b'`\t\x00\x00'), 15: bytearray(b'\xce\x02\x00\x00'), 16: bytearray(b'\xd1\x02\x00\x00'), 17: bytearray(b'\xa7\x02\x00\x00'), 18: bytearray(b'w\x13\x00\x00'), 19: bytearray(b'`\t\x00\x00'), 20: bytearray(b'\xb7\x00\x00\x00'), 21: bytearray(b'7\t\x00\x00'), 22: bytearray(b'`\t\x00\x00'), 23: bytearray(b'\xb3\x07\x00\x00'), 24: bytearray(b'\x01\x00\x00\x00'), 25: bytearray(b'\x01\x00\x00\x00'), 26: bytearray(b'D'), 27: bytearray(b'\x94\x03\x00\x00'), 28: bytearray(b'\xdd\x00\x00\x00'), 29: bytearray(b'\xb3\x07\x00\x00'), 30: bytearray(b'\x0f\x04\x00\x00'), 31: bytearray(b'\xfd\x03\x00\x00')}",,[4],,0,"Row(air_system_delay=Row(column_size=231665, value_count=862209, null_value_count=698768, nan_value_count=None, lower_bound=0, upper_bound=916), air_time=Row(column_size=1042800, value_count=862209, null_value_count=10822, nan_value_count=None, lower_bound=7, upper_bound=679), airline=Row(column_size=311025, value_count=862209, null_value_count=0, nan_value_count=None, lower_bound='AA', upper_bound='WN'), airline_delay=Row(column_size=244867, value_count=862209, null_value_count=698768, nan_value_count=None, lower_bound=0, upper_bound=1971), arrival_delay=Row(column_size=921577, value_count=862209, null_value_count=10822, nan_value_count=None, lower_bound=-79, upper_bound=1971), arrival_time=Row(column_size=1211701, value_count=862209, null_value_count=9070, nan_value_count=None, lower_bound=1, upper_bound=2400), cancellation_reason=Row(column_size=22700, value_count=862209, null_value_count=853404, nan_value_count=None, lower_bound='A', upper_bound='D'), cancelled=Row(column_size=20262, value_count=862209, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), day=Row(column_size=3426, value_count=862209, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=31), day_of_week=Row(column_size=1702, value_count=862209, null_value_count=0, nan_value_count=None, lower_bound=5, upper_bound=5), departure_delay=Row(column_size=837560, value_count=862209, null_value_count=8325, nan_value_count=None, lower_bound=-55, upper_bound=1988), departure_time=Row(column_size=1210384, value_count=862209, null_value_count=8325, nan_value_count=None, lower_bound=1, upper_bound=2400), destination_airport=Row(column_size=910771, value_count=862209, null_value_count=0, nan_value_count=None, lower_bound='10135', upper_bound='YUM'), distance=Row(column_size=1191785, value_count=862209, null_value_count=0, nan_value_count=None, lower_bound=31, upper_bound=4983), diverted=Row(column_size=8155, value_count=862209, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), elapsed_time=Row(column_size=1048493, value_count=862209, null_value_count=10822, nan_value_count=None, lower_bound=16, upper_bound=721), flight_number=Row(column_size=1419751, value_count=862209, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=7438), late_aircraft_delay=Row(column_size=240918, value_count=862209, null_value_count=698768, nan_value_count=None, lower_bound=0, upper_bound=1039), month=Row(column_size=2643, value_count=862209, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=12), origin_airport=Row(column_size=893387, value_count=862209, null_value_count=0, nan_value_count=None, lower_bound='10135', upper_bound='YUM'), scheduled_arrival=Row(column_size=1191916, value_count=862209, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_departure=Row(column_size=472141, value_count=862209, null_value_count=0, nan_value_count=None, lower_bound=3, upper_bound=2359), scheduled_time=Row(column_size=988620, value_count=862209, null_value_count=0, nan_value_count=None, lower_bound=18, upper_bound=718), security_delay=Row(column_size=103797, value_count=862209, null_value_count=698768, nan_value_count=None, lower_bound=0, upper_bound=221), tail_number=Row(column_size=1423335, value_count=862209, null_value_count=1139, nan_value_count=None, lower_bound='7819A', upper_bound='N9EAMQ'), taxi_in=Row(column_size=510733, value_count=862209, null_value_count=9070, nan_value_count=None, lower_bound=1, upper_bound=183), taxi_out=Row(column_size=561090, value_count=862209, null_value_count=8709, nan_value_count=None, lower_bound=1, upper_bound=225), weather_delay=Row(column_size=129396, value_count=862209, null_value_count=698768, nan_value_count=None, lower_bound=0, upper_bound=1021), wheels_off=Row(column_size=1211011, value_count=862209, null_value_count=8709, nan_value_count=None, lower_bound=1, upper_bound=2400), wheels_on=Row(column_size=1211700, value_count=862209, null_value_count=9070, nan_value_count=None, lower_bound=1, upper_bound=2400), year=Row(column_size=1702, value_count=862209, null_value_count=0, nan_value_count=None, lower_bound=2015, upper_bound=2015))"
0,s3://warehouse/db/flights/data/00005-74-dc2c4f34-0586-4421-94d3-1124ed1b0086-0-00001.parquet,PARQUET,0,700545,15925957,"{1: 1398, 2: 2128, 3: 2822, 4: 1398, 5: 257560, 6: 1156818, 7: 1159274, 8: 754731, 9: 726942, 10: 412854, 11: 980802, 12: 683691, 13: 442397, 14: 981158, 15: 822092, 16: 854443, 17: 849759, 18: 968890, 19: 984263, 20: 413531, 21: 969020, 22: 984262, 23: 745456, 24: 7564, 25: 17552, 26: 19631, 27: 162608, 28: 80434, 29: 176868, 30: 170022, 31: 100274}","{1: 700545, 2: 700545, 3: 700545, 4: 700545, 5: 700545, 6: 700545, 7: 700545, 8: 700545, 9: 700545, 10: 700545, 11: 700545, 12: 700545, 13: 700545, 14: 700545, 15: 700545, 16: 700545, 17: 700545, 18: 700545, 19: 700545, 20: 700545, 21: 700545, 22: 700545, 23: 700545, 24: 700545, 25: 700545, 26: 700545, 27: 700545, 28: 700545, 29: 700545, 30: 700545, 31: 700545}","{1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 968, 8: 0, 9: 0, 10: 0, 11: 8292, 12: 8292, 13: 8650, 14: 8650, 15: 1, 16: 10800, 17: 10800, 18: 0, 19: 9142, 20: 9142, 21: 0, 22: 9142, 23: 10800, 24: 0, 25: 0, 26: 691796, 27: 590230, 28: 590230, 29: 590230, 30: 590230, 31: 590230}",{},"{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x01\x00\x00\x00'), 3: bytearray(b'\x01\x00\x00\x00'), 4: bytearray(b'\x06\x00\x00\x00'), 5: bytearray(b'AA'), 6: bytearray(b'\x01\x00\x00\x00'), 7: bytearray(b'7819A'), 8: bytearray(b'10135'), 9: bytearray(b'10135'), 10: bytearray(b'\x01\x00\x00\x00'), 11: bytearray(b'\x01\x00\x00\x00'), 12: bytearray(b'\xbc\xff\xff\xff'), 13: bytearray(b'\x01\x00\x00\x00'), 14: bytearray(b'\x01\x00\x00\x00'), 15: bytearray(b'\x12\x00\x00\x00'), 16: bytearray(b'\x10\x00\x00\x00'), 17: bytearray(b'\x08\x00\x00\x00'), 18: bytearray(b'\x1f\x00\x00\x00'), 19: bytearray(b'\x01\x00\x00\x00'), 20: bytearray(b'\x01\x00\x00\x00'), 21: bytearray(b'\x01\x00\x00\x00'), 22: bytearray(b'\x01\x00\x00\x00'), 23: bytearray(b'\xa9\xff\xff\xff'), 24: bytearray(b'\x00\x00\x00\x00'), 25: bytearray(b'\x00\x00\x00\x00'), 26: bytearray(b'A'), 27: bytearray(b'\x00\x00\x00\x00'), 28: bytearray(b'\x00\x00\x00\x00'), 29: bytearray(b'\x00\x00\x00\x00'), 30: bytearray(b'\x00\x00\x00\x00'), 31: bytearray(b'\x00\x00\x00\x00')}","{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x0c\x00\x00\x00'), 3: bytearray(b'\x1f\x00\x00\x00'), 4: bytearray(b'\x06\x00\x00\x00'), 5: bytearray(b'WN'), 6: bytearray(b'\xfd \x00\x00'), 7: bytearray(b'N9EAMQ'), 8: bytearray(b'YUM'), 9: bytearray(b'YUM'), 10: bytearray(b'7\t\x00\x00'), 11: bytearray(b'`\t\x00\x00'), 12: bytearray(b'V\x07\x00\x00'), 13: bytearray(b'\xb0\x00\x00\x00'), 14: bytearray(b'`\t\x00\x00'), 15: bytearray(b'\xce\x02\x00\x00'), 16: bytearray(b'\xdf\x02\x00\x00'), 17: bytearray(b'\xab\x02\x00\x00'), 18: bytearray(b'w\x13\x00\x00'), 19: bytearray(b'`\t\x00\x00'), 20: bytearray(b'\xca\x00\x00\x00'), 21: bytearray(b'`\t\x00\x00'), 22: bytearray(b'`\t\x00\x00'), 23: bytearray(b'j\x07\x00\x00'), 24: bytearray(b'\x01\x00\x00\x00'), 25: bytearray(b'\x01\x00\x00\x00'), 26: bytearray(b'D'), 27: bytearray(b'\x19\x04\x00\x00'), 28: bytearray(b'\xed\x00\x00\x00'), 29: bytearray(b'V\x07\x00\x00'), 30: bytearray(b'\xe8\x04\x00\x00'), 31: bytearray(b'\x80\x04\x00\x00')}",,[4],,0,"Row(air_system_delay=Row(column_size=162608, value_count=700545, null_value_count=590230, nan_value_count=None, lower_bound=0, upper_bound=1049), air_time=Row(column_size=849759, value_count=700545, null_value_count=10800, nan_value_count=None, lower_bound=8, upper_bound=683), airline=Row(column_size=257560, value_count=700545, null_value_count=0, nan_value_count=None, lower_bound='AA', upper_bound='WN'), airline_delay=Row(column_size=176868, value_count=700545, null_value_count=590230, nan_value_count=None, lower_bound=0, upper_bound=1878), arrival_delay=Row(column_size=745456, value_count=700545, null_value_count=10800, nan_value_count=None, lower_bound=-87, upper_bound=1898), arrival_time=Row(column_size=984262, value_count=700545, null_value_count=9142, nan_value_count=None, lower_bound=1, upper_bound=2400), cancellation_reason=Row(column_size=19631, value_count=700545, null_value_count=691796, nan_value_count=None, lower_bound='A', upper_bound='D'), cancelled=Row(column_size=17552, value_count=700545, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), day=Row(column_size=2822, value_count=700545, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=31), day_of_week=Row(column_size=1398, value_count=700545, null_value_count=0, nan_value_count=None, lower_bound=6, upper_bound=6), departure_delay=Row(column_size=683691, value_count=700545, null_value_count=8292, nan_value_count=None, lower_bound=-68, upper_bound=1878), departure_time=Row(column_size=980802, value_count=700545, null_value_count=8292, nan_value_count=None, lower_bound=1, upper_bound=2400), destination_airport=Row(column_size=726942, value_count=700545, null_value_count=0, nan_value_count=None, lower_bound='10135', upper_bound='YUM'), distance=Row(column_size=968890, value_count=700545, null_value_count=0, nan_value_count=None, lower_bound=31, upper_bound=4983), diverted=Row(column_size=7564, value_count=700545, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), elapsed_time=Row(column_size=854443, value_count=700545, null_value_count=10800, nan_value_count=None, lower_bound=16, upper_bound=735), flight_number=Row(column_size=1156818, value_count=700545, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=8445), late_aircraft_delay=Row(column_size=170022, value_count=700545, null_value_count=590230, nan_value_count=None, lower_bound=0, upper_bound=1256), month=Row(column_size=2128, value_count=700545, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=12), origin_airport=Row(column_size=754731, value_count=700545, null_value_count=0, nan_value_count=None, lower_bound='10135', upper_bound='YUM'), scheduled_arrival=Row(column_size=969020, value_count=700545, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2400), scheduled_departure=Row(column_size=412854, value_count=700545, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_time=Row(column_size=822092, value_count=700545, null_value_count=1, nan_value_count=None, lower_bound=18, upper_bound=718), security_delay=Row(column_size=80434, value_count=700545, null_value_count=590230, nan_value_count=None, lower_bound=0, upper_bound=237), tail_number=Row(column_size=1159274, value_count=700545, null_value_count=968, nan_value_count=None, lower_bound='7819A', upper_bound='N9EAMQ'), taxi_in=Row(column_size=413531, value_count=700545, null_value_count=9142, nan_value_count=None, lower_bound=1, upper_bound=202), taxi_out=Row(column_size=442397, value_count=700545, null_value_count=8650, nan_value_count=None, lower_bound=1, upper_bound=176), weather_delay=Row(column_size=100274, value_count=700545, null_value_count=590230, nan_value_count=None, lower_bound=0, upper_bound=1152), wheels_off=Row(column_size=981158, value_count=700545, null_value_count=8650, nan_value_count=None, lower_bound=1, upper_bound=2400), wheels_on=Row(column_size=984263, value_count=700545, null_value_count=9142, nan_value_count=None, lower_bound=1, upper_bound=2400), year=Row(column_size=1398, value_count=700545, null_value_count=0, nan_value_count=None, lower_bound=2015, upper_bound=2015))"
0,s3://warehouse/db/flights/data/00006-75-dc2c4f34-0586-4421-94d3-1124ed1b0086-0-00001.parquet,PARQUET,0,817764,18565050,"{1: 1590, 2: 2456, 3: 3168, 4: 1590, 5: 296457, 6: 1347081, 7: 1352128, 8: 868126, 9: 884105, 10: 457640, 11: 1145678, 12: 791492, 13: 517406, 14: 1146133, 15: 947332, 16: 984083, 17: 986078, 18: 1130497, 19: 1147139, 20: 458657, 21: 1130641, 22: 1147129, 23: 871345, 24: 8005, 25: 22695, 26: 25592, 27: 195147, 28: 96608, 29: 213487, 30: 225700, 31: 120542}","{1: 817764, 2: 817764, 3: 817764, 4: 817764, 5: 817764, 6: 817764, 7: 817764, 8: 817764, 9: 817764, 10: 817764, 11: 817764, 12: 817764, 13: 817764, 14: 817764, 15: 817764, 16: 817764, 17: 817764, 18: 817764, 19: 817764, 20: 817764, 21: 817764, 22: 817764, 23: 817764, 24: 817764, 25: 817764, 26: 817764, 27: 817764, 28: 817764, 29: 817764, 30: 817764, 31: 817764}","{1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 1929, 8: 0, 9: 0, 10: 0, 11: 12617, 12: 12617, 13: 13021, 14: 13021, 15: 3, 16: 15209, 17: 15209, 18: 0, 19: 13509, 20: 13509, 21: 0, 22: 13509, 23: 15209, 24: 0, 25: 0, 26: 804599, 27: 670826, 28: 670826, 29: 670826, 30: 670826, 31: 670826}",{},"{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x01\x00\x00\x00'), 3: bytearray(b'\x01\x00\x00\x00'), 4: bytearray(b'\x07\x00\x00\x00'), 5: bytearray(b'AA'), 6: bytearray(b'\x01\x00\x00\x00'), 7: bytearray(b'7819A'), 8: bytearray(b'10135'), 9: bytearray(b'10135'), 10: bytearray(b'\x01\x00\x00\x00'), 11: bytearray(b'\x01\x00\x00\x00'), 12: bytearray(b'\xae\xff\xff\xff'), 13: bytearray(b'\x01\x00\x00\x00'), 14: bytearray(b'\x01\x00\x00\x00'), 15: bytearray(b'\x12\x00\x00\x00'), 16: bytearray(b'\x0e\x00\x00\x00'), 17: bytearray(b'\x07\x00\x00\x00'), 18: bytearray(b'\x1f\x00\x00\x00'), 19: bytearray(b'\x01\x00\x00\x00'), 20: bytearray(b'\x01\x00\x00\x00'), 21: bytearray(b'\x01\x00\x00\x00'), 22: bytearray(b'\x01\x00\x00\x00'), 23: bytearray(b'\xb0\xff\xff\xff'), 24: bytearray(b'\x00\x00\x00\x00'), 25: bytearray(b'\x00\x00\x00\x00'), 26: bytearray(b'A'), 27: bytearray(b'\x00\x00\x00\x00'), 28: bytearray(b'\x00\x00\x00\x00'), 29: bytearray(b'\x00\x00\x00\x00'), 30: bytearray(b'\x00\x00\x00\x00'), 31: bytearray(b'\x00\x00\x00\x00')}","{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x0c\x00\x00\x00'), 3: bytearray(b'\x1f\x00\x00\x00'), 4: bytearray(b'\x07\x00\x00\x00'), 5: bytearray(b'WN'), 6: bytearray(b'B&\x00\x00'), 7: bytearray(b'N9EAMQ'), 8: bytearray(b'YUM'), 9: bytearray(b'YUM'), 10: bytearray(b'7\t\x00\x00'), 11: bytearray(b'`\t\x00\x00'), 12: bytearray(b'\x86\x06\x00\x00'), 13: bytearray(b'\xb4\x00\x00\x00'), 14: bytearray(b'`\t\x00\x00'), 15: bytearray(b'\xce\x02\x00\x00'), 16: bytearray(b'\xc6\x02\x00\x00'), 17: bytearray(b'\xaa\x02\x00\x00'), 18: bytearray(b'w\x13\x00\x00'), 19: bytearray(b'`\t\x00\x00'), 20: bytearray(b'\xa9\x00\x00\x00'), 21: bytearray(b'`\t\x00\x00'), 22: bytearray(b'`\t\x00\x00'), 23: bytearray(b'\x81\x06\x00\x00'), 24: bytearray(b'\x01\x00\x00\x00'), 25: bytearray(b'\x01\x00\x00\x00'), 26: bytearray(b'C'), 27: bytearray(b'M\x04\x00\x00'), 28: bytearray(b'\xf1\x00\x00\x00'), 29: bytearray(b'\x81\x06\x00\x00'), 30: bytearray(b'3\x05\x00\x00'), 31: bytearray(b'\xbb\x04\x00\x00')}",,[4],,0,"Row(air_system_delay=Row(column_size=195147, value_count=817764, null_value_count=670826, nan_value_count=None, lower_bound=0, upper_bound=1101), air_time=Row(column_size=986078, value_count=817764, null_value_count=15209, nan_value_count=None, lower_bound=7, upper_bound=682), airline=Row(column_size=296457, value_count=817764, null_value_count=0, nan_value_count=None, lower_bound='AA', upper_bound='WN'), airline_delay=Row(column_size=213487, value_count=817764, null_value_count=670826, nan_value_count=None, lower_bound=0, upper_bound=1665), arrival_delay=Row(column_size=871345, value_count=817764, null_value_count=15209, nan_value_count=None, lower_bound=-80, upper_bound=1665), arrival_time=Row(column_size=1147129, value_count=817764, null_value_count=13509, nan_value_count=None, lower_bound=1, upper_bound=2400), cancellation_reason=Row(column_size=25592, value_count=817764, null_value_count=804599, nan_value_count=None, lower_bound='A', upper_bound='C'), cancelled=Row(column_size=22695, value_count=817764, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), day=Row(column_size=3168, value_count=817764, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=31), day_of_week=Row(column_size=1590, value_count=817764, null_value_count=0, nan_value_count=None, lower_bound=7, upper_bound=7), departure_delay=Row(column_size=791492, value_count=817764, null_value_count=12617, nan_value_count=None, lower_bound=-82, upper_bound=1670), departure_time=Row(column_size=1145678, value_count=817764, null_value_count=12617, nan_value_count=None, lower_bound=1, upper_bound=2400), destination_airport=Row(column_size=884105, value_count=817764, null_value_count=0, nan_value_count=None, lower_bound='10135', upper_bound='YUM'), distance=Row(column_size=1130497, value_count=817764, null_value_count=0, nan_value_count=None, lower_bound=31, upper_bound=4983), diverted=Row(column_size=8005, value_count=817764, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), elapsed_time=Row(column_size=984083, value_count=817764, null_value_count=15209, nan_value_count=None, lower_bound=14, upper_bound=710), flight_number=Row(column_size=1347081, value_count=817764, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=9794), late_aircraft_delay=Row(column_size=225700, value_count=817764, null_value_count=670826, nan_value_count=None, lower_bound=0, upper_bound=1331), month=Row(column_size=2456, value_count=817764, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=12), origin_airport=Row(column_size=868126, value_count=817764, null_value_count=0, nan_value_count=None, lower_bound='10135', upper_bound='YUM'), scheduled_arrival=Row(column_size=1130641, value_count=817764, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2400), scheduled_departure=Row(column_size=457640, value_count=817764, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_time=Row(column_size=947332, value_count=817764, null_value_count=3, nan_value_count=None, lower_bound=18, upper_bound=718), security_delay=Row(column_size=96608, value_count=817764, null_value_count=670826, nan_value_count=None, lower_bound=0, upper_bound=241), tail_number=Row(column_size=1352128, value_count=817764, null_value_count=1929, nan_value_count=None, lower_bound='7819A', upper_bound='N9EAMQ'), taxi_in=Row(column_size=458657, value_count=817764, null_value_count=13509, nan_value_count=None, lower_bound=1, upper_bound=169), taxi_out=Row(column_size=517406, value_count=817764, null_value_count=13021, nan_value_count=None, lower_bound=1, upper_bound=180), weather_delay=Row(column_size=120542, value_count=817764, null_value_count=670826, nan_value_count=None, lower_bound=0, upper_bound=1211), wheels_off=Row(column_size=1146133, value_count=817764, null_value_count=13021, nan_value_count=None, lower_bound=1, upper_bound=2400), wheels_on=Row(column_size=1147139, value_count=817764, null_value_count=13509, nan_value_count=None, lower_bound=1, upper_bound=2400), year=Row(column_size=1590, value_count=817764, null_value_count=0, nan_value_count=None, lower_bound=2015, upper_bound=2015))"


In [27]:
%%sql
SELECT
    SUM(file_size_in_bytes)
FROM airline.db.flights.files

sum(file_size_in_bytes)
132410345


In [26]:
%%sql
SELECT
    file_size_in_bytes
FROM airline.db.flights.files

file_size_in_bytes
19788655
19204359
19411983
19891606
19622735
15925957
18565050
