In [1]:
## Optional
# Format output of Jupyter Notebook
from IPython.core.display import HTML
display(HTML("<style>pre { white-space: pre !important; }</style>"))

# Hide python warnings
import warnings
warnings.filterwarnings('ignore')

In [130]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DateType
from pyspark.sql.functions import lit, isnull, when, count, col, regexp_extract, concat_ws, to_date, expr, quarter, when, date_add, year, month, day, dayofweek

# TODO - parameterize endpoints/ports such as minio:9000 with envs

# Define config for SparkSession,
# such as Iceberg catalog that utilizes minio, an S3-compatible local object storage
spark_configs = {
    'spark.master': 'spark://spark-iceberg:7077',
    'spark.sql.catalog.airline': 'org.apache.iceberg.spark.SparkCatalog',
    # 'spark.sql.catalog.airline': 'org.apache.iceberg.spark.SparkSessionCatalog',
    'spark.sql.catalog.airline.io-impl': 'org.apache.iceberg.aws.s3.S3FileIO',
    'spark.sql.catalog.airline.s3.endpoint': 'http://minio:9000',
    'spark.sql.catalog.airline.type': 'rest',
    'spark.sql.catalog.airline.uri': 'http://rest:8181',
    'spark.sql.catalog.airline.warehouse': 's3://warehouse',
    'spark.sql.defaultCatalog': 'airline'
}

# Initialize SparkSession
spark = (
    SparkSession
    .builder
    .appName('EDA')
    .config(map=spark_configs)
    .getOrCreate()
)

In [7]:
import pprint as pp

# print(spark.sparkContext.uiWebUrl)

print(spark.conf.get('spark.app.id'))
print(spark.conf.get('spark.app.name'))
print(spark.conf.get('spark.app.startTime'))
print(spark.conf.get('spark.master'))
print()
print(spark.conf.get('spark.sql.catalog.airline'))
print(spark.conf.get('spark.sql.catalog.airline.io-impl'))
print(spark.conf.get('spark.sql.catalog.airline.s3.endpoint'))
print(spark.conf.get('spark.sql.catalog.airline.type'))
print(spark.conf.get('spark.sql.catalog.airline.uri'))
print(spark.conf.get('spark.sql.catalog.airline.warehouse'))
print()

                     
# pp.pp(dict(sc.getConf().getAll()), sort_dicts=True, )

local-1734722159949
EDA
1734722158776
spark://spark-iceberg:7077

org.apache.iceberg.spark.SparkCatalog
org.apache.iceberg.aws.s3.S3FileIO
http://minio:9000
rest
http://rest:8181
s3://warehouse



# Ingest raw csv data

In [37]:
# Check for alphabetic characters in flight_number, this is useless...
raw_data = spark.read.option('header', True).csv(filename)
raw_data.where(regexp_extract('flight_number', '^\d+$', 0) == '').show()

[Stage 34:>                                                         (0 + 7) / 7]

+----+-----+---+-----------+-------+-------------+-----------+--------------+-------------------+-------------------+--------------+---------------+--------+----------+--------------+------------+--------+--------+---------+-------+-----------------+------------+-------------+--------+---------+-------------------+----------------+--------------+-------------+-------------------+-------------+
|YEAR|MONTH|DAY|DAY_OF_WEEK|AIRLINE|FLIGHT_NUMBER|TAIL_NUMBER|ORIGIN_AIRPORT|DESTINATION_AIRPORT|SCHEDULED_DEPARTURE|DEPARTURE_TIME|DEPARTURE_DELAY|TAXI_OUT|WHEELS_OFF|SCHEDULED_TIME|ELAPSED_TIME|AIR_TIME|DISTANCE|WHEELS_ON|TAXI_IN|SCHEDULED_ARRIVAL|ARRIVAL_TIME|ARRIVAL_DELAY|DIVERTED|CANCELLED|CANCELLATION_REASON|AIR_SYSTEM_DELAY|SECURITY_DELAY|AIRLINE_DELAY|LATE_AIRCRAFT_DELAY|WEATHER_DELAY|
+----+-----+---+-----------+-------+-------------+-----------+--------------+-------------------+-------------------+--------------+---------------+--------+----------+--------------+------------+--------+-

                                                                                

In [5]:
# Ingest flights data

# TODO: Fix nullables column, cant set column as Not-Nullable for some reason...

# Define schema
flights_schema = StructType([
    StructField("year", IntegerType(), False),   # Non-Nullable
    StructField("month", IntegerType(), False),  # Non-Nullable
    StructField("day", IntegerType(), False),    # Non-Nullable
    StructField("day_of_week", IntegerType(), False),    # Non-Nullable
    StructField("airline", StringType(), False),         # Non-Nullable
    StructField("flight_number", IntegerType(), False),  # Non-Nullable
    StructField("tail_number", StringType(), True),
    StructField("origin_airport", StringType(), False),  # Non-Nullable
    StructField("destination_airport", StringType(), False),   # Non-Nullable
    StructField("scheduled_departure", IntegerType(), False),  # Non-Nullable
    StructField("departure_time", IntegerType(), True),
    StructField("departure_delay", IntegerType(), True),
    StructField("taxi_out", IntegerType(), True),
    StructField("wheels_off", IntegerType(), True),
    StructField("scheduled_time", IntegerType(), True),
    StructField("elapsed_time", IntegerType(), True),
    StructField("air_time", IntegerType(), True),
    StructField("distance", IntegerType(), False),  # Non-Nullable
    StructField("wheels_on", IntegerType(), True),
    StructField("taxi_in", IntegerType(), True),
    StructField("scheduled_arrival", IntegerType(), False),  # Non-Nullable
    StructField("arrival_time", IntegerType(), True),
    StructField("arrival_delay", IntegerType(), True),
    StructField("diverted", IntegerType(), False),   # Non-Nullable
    StructField("cancelled", IntegerType(), False),  # Non-Nullable
    StructField("cancellation_reason", StringType(), True),
    StructField("air_system_delay", IntegerType(), True),
    StructField("security_delay", IntegerType(), True),
    StructField("airline_delay", IntegerType(), True),
    StructField("late_aircraft_delay", IntegerType(), True),
    StructField("weather_delay", IntegerType(), True)
])


# Read flights.csv
filename = '/home/iceberg/data/flights.csv'
flights_df = spark.read.schema(flights_schema).csv(
    '/home/iceberg/data/flights.csv',
    schema=flights_schema,
    enforceSchema=True,
    header = True
)
flights_df = flights_df.sort(['year', 'month', 'day', 'scheduled_departure'])
flights_df.cache()

flights_df.show()
# flights_df.printSchema()
flights_df.explain()

24/12/21 05:10:27 WARN SparkStringUtils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
                                                                                

+----+-----+---+-----------+-------+-------------+-----------+--------------+-------------------+-------------------+--------------+---------------+--------+----------+--------------+------------+--------+--------+---------+-------+-----------------+------------+-------------+--------+---------+-------------------+----------------+--------------+-------------+-------------------+-------------+
|year|month|day|day_of_week|airline|flight_number|tail_number|origin_airport|destination_airport|scheduled_departure|departure_time|departure_delay|taxi_out|wheels_off|scheduled_time|elapsed_time|air_time|distance|wheels_on|taxi_in|scheduled_arrival|arrival_time|arrival_delay|diverted|cancelled|cancellation_reason|air_system_delay|security_delay|airline_delay|late_aircraft_delay|weather_delay|
+----+-----+---+-----------+-------+-------------+-----------+--------------+-------------------+-------------------+--------------+---------------+--------+----------+--------------+------------+--------+-

In [None]:
# Experimenting

# flights_df.groupBy(['flight_number', 'tail_number']).count().show()
# flights_df.filter(col('flight_number') == 2519).sort(['airline', 'month', 'day']).show(50)

# # Spark SQL
# date_df = flights_df.select(['year', 'month', 'day', 'day_of_week']).withColumn('date', expr('to_date(concat(year, "-", month))'))

In [135]:
## Create dim_date table
# Create date_df, we extract the date info from flights_df and dedup,
# Then add columns for date and quarter
date_df = flights_df \
    .select(['year', 'month', 'day', 'day_of_week']) \
    .dropDuplicates(['year', 'month', 'day', 'day_of_week']) \
    .withColumn('date', to_date(concat_ws('-', 'year', 'month', 'day'))) \
    .withColumn('quarter', quarter('date'))
date_df.cache()
# date_df.show()

# List of U.S. federal holidays
us_holidays_2015 = [
    ("2015-01-01", "New Year's Day"),
    ("2015-01-19", "Martin Luther King Jr. Day"),
    ("2015-02-16", "Presidents' Day"),
    ("2015-05-25", "Memorial Day"),
    ("2015-07-04", "Independence Day"),
    ("2015-09-07", "Labor Day"),
    ("2015-10-12", "Columbus Day"),
    ("2015-11-11", "Veterans Day"),
    ("2015-11-26", "Thanksgiving Day"),
    ("2015-12-25", "Christmas Day"),
]

# Create holidays_df and cast date from STRING to DATE type
holidays_df = spark.createDataFrame(us_holidays_2015, ['holiday_date', 'holiday_name'])
holidays_df = holidays_df.withColumn('holiday_date', to_date('holiday_date'))

# Join holidays to date_df and add is_holiday column
date_df = date_df \
    .join(
        holidays_df,
        date_df.date == holidays_df.holiday_date,
        'left'
    ) \
    .withColumn(
        'is_holiday',
        when(col('holiday_name').isNotNull(), lit(True)).otherwise(lit(False))
    ) \
    .drop('holiday_date') \
    .sort('date')

# Rearrange date to be first column,
date_df = date_df.select('date', *[col(c) for c in date_df.columns if c != 'date'])

date_df.show()

24/12/21 06:59:36 WARN CacheManager: Asked to cache already cached data.


+----------+----+-----+---+-----------+-------+--------------------+----------+
|      date|year|month|day|day_of_week|quarter|        holiday_name|is_holiday|
+----------+----+-----+---+-----------+-------+--------------------+----------+
|2015-01-01|2015|    1|  1|          4|      1|      New Year's Day|      true|
|2015-01-02|2015|    1|  2|          5|      1|                NULL|     false|
|2015-01-03|2015|    1|  3|          6|      1|                NULL|     false|
|2015-01-04|2015|    1|  4|          7|      1|                NULL|     false|
|2015-01-05|2015|    1|  5|          1|      1|                NULL|     false|
|2015-01-06|2015|    1|  6|          2|      1|                NULL|     false|
|2015-01-07|2015|    1|  7|          3|      1|                NULL|     false|
|2015-01-08|2015|    1|  8|          4|      1|                NULL|     false|
|2015-01-09|2015|    1|  9|          5|      1|                NULL|     false|
|2015-01-10|2015|    1| 10|          6| 

In [139]:
# Original day_of_week 1 is Monday, spark dayofweek 1 is Sunday
date_df = spark.range(365) \
    .withColumn('date', expr('date_add("2015-01-01", CAST(id AS INT))')) \
    .withColumn('year', year('date')) \
    .withColumn('month', month('date')) \
    .withColumn('day', day('date')) \
    .withColumn('day_of_week', dayofweek('date')) \
    .withColumn('quarter', quarter('date')) \
    .drop('id')

date_df.show()

+----------+----+-----+---+-----------+-------+
|      date|year|month|day|day_of_week|quarter|
+----------+----+-----+---+-----------+-------+
|2015-01-01|2015|    1|  1|          5|      1|
|2015-01-02|2015|    1|  2|          6|      1|
|2015-01-03|2015|    1|  3|          7|      1|
|2015-01-04|2015|    1|  4|          1|      1|
|2015-01-05|2015|    1|  5|          2|      1|
|2015-01-06|2015|    1|  6|          3|      1|
|2015-01-07|2015|    1|  7|          4|      1|
|2015-01-08|2015|    1|  8|          5|      1|
|2015-01-09|2015|    1|  9|          6|      1|
|2015-01-10|2015|    1| 10|          7|      1|
|2015-01-11|2015|    1| 11|          1|      1|
|2015-01-12|2015|    1| 12|          2|      1|
|2015-01-13|2015|    1| 13|          3|      1|
|2015-01-14|2015|    1| 14|          4|      1|
|2015-01-15|2015|    1| 15|          5|      1|
|2015-01-16|2015|    1| 16|          6|      1|
|2015-01-17|2015|    1| 17|          7|      1|
|2015-01-18|2015|    1| 18|          1| 

# Explore data!
TODO:
- Check statistics of important columns, like what are the percentiles for departure/arrival delay?
- Check max values for delay

## Total number of flight records: 5,819,079

In [32]:
flights_df.count()

                                                                                

5819079

## Total number of cancelled flights: 89,884, Total number of diverted flights: 15,187
These will cause nulls in other columns.

In [37]:
flights_df.where('cancelled = 1').count()

                                                                                

89884

In [35]:
flights_df.where('diverted = 1').count()

                                                                                

15187

## Get Null counts for each column

In [31]:
flights_df.select([count(when(isnull(c), c)).alias(c) for c in data.columns]).show()



+----+-----+---+-----------+-------+-------------+-----------+--------------+-------------------+-------------------+--------------+---------------+--------+----------+--------------+------------+--------+--------+---------+-------+-----------------+------------+-------------+--------+---------+-------------------+----------------+--------------+-------------+-------------------+-------------+
|YEAR|MONTH|DAY|DAY_OF_WEEK|AIRLINE|FLIGHT_NUMBER|TAIL_NUMBER|ORIGIN_AIRPORT|DESTINATION_AIRPORT|SCHEDULED_DEPARTURE|DEPARTURE_TIME|DEPARTURE_DELAY|TAXI_OUT|WHEELS_OFF|SCHEDULED_TIME|ELAPSED_TIME|AIR_TIME|DISTANCE|WHEELS_ON|TAXI_IN|SCHEDULED_ARRIVAL|ARRIVAL_TIME|ARRIVAL_DELAY|DIVERTED|CANCELLED|CANCELLATION_REASON|AIR_SYSTEM_DELAY|SECURITY_DELAY|AIRLINE_DELAY|LATE_AIRCRAFT_DELAY|WEATHER_DELAY|
+----+-----+---+-----------+-------+-------------+-----------+--------------+-------------------+-------------------+--------------+---------------+--------+----------+--------------+------------+--------+-

                                                                                

## Investigate some causes for nulls in each column

In [48]:
# List of columns with null values
null_columns = [col for col in flights_df.columns if flights_df.filter(f"{col} IS NULL").count() > 0]

# Iterate through columns to investigate null causes
for col in null_columns:
    print(f"Investigating nulls in column: {col}")
    # Count rows where column is null and cancelled == 1
    count = flights_df.where(f"`{col}` IS NULL AND cancelled = 1").count()
    print(f"Nulls in {col} where flight is cancelled: {count}\n")

                                                                                

Investigating nulls in column: tail_number


                                                                                

Nulls in tail_number where flight is cancelled: 14721

Investigating nulls in column: departure_time


                                                                                

Nulls in departure_time where flight is cancelled: 86153

Investigating nulls in column: departure_delay


                                                                                

Nulls in departure_delay where flight is cancelled: 86153

Investigating nulls in column: taxi_out


                                                                                

Nulls in taxi_out where flight is cancelled: 89047

Investigating nulls in column: wheels_off


                                                                                

Nulls in wheels_off where flight is cancelled: 89047

Investigating nulls in column: scheduled_time


                                                                                

Nulls in scheduled_time where flight is cancelled: 5

Investigating nulls in column: elapsed_time


                                                                                

Nulls in elapsed_time where flight is cancelled: 89884

Investigating nulls in column: air_time


                                                                                

Nulls in air_time where flight is cancelled: 89884

Investigating nulls in column: wheels_on


                                                                                

Nulls in wheels_on where flight is cancelled: 89884

Investigating nulls in column: taxi_in


                                                                                

Nulls in taxi_in where flight is cancelled: 89884

Investigating nulls in column: arrival_time


                                                                                

Nulls in arrival_time where flight is cancelled: 89884

Investigating nulls in column: arrival_delay


                                                                                

Nulls in arrival_delay where flight is cancelled: 89884

Investigating nulls in column: cancellation_reason


                                                                                

Nulls in cancellation_reason where flight is cancelled: 0

Investigating nulls in column: air_system_delay


                                                                                

Nulls in air_system_delay where flight is cancelled: 89884

Investigating nulls in column: security_delay


                                                                                

Nulls in security_delay where flight is cancelled: 89884

Investigating nulls in column: airline_delay


                                                                                

Nulls in airline_delay where flight is cancelled: 89884

Investigating nulls in column: late_aircraft_delay


                                                                                

Nulls in late_aircraft_delay where flight is cancelled: 89884

Investigating nulls in column: weather_delay


[Stage 290:>                                                      (0 + 12) / 12]

Nulls in weather_delay where flight is cancelled: 89884



                                                                                

#### Tail_number - all Nulls are from cancelled flights
Although, not all cancelled flights have Nulls in tail_number. Possibly some flights are cancelled before an aircraft is assigned to the flight.

In [40]:
flights_df.where('tail_number IS NULL AND cancelled = 1').count()

                                                                                

14721

In [44]:
flights_df.where('departure_time IS NULL and cancelled = 1').count()

                                                                                

86153

#### Scheduled_time - 6 Nulls, 5 out of 6 flight were cancelled, 1 flight was diverted
These flights took off, but did not arrive in destination.
Scheduled_time is the estimated time in air?

In [28]:
flights_df.where('scheduled_time IS NULL').show()

[Stage 19:>                                                         (0 + 7) / 7]

+----+-----+---+-----------+-------+-------------+-----------+--------------+-------------------+-------------------+--------------+---------------+--------+----------+--------------+------------+--------+--------+---------+-------+-----------------+------------+-------------+--------+---------+-------------------+----------------+--------------+-------------+-------------------+-------------+
|year|month|day|day_of_week|airline|flight_number|tail_number|origin_airport|destination_airport|scheduled_departure|departure_time|departure_delay|taxi_out|wheels_off|scheduled_time|elapsed_time|air_time|distance|wheels_on|taxi_in|scheduled_arrival|arrival_time|arrival_delay|diverted|cancelled|cancellation_reason|air_system_delay|security_delay|airline_delay|late_aircraft_delay|weather_delay|
+----+-----+---+-----------+-------+-------------+-----------+--------------+-------------------+-------------------+--------------+---------------+--------+----------+--------------+------------+--------+-

                                                                                

# Write to staging Iceberg Table

In [13]:
# Drop flights table
spark.sql('DROP TABLE airline.db.flights PURGE')

DataFrame[]

In [14]:
# Create flights table if not exists
spark.sql("""
CREATE TABLE IF NOT EXISTS airline.db.flights (
    year INT,
    month INT,
    day INT,
    day_of_week INT,
    airline STRING,
    flight_number INT,
    tail_number STRING,
    origin_airport STRING,
    destination_airport STRING,
    scheduled_departure INT,
    departure_time INT,
    departure_delay INT,
    taxi_out INT,
    wheels_off INT,
    scheduled_time INT,
    elapsed_time INT,
    air_time INT,
    distance INT,
    wheels_on INT,
    taxi_in INT,
    scheduled_arrival INT,
    arrival_time INT,
    arrival_delay INT,
    diverted INT,
    cancelled INT,
    cancellation_reason STRING,
    air_system_delay INT,
    security_delay INT,
    airline_delay INT,
    late_aircraft_delay INT,
    weather_delay INT
)
USING iceberg
PARTITIONED BY (month)
""")

DataFrame[]

In [15]:
# Write flights data
flights_df \
    .writeTo('airline.db.flights') \
    .append()

                                                                                

In [24]:
# # Create database if not exists
# spark.sql('CREATE DATABASE IF NOT EXISTS db')

# print(spark.catalog.tableExists('airline.db.flights'))

# # Create flights table if not exists
# if not spark.catalog.tableExists('airline.db.flights'):
#     spark.catalog.createTable(
#         'airline.db.flights',
#         schema=flights_schema,
#         source='iceberg',
#         partitioning='month'
#     )

# spark.catalog.getTable('airline.db.flights')

False


Table(name='flights', catalog='airline', namespace=['db'], description=None, tableType='MANAGED', isTemporary=False)

In [9]:
spark.sql('DESCRIBE EXTENDED airline.db.flights').show()

+-------------------+---------+-------+
|           col_name|data_type|comment|
+-------------------+---------+-------+
|               year|      int|   NULL|
|              month|      int|   NULL|
|                day|      int|   NULL|
|        day_of_week|      int|   NULL|
|            airline|   string|   NULL|
|      flight_number|      int|   NULL|
|        tail_number|   string|   NULL|
|     origin_airport|   string|   NULL|
|destination_airport|   string|   NULL|
|scheduled_departure|      int|   NULL|
|     departure_time|      int|   NULL|
|    departure_delay|      int|   NULL|
|           taxi_out|      int|   NULL|
|         wheels_off|      int|   NULL|
|     scheduled_time|      int|   NULL|
|       elapsed_time|      int|   NULL|
|           air_time|      int|   NULL|
|           distance|      int|   NULL|
|          wheels_on|      int|   NULL|
|            taxi_in|      int|   NULL|
+-------------------+---------+-------+
only showing top 20 rows



#### HIDDEN PARTITIONING!?
When partitioning on year, still getting 12 files in 1 partition.

Dataset came sorted by date, so hidden partitioning was automatically splitting into 12 parquet files?

When partitioning on month, what is the behavior?

In [45]:
metrics = spark.sql('select readable_metrics from airline.db.flights.files').collect()

# Check lower and upper bounds on month to ensure partitioning by month is being performed correctly
for metric in metrics:
    temp = metric.asDict()['readable_metrics'].asDict()['month']
    print(temp)

Row(column_size=1136, value_count=479230, null_value_count=0, nan_value_count=None, lower_bound=12, upper_bound=12)
Row(column_size=1136, value_count=469968, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=1)
Row(column_size=1225, value_count=503897, null_value_count=0, nan_value_count=None, lower_bound=6, upper_bound=6)
Row(column_size=1224, value_count=504312, null_value_count=0, nan_value_count=None, lower_bound=3, upper_bound=3)
Row(column_size=1182, value_count=496993, null_value_count=0, nan_value_count=None, lower_bound=5, upper_bound=5)
Row(column_size=1133, value_count=464946, null_value_count=0, nan_value_count=None, lower_bound=9, upper_bound=9)
Row(column_size=1179, value_count=485151, null_value_count=0, nan_value_count=None, lower_bound=4, upper_bound=4)
Row(column_size=1228, value_count=510536, null_value_count=0, nan_value_count=None, lower_bound=8, upper_bound=8)
Row(column_size=1271, value_count=520718, null_value_count=0, nan_value_count=None, lo

#### Checking table metadata

In [4]:
%%sql
SELECT
    *
FROM airline.db.flights.partitions

24/12/20 04:33:52 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


partition,spec_id,record_count,file_count,total_data_file_size_in_bytes,position_delete_record_count,position_delete_file_count,equality_delete_record_count,equality_delete_file_count,last_updated_at,last_updated_snapshot_id
Row(month=8),0,510536,1,11439099,0,0,0,0,2024-12-19 20:34:47.122000,1297397742035865824
Row(month=9),0,464946,1,10218131,0,0,0,0,2024-12-19 20:34:47.122000,1297397742035865824
Row(month=6),0,503897,1,11434535,0,0,0,0,2024-12-19 20:34:47.122000,1297397742035865824
Row(month=7),0,520718,1,11755348,0,0,0,0,2024-12-19 20:34:47.122000,1297397742035865824
Row(month=12),0,479230,1,10767738,0,0,0,0,2024-12-19 20:34:47.122000,1297397742035865824
Row(month=10),0,486165,1,10651549,0,0,0,0,2024-12-19 20:34:47.122000,1297397742035865824
Row(month=11),0,467972,1,10373628,0,0,0,0,2024-12-19 20:34:47.122000,1297397742035865824
Row(month=1),0,469968,1,10532934,0,0,0,0,2024-12-19 20:34:47.122000,1297397742035865824
Row(month=4),0,485151,1,10816223,0,0,0,0,2024-12-19 20:34:47.122000,1297397742035865824
Row(month=5),0,496993,1,11088572,0,0,0,0,2024-12-19 20:34:47.122000,1297397742035865824


In [32]:
%%sql
SELECT
    *
FROM airline.db.flights.files

content,file_path,file_format,spec_id,partition,record_count,file_size_in_bytes,column_sizes,value_counts,null_value_counts,nan_value_counts,lower_bounds,upper_bounds,key_metadata,split_offsets,equality_ids,sort_order_id,readable_metrics
0,s3://warehouse/db/flights/data/month=12/00000-939-22846555-129f-4460-bd05-fe6cf2fc1357-0-00001.parquet,PARQUET,0,Row(month=12),479230,10767738,"{1: 1136, 2: 1136, 3: 1299, 4: 1277, 5: 131603, 6: 796273, 7: 786943, 8: 499647, 9: 469992, 10: 158254, 11: 672629, 12: 459886, 13: 328777, 14: 672691, 15: 542137, 16: 578324, 17: 576617, 18: 663728, 19: 673279, 20: 314846, 21: 663918, 22: 673278, 23: 509403, 24: 5312, 25: 13550, 26: 15368, 27: 131307, 28: 58138, 29: 132839, 30: 134974, 31: 73400}","{1: 479230, 2: 479230, 3: 479230, 4: 479230, 5: 479230, 6: 479230, 7: 479230, 8: 479230, 9: 479230, 10: 479230, 11: 479230, 12: 479230, 13: 479230, 14: 479230, 15: 479230, 16: 479230, 17: 479230, 18: 479230, 19: 479230, 20: 479230, 21: 479230, 22: 479230, 23: 479230, 24: 479230, 25: 479230, 26: 479230, 27: 479230, 28: 479230, 29: 479230, 30: 479230, 31: 479230}","{1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 1161, 8: 0, 9: 0, 10: 0, 11: 7679, 12: 7679, 13: 7961, 14: 7961, 15: 0, 16: 9513, 17: 9513, 18: 0, 19: 8453, 20: 8453, 21: 0, 22: 8453, 23: 9513, 24: 0, 25: 0, 26: 471167, 27: 382458, 28: 382458, 29: 382458, 30: 382458, 31: 382458}",{},"{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x0c\x00\x00\x00'), 3: bytearray(b'\x01\x00\x00\x00'), 4: bytearray(b'\x01\x00\x00\x00'), 5: bytearray(b'AA'), 6: bytearray(b'\x01\x00\x00\x00'), 7: bytearray(b'7820L'), 8: bytearray(b'ABE'), 9: bytearray(b'ABE'), 10: bytearray(b'\x01\x00\x00\x00'), 11: bytearray(b'\x01\x00\x00\x00'), 12: bytearray(b'\xae\xff\xff\xff'), 13: bytearray(b'\x01\x00\x00\x00'), 14: bytearray(b'\x01\x00\x00\x00'), 15: bytearray(b'\x15\x00\x00\x00'), 16: bytearray(b'\x0f\x00\x00\x00'), 17: bytearray(b'\x08\x00\x00\x00'), 18: bytearray(b'\x1f\x00\x00\x00'), 19: bytearray(b'\x01\x00\x00\x00'), 20: bytearray(b'\x01\x00\x00\x00'), 21: bytearray(b'\x01\x00\x00\x00'), 22: bytearray(b'\x01\x00\x00\x00'), 23: bytearray(b'\xb0\xff\xff\xff'), 24: bytearray(b'\x00\x00\x00\x00'), 25: bytearray(b'\x00\x00\x00\x00'), 26: bytearray(b'A'), 27: bytearray(b'\x00\x00\x00\x00'), 28: bytearray(b'\x00\x00\x00\x00'), 29: bytearray(b'\x00\x00\x00\x00'), 30: bytearray(b'\x00\x00\x00\x00'), 31: bytearray(b'\x00\x00\x00\x00')}","{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x0c\x00\x00\x00'), 3: bytearray(b'\x1f\x00\x00\x00'), 4: bytearray(b'\x07\x00\x00\x00'), 5: bytearray(b'WN'), 6: bytearray(b'\xfd \x00\x00'), 7: bytearray(b'N9EAMQ'), 8: bytearray(b'YUM'), 9: bytearray(b'YUM'), 10: bytearray(b'7\t\x00\x00'), 11: bytearray(b'`\t\x00\x00'), 12: bytearray(b'q\x06\x00\x00'), 13: bytearray(b'\xac\x00\x00\x00'), 14: bytearray(b'`\t\x00\x00'), 15: bytearray(b'\xc1\x02\x00\x00'), 16: bytearray(b'\xda\x02\x00\x00'), 17: bytearray(b'\xb2\x02\x00\x00'), 18: bytearray(b'w\x13\x00\x00'), 19: bytearray(b'`\t\x00\x00'), 20: bytearray(b'\xf8\x00\x00\x00'), 21: bytearray(b'7\t\x00\x00'), 22: bytearray(b'`\t\x00\x00'), 23: bytearray(b'd\x06\x00\x00'), 24: bytearray(b'\x01\x00\x00\x00'), 25: bytearray(b'\x01\x00\x00\x00'), 26: bytearray(b'C'), 27: bytearray(b'\x19\x04\x00\x00'), 28: bytearray(b'\x9a\x00\x00\x00'), 29: bytearray(b'd\x06\x00\x00'), 30: bytearray(b'\xa6\x04\x00\x00'), 31: bytearray(b'\xbb\x04\x00\x00')}",,[4],,0,"Row(air_system_delay=Row(column_size=131307, value_count=479230, null_value_count=382458, nan_value_count=None, lower_bound=0, upper_bound=1049), air_time=Row(column_size=576617, value_count=479230, null_value_count=9513, nan_value_count=None, lower_bound=8, upper_bound=690), airline=Row(column_size=131603, value_count=479230, null_value_count=0, nan_value_count=None, lower_bound='AA', upper_bound='WN'), airline_delay=Row(column_size=132839, value_count=479230, null_value_count=382458, nan_value_count=None, lower_bound=0, upper_bound=1636), arrival_delay=Row(column_size=509403, value_count=479230, null_value_count=9513, nan_value_count=None, lower_bound=-80, upper_bound=1636), arrival_time=Row(column_size=673278, value_count=479230, null_value_count=8453, nan_value_count=None, lower_bound=1, upper_bound=2400), cancellation_reason=Row(column_size=15368, value_count=479230, null_value_count=471167, nan_value_count=None, lower_bound='A', upper_bound='C'), cancelled=Row(column_size=13550, value_count=479230, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), day=Row(column_size=1299, value_count=479230, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=31), day_of_week=Row(column_size=1277, value_count=479230, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=7), departure_delay=Row(column_size=459886, value_count=479230, null_value_count=7679, nan_value_count=None, lower_bound=-82, upper_bound=1649), departure_time=Row(column_size=672629, value_count=479230, null_value_count=7679, nan_value_count=None, lower_bound=1, upper_bound=2400), destination_airport=Row(column_size=469992, value_count=479230, null_value_count=0, nan_value_count=None, lower_bound='ABE', upper_bound='YUM'), distance=Row(column_size=663728, value_count=479230, null_value_count=0, nan_value_count=None, lower_bound=31, upper_bound=4983), diverted=Row(column_size=5312, value_count=479230, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), elapsed_time=Row(column_size=578324, value_count=479230, null_value_count=9513, nan_value_count=None, lower_bound=15, upper_bound=730), flight_number=Row(column_size=796273, value_count=479230, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=8445), late_aircraft_delay=Row(column_size=134974, value_count=479230, null_value_count=382458, nan_value_count=None, lower_bound=0, upper_bound=1190), month=Row(column_size=1136, value_count=479230, null_value_count=0, nan_value_count=None, lower_bound=12, upper_bound=12), origin_airport=Row(column_size=499647, value_count=479230, null_value_count=0, nan_value_count=None, lower_bound='ABE', upper_bound='YUM'), scheduled_arrival=Row(column_size=663918, value_count=479230, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_departure=Row(column_size=158254, value_count=479230, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_time=Row(column_size=542137, value_count=479230, null_value_count=0, nan_value_count=None, lower_bound=21, upper_bound=705), security_delay=Row(column_size=58138, value_count=479230, null_value_count=382458, nan_value_count=None, lower_bound=0, upper_bound=154), tail_number=Row(column_size=786943, value_count=479230, null_value_count=1161, nan_value_count=None, lower_bound='7820L', upper_bound='N9EAMQ'), taxi_in=Row(column_size=314846, value_count=479230, null_value_count=8453, nan_value_count=None, lower_bound=1, upper_bound=248), taxi_out=Row(column_size=328777, value_count=479230, null_value_count=7961, nan_value_count=None, lower_bound=1, upper_bound=172), weather_delay=Row(column_size=73400, value_count=479230, null_value_count=382458, nan_value_count=None, lower_bound=0, upper_bound=1211), wheels_off=Row(column_size=672691, value_count=479230, null_value_count=7961, nan_value_count=None, lower_bound=1, upper_bound=2400), wheels_on=Row(column_size=673279, value_count=479230, null_value_count=8453, nan_value_count=None, lower_bound=1, upper_bound=2400), year=Row(column_size=1136, value_count=479230, null_value_count=0, nan_value_count=None, lower_bound=2015, upper_bound=2015))"
0,s3://warehouse/db/flights/data/month=1/00001-940-22846555-129f-4460-bd05-fe6cf2fc1357-0-00001.parquet,PARQUET,0,Row(month=1),469968,10532934,"{1: 1136, 2: 1136, 3: 1298, 4: 1280, 5: 134633, 6: 780102, 7: 776295, 8: 483031, 9: 475939, 10: 153371, 11: 660927, 12: 446381, 13: 310506, 14: 661083, 15: 529725, 16: 571562, 17: 567827, 18: 650936, 19: 661462, 20: 267295, 21: 651013, 22: 661462, 23: 500469, 24: 4290, 25: 17434, 26: 20142, 27: 129601, 28: 56268, 29: 129601, 30: 126924, 31: 73997}","{1: 469968, 2: 469968, 3: 469968, 4: 469968, 5: 469968, 6: 469968, 7: 469968, 8: 469968, 9: 469968, 10: 469968, 11: 469968, 12: 469968, 13: 469968, 14: 469968, 15: 469968, 16: 469968, 17: 469968, 18: 469968, 19: 469968, 20: 469968, 21: 469968, 22: 469968, 23: 469968, 24: 469968, 25: 469968, 26: 469968, 27: 469968, 28: 469968, 29: 469968, 30: 469968, 31: 469968}","{1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 2782, 8: 0, 9: 0, 10: 0, 11: 11657, 12: 11657, 13: 11876, 14: 11876, 15: 0, 16: 12955, 17: 12955, 18: 0, 19: 12271, 20: 12271, 21: 0, 22: 12271, 23: 12955, 24: 0, 25: 0, 26: 457986, 27: 374017, 28: 374017, 29: 374017, 30: 374017, 31: 374017}",{},"{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x01\x00\x00\x00'), 3: bytearray(b'\x01\x00\x00\x00'), 4: bytearray(b'\x01\x00\x00\x00'), 5: bytearray(b'AA'), 6: bytearray(b'\x01\x00\x00\x00'), 7: bytearray(b'N001AA'), 8: bytearray(b'ABE'), 9: bytearray(b'ABE'), 10: bytearray(b'\x05\x00\x00\x00'), 11: bytearray(b'\x01\x00\x00\x00'), 12: bytearray(b'\xd0\xff\xff\xff'), 13: bytearray(b'\x01\x00\x00\x00'), 14: bytearray(b'\x01\x00\x00\x00'), 15: bytearray(b'\x17\x00\x00\x00'), 16: bytearray(b'\x11\x00\x00\x00'), 17: bytearray(b'\x08\x00\x00\x00'), 18: bytearray(b'\x1f\x00\x00\x00'), 19: bytearray(b'\x01\x00\x00\x00'), 20: bytearray(b'\x01\x00\x00\x00'), 21: bytearray(b'\x01\x00\x00\x00'), 22: bytearray(b'\x01\x00\x00\x00'), 23: bytearray(b'\xae\xff\xff\xff'), 24: bytearray(b'\x00\x00\x00\x00'), 25: bytearray(b'\x00\x00\x00\x00'), 26: bytearray(b'A'), 27: bytearray(b'\x00\x00\x00\x00'), 28: bytearray(b'\x00\x00\x00\x00'), 29: bytearray(b'\x00\x00\x00\x00'), 30: bytearray(b'\x00\x00\x00\x00'), 31: bytearray(b'\x00\x00\x00\x00')}","{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x01\x00\x00\x00'), 3: bytearray(b'\x1f\x00\x00\x00'), 4: bytearray(b'\x07\x00\x00\x00'), 5: bytearray(b'WN'), 6: bytearray(b'A&\x00\x00'), 7: bytearray(b'N9EAMQ'), 8: bytearray(b'YUM'), 9: bytearray(b'YUM'), 10: bytearray(b'7\t\x00\x00'), 11: bytearray(b'`\t\x00\x00'), 12: bytearray(b'\xc4\x07\x00\x00'), 13: bytearray(b'\xb0\x00\x00\x00'), 14: bytearray(b'`\t\x00\x00'), 15: bytearray(b'\xce\x02\x00\x00'), 16: bytearray(b'\xd4\x02\x00\x00'), 17: bytearray(b'\xa4\x02\x00\x00'), 18: bytearray(b'w\x13\x00\x00'), 19: bytearray(b'`\t\x00\x00'), 20: bytearray(b'\xb3\x00\x00\x00'), 21: bytearray(b'7\t\x00\x00'), 22: bytearray(b'`\t\x00\x00'), 23: bytearray(b'\xb3\x07\x00\x00'), 24: bytearray(b'\x01\x00\x00\x00'), 25: bytearray(b'\x01\x00\x00\x00'), 26: bytearray(b'D'), 27: bytearray(b'>\x03\x00\x00'), 28: bytearray(b'\xf1\x00\x00\x00'), 29: bytearray(b'\xb3\x07\x00\x00'), 30: bytearray(b'\xb4\x03\x00\x00'), 31: bytearray(b'\xaa\x03\x00\x00')}",,[4],,0,"Row(air_system_delay=Row(column_size=129601, value_count=469968, null_value_count=374017, nan_value_count=None, lower_bound=0, upper_bound=830), air_time=Row(column_size=567827, value_count=469968, null_value_count=12955, nan_value_count=None, lower_bound=8, upper_bound=676), airline=Row(column_size=134633, value_count=469968, null_value_count=0, nan_value_count=None, lower_bound='AA', upper_bound='WN'), airline_delay=Row(column_size=129601, value_count=469968, null_value_count=374017, nan_value_count=None, lower_bound=0, upper_bound=1971), arrival_delay=Row(column_size=500469, value_count=469968, null_value_count=12955, nan_value_count=None, lower_bound=-82, upper_bound=1971), arrival_time=Row(column_size=661462, value_count=469968, null_value_count=12271, nan_value_count=None, lower_bound=1, upper_bound=2400), cancellation_reason=Row(column_size=20142, value_count=469968, null_value_count=457986, nan_value_count=None, lower_bound='A', upper_bound='D'), cancelled=Row(column_size=17434, value_count=469968, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), day=Row(column_size=1298, value_count=469968, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=31), day_of_week=Row(column_size=1280, value_count=469968, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=7), departure_delay=Row(column_size=446381, value_count=469968, null_value_count=11657, nan_value_count=None, lower_bound=-48, upper_bound=1988), departure_time=Row(column_size=660927, value_count=469968, null_value_count=11657, nan_value_count=None, lower_bound=1, upper_bound=2400), destination_airport=Row(column_size=475939, value_count=469968, null_value_count=0, nan_value_count=None, lower_bound='ABE', upper_bound='YUM'), distance=Row(column_size=650936, value_count=469968, null_value_count=0, nan_value_count=None, lower_bound=31, upper_bound=4983), diverted=Row(column_size=4290, value_count=469968, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), elapsed_time=Row(column_size=571562, value_count=469968, null_value_count=12955, nan_value_count=None, lower_bound=17, upper_bound=724), flight_number=Row(column_size=780102, value_count=469968, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=9793), late_aircraft_delay=Row(column_size=126924, value_count=469968, null_value_count=374017, nan_value_count=None, lower_bound=0, upper_bound=948), month=Row(column_size=1136, value_count=469968, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=1), origin_airport=Row(column_size=483031, value_count=469968, null_value_count=0, nan_value_count=None, lower_bound='ABE', upper_bound='YUM'), scheduled_arrival=Row(column_size=651013, value_count=469968, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_departure=Row(column_size=153371, value_count=469968, null_value_count=0, nan_value_count=None, lower_bound=5, upper_bound=2359), scheduled_time=Row(column_size=529725, value_count=469968, null_value_count=0, nan_value_count=None, lower_bound=23, upper_bound=718), security_delay=Row(column_size=56268, value_count=469968, null_value_count=374017, nan_value_count=None, lower_bound=0, upper_bound=241), tail_number=Row(column_size=776295, value_count=469968, null_value_count=2782, nan_value_count=None, lower_bound='N001AA', upper_bound='N9EAMQ'), taxi_in=Row(column_size=267295, value_count=469968, null_value_count=12271, nan_value_count=None, lower_bound=1, upper_bound=179), taxi_out=Row(column_size=310506, value_count=469968, null_value_count=11876, nan_value_count=None, lower_bound=1, upper_bound=176), weather_delay=Row(column_size=73997, value_count=469968, null_value_count=374017, nan_value_count=None, lower_bound=0, upper_bound=938), wheels_off=Row(column_size=661083, value_count=469968, null_value_count=11876, nan_value_count=None, lower_bound=1, upper_bound=2400), wheels_on=Row(column_size=661462, value_count=469968, null_value_count=12271, nan_value_count=None, lower_bound=1, upper_bound=2400), year=Row(column_size=1136, value_count=469968, null_value_count=0, nan_value_count=None, lower_bound=2015, upper_bound=2015))"
0,s3://warehouse/db/flights/data/month=6/00002-941-22846555-129f-4460-bd05-fe6cf2fc1357-0-00001.parquet,PARQUET,0,Row(month=6),503897,11434535,"{1: 1225, 2: 1225, 3: 1385, 4: 1366, 5: 149356, 6: 834819, 7: 828293, 8: 517778, 9: 500099, 10: 160386, 11: 710956, 12: 519926, 13: 333443, 14: 711224, 15: 561888, 16: 614675, 17: 610076, 18: 697777, 19: 711423, 20: 329866, 21: 697849, 22: 711425, 23: 544611, 24: 6560, 25: 16760, 26: 18964, 27: 153239, 28: 63456, 29: 157053, 30: 156089, 31: 83970}","{1: 503897, 2: 503897, 3: 503897, 4: 503897, 5: 503897, 6: 503897, 7: 503897, 8: 503897, 9: 503897, 10: 503897, 11: 503897, 12: 503897, 13: 503897, 14: 503897, 15: 503897, 16: 503897, 17: 503897, 18: 503897, 19: 503897, 20: 503897, 21: 503897, 22: 503897, 23: 503897, 24: 503897, 25: 503897, 26: 503897, 27: 503897, 28: 503897, 29: 503897, 30: 503897, 31: 503897}","{1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 1475, 8: 0, 9: 0, 10: 0, 11: 8698, 12: 8698, 13: 9055, 14: 9055, 15: 0, 16: 11050, 17: 11050, 18: 0, 19: 9389, 20: 9389, 21: 0, 22: 9389, 23: 11050, 24: 0, 25: 0, 26: 494777, 27: 388155, 28: 388155, 29: 388155, 30: 388155, 31: 388155}",{},"{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x06\x00\x00\x00'), 3: bytearray(b'\x01\x00\x00\x00'), 4: bytearray(b'\x01\x00\x00\x00'), 5: bytearray(b'AA'), 6: bytearray(b'\x01\x00\x00\x00'), 7: bytearray(b'N001AA'), 8: bytearray(b'ABE'), 9: bytearray(b'ABE'), 10: bytearray(b'\x03\x00\x00\x00'), 11: bytearray(b'\x01\x00\x00\x00'), 12: bytearray(b'\xcc\xff\xff\xff'), 13: bytearray(b'\x01\x00\x00\x00'), 14: bytearray(b'\x01\x00\x00\x00'), 15: bytearray(b'\x12\x00\x00\x00'), 16: bytearray(b'\x0f\x00\x00\x00'), 17: bytearray(b'\x07\x00\x00\x00'), 18: bytearray(b'\x1f\x00\x00\x00'), 19: bytearray(b'\x01\x00\x00\x00'), 20: bytearray(b'\x01\x00\x00\x00'), 21: bytearray(b'\x01\x00\x00\x00'), 22: bytearray(b'\x01\x00\x00\x00'), 23: bytearray(b'\xbe\xff\xff\xff'), 24: bytearray(b'\x00\x00\x00\x00'), 25: bytearray(b'\x00\x00\x00\x00'), 26: bytearray(b'A'), 27: bytearray(b'\x00\x00\x00\x00'), 28: bytearray(b'\x00\x00\x00\x00'), 29: bytearray(b'\x00\x00\x00\x00'), 30: bytearray(b'\x00\x00\x00\x00'), 31: bytearray(b'\x00\x00\x00\x00')}","{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x06\x00\x00\x00'), 3: bytearray(b'\x1e\x00\x00\x00'), 4: bytearray(b'\x07\x00\x00\x00'), 5: bytearray(b'WN'), 6: bytearray(b'\x0e\x1d\x00\x00'), 7: bytearray(b'N9EAMQ'), 8: bytearray(b'YUM'), 9: bytearray(b'YUM'), 10: bytearray(b'7\t\x00\x00'), 11: bytearray(b'`\t\x00\x00'), 12: bytearray(b'\xf2\x05\x00\x00'), 13: bytearray(b'\xa4\x00\x00\x00'), 14: bytearray(b'`\t\x00\x00'), 15: bytearray(b'\x99\x02\x00\x00'), 16: bytearray(b'\xab\x02\x00\x00'), 17: bytearray(b'\x8b\x02\x00\x00'), 18: bytearray(b'w\x13\x00\x00'), 19: bytearray(b'`\t\x00\x00'), 20: bytearray(b'\xac\x00\x00\x00'), 21: bytearray(b'7\t\x00\x00'), 22: bytearray(b'`\t\x00\x00'), 23: bytearray(b'\xe4\x05\x00\x00'), 24: bytearray(b'\x01\x00\x00\x00'), 25: bytearray(b'\x01\x00\x00\x00'), 26: bytearray(b'C'), 27: bytearray(b'\xdf\x03\x00\x00'), 28: bytearray(b'\xbf\x00\x00\x00'), 29: bytearray(b'\xe4\x05\x00\x00'), 30: bytearray(b'\xf2\x03\x00\x00'), 31: bytearray(b',\x04\x00\x00')}",,[4],,0,"Row(air_system_delay=Row(column_size=153239, value_count=503897, null_value_count=388155, nan_value_count=None, lower_bound=0, upper_bound=991), air_time=Row(column_size=610076, value_count=503897, null_value_count=11050, nan_value_count=None, lower_bound=7, upper_bound=651), airline=Row(column_size=149356, value_count=503897, null_value_count=0, nan_value_count=None, lower_bound='AA', upper_bound='WN'), airline_delay=Row(column_size=157053, value_count=503897, null_value_count=388155, nan_value_count=None, lower_bound=0, upper_bound=1508), arrival_delay=Row(column_size=544611, value_count=503897, null_value_count=11050, nan_value_count=None, lower_bound=-66, upper_bound=1508), arrival_time=Row(column_size=711425, value_count=503897, null_value_count=9389, nan_value_count=None, lower_bound=1, upper_bound=2400), cancellation_reason=Row(column_size=18964, value_count=503897, null_value_count=494777, nan_value_count=None, lower_bound='A', upper_bound='C'), cancelled=Row(column_size=16760, value_count=503897, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), day=Row(column_size=1385, value_count=503897, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=30), day_of_week=Row(column_size=1366, value_count=503897, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=7), departure_delay=Row(column_size=519926, value_count=503897, null_value_count=8698, nan_value_count=None, lower_bound=-52, upper_bound=1522), departure_time=Row(column_size=710956, value_count=503897, null_value_count=8698, nan_value_count=None, lower_bound=1, upper_bound=2400), destination_airport=Row(column_size=500099, value_count=503897, null_value_count=0, nan_value_count=None, lower_bound='ABE', upper_bound='YUM'), distance=Row(column_size=697777, value_count=503897, null_value_count=0, nan_value_count=None, lower_bound=31, upper_bound=4983), diverted=Row(column_size=6560, value_count=503897, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), elapsed_time=Row(column_size=614675, value_count=503897, null_value_count=11050, nan_value_count=None, lower_bound=15, upper_bound=683), flight_number=Row(column_size=834819, value_count=503897, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=7438), late_aircraft_delay=Row(column_size=156089, value_count=503897, null_value_count=388155, nan_value_count=None, lower_bound=0, upper_bound=1010), month=Row(column_size=1225, value_count=503897, null_value_count=0, nan_value_count=None, lower_bound=6, upper_bound=6), origin_airport=Row(column_size=517778, value_count=503897, null_value_count=0, nan_value_count=None, lower_bound='ABE', upper_bound='YUM'), scheduled_arrival=Row(column_size=697849, value_count=503897, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_departure=Row(column_size=160386, value_count=503897, null_value_count=0, nan_value_count=None, lower_bound=3, upper_bound=2359), scheduled_time=Row(column_size=561888, value_count=503897, null_value_count=0, nan_value_count=None, lower_bound=18, upper_bound=665), security_delay=Row(column_size=63456, value_count=503897, null_value_count=388155, nan_value_count=None, lower_bound=0, upper_bound=191), tail_number=Row(column_size=828293, value_count=503897, null_value_count=1475, nan_value_count=None, lower_bound='N001AA', upper_bound='N9EAMQ'), taxi_in=Row(column_size=329866, value_count=503897, null_value_count=9389, nan_value_count=None, lower_bound=1, upper_bound=172), taxi_out=Row(column_size=333443, value_count=503897, null_value_count=9055, nan_value_count=None, lower_bound=1, upper_bound=164), weather_delay=Row(column_size=83970, value_count=503897, null_value_count=388155, nan_value_count=None, lower_bound=0, upper_bound=1068), wheels_off=Row(column_size=711224, value_count=503897, null_value_count=9055, nan_value_count=None, lower_bound=1, upper_bound=2400), wheels_on=Row(column_size=711423, value_count=503897, null_value_count=9389, nan_value_count=None, lower_bound=1, upper_bound=2400), year=Row(column_size=1225, value_count=503897, null_value_count=0, nan_value_count=None, lower_bound=2015, upper_bound=2015))"
0,s3://warehouse/db/flights/data/month=3/00003-942-22846555-129f-4460-bd05-fe6cf2fc1357-0-00001.parquet,PARQUET,0,Row(month=3),504312,11302473,"{1: 1225, 2: 1224, 3: 1385, 4: 1368, 5: 147546, 6: 835804, 7: 828220, 8: 525790, 9: 500117, 10: 161509, 11: 706366, 12: 497476, 13: 314730, 14: 706546, 15: 570469, 16: 608410, 17: 600359, 18: 698293, 19: 706262, 20: 340633, 21: 698432, 22: 706262, 23: 542286, 24: 4888, 25: 16379, 26: 18055, 27: 126940, 28: 60267, 29: 135736, 30: 139215, 31: 72937}","{1: 504312, 2: 504312, 3: 504312, 4: 504312, 5: 504312, 6: 504312, 7: 504312, 8: 504312, 9: 504312, 10: 504312, 11: 504312, 12: 504312, 13: 504312, 14: 504312, 15: 504312, 16: 504312, 17: 504312, 18: 504312, 19: 504312, 20: 504312, 21: 504312, 22: 504312, 23: 504312, 24: 504312, 25: 504312, 26: 504312, 27: 504312, 28: 504312, 29: 504312, 30: 504312, 31: 504312}","{1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 1814, 8: 0, 9: 0, 10: 0, 11: 10639, 12: 10639, 13: 10883, 14: 10883, 15: 0, 16: 12174, 17: 12174, 18: 0, 19: 11275, 20: 11275, 21: 0, 22: 11275, 23: 12174, 24: 0, 25: 0, 26: 493310, 27: 408860, 28: 408860, 29: 408860, 30: 408860, 31: 408860}",{},"{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x03\x00\x00\x00'), 3: bytearray(b'\x01\x00\x00\x00'), 4: bytearray(b'\x01\x00\x00\x00'), 5: bytearray(b'AA'), 6: bytearray(b'\x01\x00\x00\x00'), 7: bytearray(b'D942DN'), 8: bytearray(b'ABE'), 9: bytearray(b'ABE'), 10: bytearray(b'\x01\x00\x00\x00'), 11: bytearray(b'\x01\x00\x00\x00'), 12: bytearray(b'\xd7\xff\xff\xff'), 13: bytearray(b'\x01\x00\x00\x00'), 14: bytearray(b'\x01\x00\x00\x00'), 15: bytearray(b'\x14\x00\x00\x00'), 16: bytearray(b'\x0f\x00\x00\x00'), 17: bytearray(b'\x07\x00\x00\x00'), 18: bytearray(b'\x1f\x00\x00\x00'), 19: bytearray(b'\x01\x00\x00\x00'), 20: bytearray(b'\x01\x00\x00\x00'), 21: bytearray(b'\x01\x00\x00\x00'), 22: bytearray(b'\x01\x00\x00\x00'), 23: bytearray(b'\xa9\xff\xff\xff'), 24: bytearray(b'\x00\x00\x00\x00'), 25: bytearray(b'\x00\x00\x00\x00'), 26: bytearray(b'A'), 27: bytearray(b'\x00\x00\x00\x00'), 28: bytearray(b'\x00\x00\x00\x00'), 29: bytearray(b'\x00\x00\x00\x00'), 30: bytearray(b'\x00\x00\x00\x00'), 31: bytearray(b'\x00\x00\x00\x00')}","{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x03\x00\x00\x00'), 3: bytearray(b'\x1f\x00\x00\x00'), 4: bytearray(b'\x07\x00\x00\x00'), 5: bytearray(b'WN'), 6: bytearray(b'B&\x00\x00'), 7: bytearray(b'N9EAMQ'), 8: bytearray(b'YUM'), 9: bytearray(b'YUM'), 10: bytearray(b'7\t\x00\x00'), 11: bytearray(b'`\t\x00\x00'), 12: bytearray(b'I\x06\x00\x00'), 13: bytearray(b'\xb4\x00\x00\x00'), 14: bytearray(b'`\t\x00\x00'), 15: bytearray(b'\xa8\x02\x00\x00'), 16: bytearray(b'\xdd\x02\x00\x00'), 17: bytearray(b'\xb2\x02\x00\x00'), 18: bytearray(b'w\x13\x00\x00'), 19: bytearray(b'`\t\x00\x00'), 20: bytearray(b'\xa9\x00\x00\x00'), 21: bytearray(b'`\t\x00\x00'), 22: bytearray(b'`\t\x00\x00'), 23: bytearray(b'>\x06\x00\x00'), 24: bytearray(b'\x01\x00\x00\x00'), 25: bytearray(b'\x01\x00\x00\x00'), 26: bytearray(b'D'), 27: bytearray(b'M\x04\x00\x00'), 28: bytearray(b'\xdd\x00\x00\x00'), 29: bytearray(b'(\x06\x00\x00'), 30: bytearray(b'N\x04\x00\x00'), 31: bytearray(b'^\x04\x00\x00')}",,[4],,0,"Row(air_system_delay=Row(column_size=126940, value_count=504312, null_value_count=408860, nan_value_count=None, lower_bound=0, upper_bound=1101), air_time=Row(column_size=600359, value_count=504312, null_value_count=12174, nan_value_count=None, lower_bound=7, upper_bound=690), airline=Row(column_size=147546, value_count=504312, null_value_count=0, nan_value_count=None, lower_bound='AA', upper_bound='WN'), airline_delay=Row(column_size=135736, value_count=504312, null_value_count=408860, nan_value_count=None, lower_bound=0, upper_bound=1576), arrival_delay=Row(column_size=542286, value_count=504312, null_value_count=12174, nan_value_count=None, lower_bound=-87, upper_bound=1598), arrival_time=Row(column_size=706262, value_count=504312, null_value_count=11275, nan_value_count=None, lower_bound=1, upper_bound=2400), cancellation_reason=Row(column_size=18055, value_count=504312, null_value_count=493310, nan_value_count=None, lower_bound='A', upper_bound='D'), cancelled=Row(column_size=16379, value_count=504312, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), day=Row(column_size=1385, value_count=504312, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=31), day_of_week=Row(column_size=1368, value_count=504312, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=7), departure_delay=Row(column_size=497476, value_count=504312, null_value_count=10639, nan_value_count=None, lower_bound=-41, upper_bound=1609), departure_time=Row(column_size=706366, value_count=504312, null_value_count=10639, nan_value_count=None, lower_bound=1, upper_bound=2400), destination_airport=Row(column_size=500117, value_count=504312, null_value_count=0, nan_value_count=None, lower_bound='ABE', upper_bound='YUM'), distance=Row(column_size=698293, value_count=504312, null_value_count=0, nan_value_count=None, lower_bound=31, upper_bound=4983), diverted=Row(column_size=4888, value_count=504312, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), elapsed_time=Row(column_size=608410, value_count=504312, null_value_count=12174, nan_value_count=None, lower_bound=15, upper_bound=733), flight_number=Row(column_size=835804, value_count=504312, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=9794), late_aircraft_delay=Row(column_size=139215, value_count=504312, null_value_count=408860, nan_value_count=None, lower_bound=0, upper_bound=1102), month=Row(column_size=1224, value_count=504312, null_value_count=0, nan_value_count=None, lower_bound=3, upper_bound=3), origin_airport=Row(column_size=525790, value_count=504312, null_value_count=0, nan_value_count=None, lower_bound='ABE', upper_bound='YUM'), scheduled_arrival=Row(column_size=698432, value_count=504312, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2400), scheduled_departure=Row(column_size=161509, value_count=504312, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_time=Row(column_size=570469, value_count=504312, null_value_count=0, nan_value_count=None, lower_bound=20, upper_bound=680), security_delay=Row(column_size=60267, value_count=504312, null_value_count=408860, nan_value_count=None, lower_bound=0, upper_bound=221), tail_number=Row(column_size=828220, value_count=504312, null_value_count=1814, nan_value_count=None, lower_bound='D942DN', upper_bound='N9EAMQ'), taxi_in=Row(column_size=340633, value_count=504312, null_value_count=11275, nan_value_count=None, lower_bound=1, upper_bound=169), taxi_out=Row(column_size=314730, value_count=504312, null_value_count=10883, nan_value_count=None, lower_bound=1, upper_bound=180), weather_delay=Row(column_size=72937, value_count=504312, null_value_count=408860, nan_value_count=None, lower_bound=0, upper_bound=1118), wheels_off=Row(column_size=706546, value_count=504312, null_value_count=10883, nan_value_count=None, lower_bound=1, upper_bound=2400), wheels_on=Row(column_size=706262, value_count=504312, null_value_count=11275, nan_value_count=None, lower_bound=1, upper_bound=2400), year=Row(column_size=1225, value_count=504312, null_value_count=0, nan_value_count=None, lower_bound=2015, upper_bound=2015))"
0,s3://warehouse/db/flights/data/month=5/00004-943-22846555-129f-4460-bd05-fe6cf2fc1357-0-00001.parquet,PARQUET,0,Row(month=5),496993,11088572,"{1: 1182, 2: 1182, 3: 1346, 4: 1325, 5: 144686, 6: 823619, 7: 814737, 8: 506197, 9: 495986, 10: 158493, 11: 697835, 12: 475032, 13: 325019, 14: 698132, 15: 558271, 16: 598704, 17: 594212, 18: 688171, 19: 698583, 20: 315532, 21: 688301, 22: 698588, 23: 524833, 24: 5901, 25: 12050, 26: 13382, 27: 126404, 28: 58192, 29: 127187, 30: 134532, 31: 74389}","{1: 496993, 2: 496993, 3: 496993, 4: 496993, 5: 496993, 6: 496993, 7: 496993, 8: 496993, 9: 496993, 10: 496993, 11: 496993, 12: 496993, 13: 496993, 14: 496993, 15: 496993, 16: 496993, 17: 496993, 18: 496993, 19: 496993, 20: 496993, 21: 496993, 22: 496993, 23: 496993, 24: 496993, 25: 496993, 26: 496993, 27: 496993, 28: 496993, 29: 496993, 30: 496993, 31: 496993}","{1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 580, 8: 0, 9: 0, 10: 0, 11: 5336, 12: 5336, 13: 5636, 14: 5636, 15: 2, 16: 7352, 17: 7352, 18: 0, 19: 5920, 20: 5920, 21: 0, 22: 5920, 23: 7352, 24: 0, 25: 0, 26: 491299, 27: 407348, 28: 407348, 29: 407348, 30: 407348, 31: 407348}",{},"{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x05\x00\x00\x00'), 3: bytearray(b'\x01\x00\x00\x00'), 4: bytearray(b'\x01\x00\x00\x00'), 5: bytearray(b'AA'), 6: bytearray(b'\x01\x00\x00\x00'), 7: bytearray(b'N001AA'), 8: bytearray(b'ABE'), 9: bytearray(b'ABE'), 10: bytearray(b'\x05\x00\x00\x00'), 11: bytearray(b'\x01\x00\x00\x00'), 12: bytearray(b'\xd3\xff\xff\xff'), 13: bytearray(b'\x01\x00\x00\x00'), 14: bytearray(b'\x01\x00\x00\x00'), 15: bytearray(b'\x14\x00\x00\x00'), 16: bytearray(b'\x11\x00\x00\x00'), 17: bytearray(b'\x08\x00\x00\x00'), 18: bytearray(b'\x1f\x00\x00\x00'), 19: bytearray(b'\x01\x00\x00\x00'), 20: bytearray(b'\x01\x00\x00\x00'), 21: bytearray(b'\x01\x00\x00\x00'), 22: bytearray(b'\x01\x00\x00\x00'), 23: bytearray(b'\xb4\xff\xff\xff'), 24: bytearray(b'\x00\x00\x00\x00'), 25: bytearray(b'\x00\x00\x00\x00'), 26: bytearray(b'A'), 27: bytearray(b'\x00\x00\x00\x00'), 28: bytearray(b'\x00\x00\x00\x00'), 29: bytearray(b'\x00\x00\x00\x00'), 30: bytearray(b'\x00\x00\x00\x00'), 31: bytearray(b'\x00\x00\x00\x00')}","{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x05\x00\x00\x00'), 3: bytearray(b'\x1f\x00\x00\x00'), 4: bytearray(b'\x07\x00\x00\x00'), 5: bytearray(b'WN'), 6: bytearray(b'\x0e\x1d\x00\x00'), 7: bytearray(b'N9EAMQ'), 8: bytearray(b'YUM'), 9: bytearray(b'YUM'), 10: bytearray(b'7\t\x00\x00'), 11: bytearray(b'`\t\x00\x00'), 12: bytearray(b'\xd4\x05\x00\x00'), 13: bytearray(b'\xc8\x00\x00\x00'), 14: bytearray(b'`\t\x00\x00'), 15: bytearray(b'\x99\x02\x00\x00'), 16: bytearray(b'\xbe\x02\x00\x00'), 17: bytearray(b'\x88\x02\x00\x00'), 18: bytearray(b'w\x13\x00\x00'), 19: bytearray(b'`\t\x00\x00'), 20: bytearray(b'\xac\x00\x00\x00'), 21: bytearray(b'7\t\x00\x00'), 22: bytearray(b'`\t\x00\x00'), 23: bytearray(b'\xc8\x05\x00\x00'), 24: bytearray(b'\x01\x00\x00\x00'), 25: bytearray(b'\x01\x00\x00\x00'), 26: bytearray(b'D'), 27: bytearray(b'R\x03\x00\x00'), 28: bytearray(b'\xaf\x00\x00\x00'), 29: bytearray(b'\xc8\x05\x00\x00'), 30: bytearray(b'3\x05\x00\x00'), 31: bytearray(b'\xfd\x03\x00\x00')}",,[4],,0,"Row(air_system_delay=Row(column_size=126404, value_count=496993, null_value_count=407348, nan_value_count=None, lower_bound=0, upper_bound=850), air_time=Row(column_size=594212, value_count=496993, null_value_count=7352, nan_value_count=None, lower_bound=8, upper_bound=648), airline=Row(column_size=144686, value_count=496993, null_value_count=0, nan_value_count=None, lower_bound='AA', upper_bound='WN'), airline_delay=Row(column_size=127187, value_count=496993, null_value_count=407348, nan_value_count=None, lower_bound=0, upper_bound=1480), arrival_delay=Row(column_size=524833, value_count=496993, null_value_count=7352, nan_value_count=None, lower_bound=-76, upper_bound=1480), arrival_time=Row(column_size=698588, value_count=496993, null_value_count=5920, nan_value_count=None, lower_bound=1, upper_bound=2400), cancellation_reason=Row(column_size=13382, value_count=496993, null_value_count=491299, nan_value_count=None, lower_bound='A', upper_bound='D'), cancelled=Row(column_size=12050, value_count=496993, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), day=Row(column_size=1346, value_count=496993, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=31), day_of_week=Row(column_size=1325, value_count=496993, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=7), departure_delay=Row(column_size=475032, value_count=496993, null_value_count=5336, nan_value_count=None, lower_bound=-45, upper_bound=1492), departure_time=Row(column_size=697835, value_count=496993, null_value_count=5336, nan_value_count=None, lower_bound=1, upper_bound=2400), destination_airport=Row(column_size=495986, value_count=496993, null_value_count=0, nan_value_count=None, lower_bound='ABE', upper_bound='YUM'), distance=Row(column_size=688171, value_count=496993, null_value_count=0, nan_value_count=None, lower_bound=31, upper_bound=4983), diverted=Row(column_size=5901, value_count=496993, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), elapsed_time=Row(column_size=598704, value_count=496993, null_value_count=7352, nan_value_count=None, lower_bound=17, upper_bound=702), flight_number=Row(column_size=823619, value_count=496993, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=7438), late_aircraft_delay=Row(column_size=134532, value_count=496993, null_value_count=407348, nan_value_count=None, lower_bound=0, upper_bound=1331), month=Row(column_size=1182, value_count=496993, null_value_count=0, nan_value_count=None, lower_bound=5, upper_bound=5), origin_airport=Row(column_size=506197, value_count=496993, null_value_count=0, nan_value_count=None, lower_bound='ABE', upper_bound='YUM'), scheduled_arrival=Row(column_size=688301, value_count=496993, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_departure=Row(column_size=158493, value_count=496993, null_value_count=0, nan_value_count=None, lower_bound=5, upper_bound=2359), scheduled_time=Row(column_size=558271, value_count=496993, null_value_count=2, nan_value_count=None, lower_bound=20, upper_bound=665), security_delay=Row(column_size=58192, value_count=496993, null_value_count=407348, nan_value_count=None, lower_bound=0, upper_bound=175), tail_number=Row(column_size=814737, value_count=496993, null_value_count=580, nan_value_count=None, lower_bound='N001AA', upper_bound='N9EAMQ'), taxi_in=Row(column_size=315532, value_count=496993, null_value_count=5920, nan_value_count=None, lower_bound=1, upper_bound=172), taxi_out=Row(column_size=325019, value_count=496993, null_value_count=5636, nan_value_count=None, lower_bound=1, upper_bound=200), weather_delay=Row(column_size=74389, value_count=496993, null_value_count=407348, nan_value_count=None, lower_bound=0, upper_bound=1021), wheels_off=Row(column_size=698132, value_count=496993, null_value_count=5636, nan_value_count=None, lower_bound=1, upper_bound=2400), wheels_on=Row(column_size=698583, value_count=496993, null_value_count=5920, nan_value_count=None, lower_bound=1, upper_bound=2400), year=Row(column_size=1182, value_count=496993, null_value_count=0, nan_value_count=None, lower_bound=2015, upper_bound=2015))"
0,s3://warehouse/db/flights/data/month=9/00005-944-22846555-129f-4460-bd05-fe6cf2fc1357-0-00001.parquet,PARQUET,0,Row(month=9),464946,10218131,"{1: 1133, 2: 1133, 3: 1294, 4: 1272, 5: 132475, 6: 772161, 7: 761893, 8: 486711, 9: 466316, 10: 152759, 11: 649087, 12: 427604, 13: 288333, 14: 649405, 15: 524483, 16: 558138, 17: 552566, 18: 632789, 19: 649770, 20: 311614, 21: 644104, 22: 649763, 23: 473469, 24: 3644, 25: 6418, 26: 6916, 27: 89845, 28: 48963, 29: 95022, 30: 96417, 31: 56999}","{1: 464946, 2: 464946, 3: 464946, 4: 464946, 5: 464946, 6: 464946, 7: 464946, 8: 464946, 9: 464946, 10: 464946, 11: 464946, 12: 464946, 13: 464946, 14: 464946, 15: 464946, 16: 464946, 17: 464946, 18: 464946, 19: 464946, 20: 464946, 21: 464946, 22: 464946, 23: 464946, 24: 464946, 25: 464946, 26: 464946, 27: 464946, 28: 464946, 29: 464946, 30: 464946, 31: 464946}","{1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 318, 8: 0, 9: 0, 10: 0, 11: 1928, 12: 1928, 13: 2050, 14: 2050, 15: 0, 16: 2793, 17: 2793, 18: 0, 19: 2122, 20: 2122, 21: 0, 22: 2122, 23: 2793, 24: 0, 25: 0, 26: 462871, 27: 404885, 28: 404885, 29: 404885, 30: 404885, 31: 404885}",{},"{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\t\x00\x00\x00'), 3: bytearray(b'\x01\x00\x00\x00'), 4: bytearray(b'\x01\x00\x00\x00'), 5: bytearray(b'AA'), 6: bytearray(b'\x01\x00\x00\x00'), 7: bytearray(b'7819A'), 8: bytearray(b'ABE'), 9: bytearray(b'ABE'), 10: bytearray(b'\x03\x00\x00\x00'), 11: bytearray(b'\x01\x00\x00\x00'), 12: bytearray(b'\xc8\xff\xff\xff'), 13: bytearray(b'\x01\x00\x00\x00'), 14: bytearray(b'\x01\x00\x00\x00'), 15: bytearray(b'\x14\x00\x00\x00'), 16: bytearray(b'\x0f\x00\x00\x00'), 17: bytearray(b'\x07\x00\x00\x00'), 18: bytearray(b'\x1f\x00\x00\x00'), 19: bytearray(b'\x01\x00\x00\x00'), 20: bytearray(b'\x01\x00\x00\x00'), 21: bytearray(b'\x01\x00\x00\x00'), 22: bytearray(b'\x01\x00\x00\x00'), 23: bytearray(b'\xb7\xff\xff\xff'), 24: bytearray(b'\x00\x00\x00\x00'), 25: bytearray(b'\x00\x00\x00\x00'), 26: bytearray(b'A'), 27: bytearray(b'\x00\x00\x00\x00'), 28: bytearray(b'\x00\x00\x00\x00'), 29: bytearray(b'\x00\x00\x00\x00'), 30: bytearray(b'\x00\x00\x00\x00'), 31: bytearray(b'\x00\x00\x00\x00')}","{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\t\x00\x00\x00'), 3: bytearray(b'\x1e\x00\x00\x00'), 4: bytearray(b'\x07\x00\x00\x00'), 5: bytearray(b'WN'), 6: bytearray(b'\x0e\x1d\x00\x00'), 7: bytearray(b'N9EAMQ'), 8: bytearray(b'YUM'), 9: bytearray(b'YUM'), 10: bytearray(b'7\t\x00\x00'), 11: bytearray(b'`\t\x00\x00'), 12: bytearray(b'\x86\x06\x00\x00'), 13: bytearray(b'\xa1\x00\x00\x00'), 14: bytearray(b'`\t\x00\x00'), 15: bytearray(b'\x88\x02\x00\x00'), 16: bytearray(b'\xbf\x02\x00\x00'), 17: bytearray(b'}\x02\x00\x00'), 18: bytearray(b'w\x13\x00\x00'), 19: bytearray(b'`\t\x00\x00'), 20: bytearray(b'\x9d\x00\x00\x00'), 21: bytearray(b'7\t\x00\x00'), 22: bytearray(b'`\t\x00\x00'), 23: bytearray(b'\x81\x06\x00\x00'), 24: bytearray(b'\x01\x00\x00\x00'), 25: bytearray(b'\x01\x00\x00\x00'), 26: bytearray(b'C'), 27: bytearray(b'\xa3\x02\x00\x00'), 28: bytearray(b'=\x02\x00\x00'), 29: bytearray(b'\x81\x06\x00\x00'), 30: bytearray(b'\xfc\x02\x00\x00'), 31: bytearray(b'\xee\x02\x00\x00')}",,[4],,0,"Row(air_system_delay=Row(column_size=89845, value_count=464946, null_value_count=404885, nan_value_count=None, lower_bound=0, upper_bound=675), air_time=Row(column_size=552566, value_count=464946, null_value_count=2793, nan_value_count=None, lower_bound=7, upper_bound=637), airline=Row(column_size=132475, value_count=464946, null_value_count=0, nan_value_count=None, lower_bound='AA', upper_bound='WN'), airline_delay=Row(column_size=95022, value_count=464946, null_value_count=404885, nan_value_count=None, lower_bound=0, upper_bound=1665), arrival_delay=Row(column_size=473469, value_count=464946, null_value_count=2793, nan_value_count=None, lower_bound=-73, upper_bound=1665), arrival_time=Row(column_size=649763, value_count=464946, null_value_count=2122, nan_value_count=None, lower_bound=1, upper_bound=2400), cancellation_reason=Row(column_size=6916, value_count=464946, null_value_count=462871, nan_value_count=None, lower_bound='A', upper_bound='C'), cancelled=Row(column_size=6418, value_count=464946, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), day=Row(column_size=1294, value_count=464946, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=30), day_of_week=Row(column_size=1272, value_count=464946, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=7), departure_delay=Row(column_size=427604, value_count=464946, null_value_count=1928, nan_value_count=None, lower_bound=-56, upper_bound=1670), departure_time=Row(column_size=649087, value_count=464946, null_value_count=1928, nan_value_count=None, lower_bound=1, upper_bound=2400), destination_airport=Row(column_size=466316, value_count=464946, null_value_count=0, nan_value_count=None, lower_bound='ABE', upper_bound='YUM'), distance=Row(column_size=632789, value_count=464946, null_value_count=0, nan_value_count=None, lower_bound=31, upper_bound=4983), diverted=Row(column_size=3644, value_count=464946, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), elapsed_time=Row(column_size=558138, value_count=464946, null_value_count=2793, nan_value_count=None, lower_bound=15, upper_bound=703), flight_number=Row(column_size=772161, value_count=464946, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=7438), late_aircraft_delay=Row(column_size=96417, value_count=464946, null_value_count=404885, nan_value_count=None, lower_bound=0, upper_bound=764), month=Row(column_size=1133, value_count=464946, null_value_count=0, nan_value_count=None, lower_bound=9, upper_bound=9), origin_airport=Row(column_size=486711, value_count=464946, null_value_count=0, nan_value_count=None, lower_bound='ABE', upper_bound='YUM'), scheduled_arrival=Row(column_size=644104, value_count=464946, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_departure=Row(column_size=152759, value_count=464946, null_value_count=0, nan_value_count=None, lower_bound=3, upper_bound=2359), scheduled_time=Row(column_size=524483, value_count=464946, null_value_count=0, nan_value_count=None, lower_bound=20, upper_bound=648), security_delay=Row(column_size=48963, value_count=464946, null_value_count=404885, nan_value_count=None, lower_bound=0, upper_bound=573), tail_number=Row(column_size=761893, value_count=464946, null_value_count=318, nan_value_count=None, lower_bound='7819A', upper_bound='N9EAMQ'), taxi_in=Row(column_size=311614, value_count=464946, null_value_count=2122, nan_value_count=None, lower_bound=1, upper_bound=157), taxi_out=Row(column_size=288333, value_count=464946, null_value_count=2050, nan_value_count=None, lower_bound=1, upper_bound=161), weather_delay=Row(column_size=56999, value_count=464946, null_value_count=404885, nan_value_count=None, lower_bound=0, upper_bound=750), wheels_off=Row(column_size=649405, value_count=464946, null_value_count=2050, nan_value_count=None, lower_bound=1, upper_bound=2400), wheels_on=Row(column_size=649770, value_count=464946, null_value_count=2122, nan_value_count=None, lower_bound=1, upper_bound=2400), year=Row(column_size=1133, value_count=464946, null_value_count=0, nan_value_count=None, lower_bound=2015, upper_bound=2015))"
0,s3://warehouse/db/flights/data/month=4/00006-945-22846555-129f-4460-bd05-fe6cf2fc1357-0-00001.parquet,PARQUET,0,Row(month=4),485151,10816223,"{1: 1179, 2: 1179, 3: 1341, 4: 1320, 5: 139282, 6: 804792, 7: 795490, 8: 508440, 9: 486408, 10: 155374, 11: 680506, 12: 450093, 13: 319393, 14: 680748, 15: 541736, 16: 590404, 17: 582706, 18: 671887, 19: 681334, 20: 316792, 21: 671966, 22: 681333, 23: 507388, 24: 5300, 25: 10534, 26: 11667, 27: 121533, 28: 55978, 29: 123526, 30: 120765, 31: 69283}","{1: 485151, 2: 485151, 3: 485151, 4: 485151, 5: 485151, 6: 485151, 7: 485151, 8: 485151, 9: 485151, 10: 485151, 11: 485151, 12: 485151, 13: 485151, 14: 485151, 15: 485151, 16: 485151, 17: 485151, 18: 485151, 19: 485151, 20: 485151, 21: 485151, 22: 485151, 23: 485151, 24: 485151, 25: 485151, 26: 485151, 27: 485151, 28: 485151, 29: 485151, 30: 485151, 31: 485151}","{1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 579, 8: 0, 9: 0, 10: 0, 11: 4253, 12: 4253, 13: 4455, 14: 4455, 15: 2, 16: 5900, 17: 5900, 18: 0, 19: 4708, 20: 4708, 21: 0, 22: 4708, 23: 5900, 24: 0, 25: 0, 26: 480631, 27: 402904, 28: 402904, 29: 402904, 30: 402904, 31: 402904}",{},"{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x04\x00\x00\x00'), 3: bytearray(b'\x01\x00\x00\x00'), 4: bytearray(b'\x01\x00\x00\x00'), 5: bytearray(b'AA'), 6: bytearray(b'\x01\x00\x00\x00'), 7: bytearray(b'N001AA'), 8: bytearray(b'ABE'), 9: bytearray(b'ABE'), 10: bytearray(b'\x05\x00\x00\x00'), 11: bytearray(b'\x01\x00\x00\x00'), 12: bytearray(b'\xbc\xff\xff\xff'), 13: bytearray(b'\x01\x00\x00\x00'), 14: bytearray(b'\x01\x00\x00\x00'), 15: bytearray(b'\x14\x00\x00\x00'), 16: bytearray(b'\x0f\x00\x00\x00'), 17: bytearray(b'\x08\x00\x00\x00'), 18: bytearray(b'\x15\x00\x00\x00'), 19: bytearray(b'\x01\x00\x00\x00'), 20: bytearray(b'\x01\x00\x00\x00'), 21: bytearray(b'\x01\x00\x00\x00'), 22: bytearray(b'\x01\x00\x00\x00'), 23: bytearray(b'\xa9\xff\xff\xff'), 24: bytearray(b'\x00\x00\x00\x00'), 25: bytearray(b'\x00\x00\x00\x00'), 26: bytearray(b'A'), 27: bytearray(b'\x00\x00\x00\x00'), 28: bytearray(b'\x00\x00\x00\x00'), 29: bytearray(b'\x00\x00\x00\x00'), 30: bytearray(b'\x00\x00\x00\x00'), 31: bytearray(b'\x00\x00\x00\x00')}","{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x04\x00\x00\x00'), 3: bytearray(b'\x1e\x00\x00\x00'), 4: bytearray(b'\x07\x00\x00\x00'), 5: bytearray(b'WN'), 6: bytearray(b'\x0e\x1d\x00\x00'), 7: bytearray(b'N9EAMQ'), 8: bytearray(b'YUM'), 9: bytearray(b'YUM'), 10: bytearray(b'7\t\x00\x00'), 11: bytearray(b'`\t\x00\x00'), 12: bytearray(b'\x06\x06\x00\x00'), 13: bytearray(b'\x9e\x00\x00\x00'), 14: bytearray(b'`\t\x00\x00'), 15: bytearray(b'\xa7\x02\x00\x00'), 16: bytearray(b'\xdf\x02\x00\x00'), 17: bytearray(b'\x99\x02\x00\x00'), 18: bytearray(b'w\x13\x00\x00'), 19: bytearray(b'`\t\x00\x00'), 20: bytearray(b'\x9e\x00\x00\x00'), 21: bytearray(b'`\t\x00\x00'), 22: bytearray(b'`\t\x00\x00'), 23: bytearray(b'\x12\x06\x00\x00'), 24: bytearray(b'\x01\x00\x00\x00'), 25: bytearray(b'\x01\x00\x00\x00'), 26: bytearray(b'C'), 27: bytearray(b'x\x03\x00\x00'), 28: bytearray(b'\xcd\x00\x00\x00'), 29: bytearray(b'\x84\x04\x00\x00'), 30: bytearray(b'\x0e\x05\x00\x00'), 31: bytearray(b'\x1c\x03\x00\x00')}",,[4],,0,"Row(air_system_delay=Row(column_size=121533, value_count=485151, null_value_count=402904, nan_value_count=None, lower_bound=0, upper_bound=888), air_time=Row(column_size=582706, value_count=485151, null_value_count=5900, nan_value_count=None, lower_bound=8, upper_bound=665), airline=Row(column_size=139282, value_count=485151, null_value_count=0, nan_value_count=None, lower_bound='AA', upper_bound='WN'), airline_delay=Row(column_size=123526, value_count=485151, null_value_count=402904, nan_value_count=None, lower_bound=0, upper_bound=1156), arrival_delay=Row(column_size=507388, value_count=485151, null_value_count=5900, nan_value_count=None, lower_bound=-87, upper_bound=1554), arrival_time=Row(column_size=681333, value_count=485151, null_value_count=4708, nan_value_count=None, lower_bound=1, upper_bound=2400), cancellation_reason=Row(column_size=11667, value_count=485151, null_value_count=480631, nan_value_count=None, lower_bound='A', upper_bound='C'), cancelled=Row(column_size=10534, value_count=485151, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), day=Row(column_size=1341, value_count=485151, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=30), day_of_week=Row(column_size=1320, value_count=485151, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=7), departure_delay=Row(column_size=450093, value_count=485151, null_value_count=4253, nan_value_count=None, lower_bound=-68, upper_bound=1542), departure_time=Row(column_size=680506, value_count=485151, null_value_count=4253, nan_value_count=None, lower_bound=1, upper_bound=2400), destination_airport=Row(column_size=486408, value_count=485151, null_value_count=0, nan_value_count=None, lower_bound='ABE', upper_bound='YUM'), distance=Row(column_size=671887, value_count=485151, null_value_count=0, nan_value_count=None, lower_bound=21, upper_bound=4983), diverted=Row(column_size=5300, value_count=485151, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), elapsed_time=Row(column_size=590404, value_count=485151, null_value_count=5900, nan_value_count=None, lower_bound=15, upper_bound=735), flight_number=Row(column_size=804792, value_count=485151, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=7438), late_aircraft_delay=Row(column_size=120765, value_count=485151, null_value_count=402904, nan_value_count=None, lower_bound=0, upper_bound=1294), month=Row(column_size=1179, value_count=485151, null_value_count=0, nan_value_count=None, lower_bound=4, upper_bound=4), origin_airport=Row(column_size=508440, value_count=485151, null_value_count=0, nan_value_count=None, lower_bound='ABE', upper_bound='YUM'), scheduled_arrival=Row(column_size=671966, value_count=485151, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2400), scheduled_departure=Row(column_size=155374, value_count=485151, null_value_count=0, nan_value_count=None, lower_bound=5, upper_bound=2359), scheduled_time=Row(column_size=541736, value_count=485151, null_value_count=2, nan_value_count=None, lower_bound=20, upper_bound=679), security_delay=Row(column_size=55978, value_count=485151, null_value_count=402904, nan_value_count=None, lower_bound=0, upper_bound=205), tail_number=Row(column_size=795490, value_count=485151, null_value_count=579, nan_value_count=None, lower_bound='N001AA', upper_bound='N9EAMQ'), taxi_in=Row(column_size=316792, value_count=485151, null_value_count=4708, nan_value_count=None, lower_bound=1, upper_bound=158), taxi_out=Row(column_size=319393, value_count=485151, null_value_count=4455, nan_value_count=None, lower_bound=1, upper_bound=158), weather_delay=Row(column_size=69283, value_count=485151, null_value_count=402904, nan_value_count=None, lower_bound=0, upper_bound=796), wheels_off=Row(column_size=680748, value_count=485151, null_value_count=4455, nan_value_count=None, lower_bound=1, upper_bound=2400), wheels_on=Row(column_size=681334, value_count=485151, null_value_count=4708, nan_value_count=None, lower_bound=1, upper_bound=2400), year=Row(column_size=1179, value_count=485151, null_value_count=0, nan_value_count=None, lower_bound=2015, upper_bound=2015))"
0,s3://warehouse/db/flights/data/month=8/00007-946-22846555-129f-4460-bd05-fe6cf2fc1357-0-00001.parquet,PARQUET,0,Row(month=8),510536,11439099,"{1: 1228, 2: 1228, 3: 1392, 4: 1370, 5: 145875, 6: 845610, 7: 840104, 8: 528633, 9: 501076, 10: 165804, 11: 717036, 12: 515325, 13: 338544, 14: 717441, 15: 577470, 16: 618275, 17: 616805, 18: 706923, 19: 717848, 20: 317878, 21: 706981, 22: 717844, 23: 548835, 24: 5713, 25: 11762, 26: 12960, 27: 129727, 28: 61063, 29: 133533, 30: 131834, 31: 75641}","{1: 510536, 2: 510536, 3: 510536, 4: 510536, 5: 510536, 6: 510536, 7: 510536, 8: 510536, 9: 510536, 10: 510536, 11: 510536, 12: 510536, 13: 510536, 14: 510536, 15: 510536, 16: 510536, 17: 510536, 18: 510536, 19: 510536, 20: 510536, 21: 510536, 22: 510536, 23: 510536, 24: 510536, 25: 510536, 26: 510536, 27: 510536, 28: 510536, 29: 510536, 30: 510536, 31: 510536}","{1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 809, 8: 0, 9: 0, 10: 0, 11: 4719, 12: 4719, 13: 5004, 14: 5004, 15: 0, 16: 6580, 17: 6580, 18: 0, 19: 5218, 20: 5218, 21: 0, 22: 5218, 23: 6580, 24: 0, 25: 0, 26: 505484, 27: 416423, 28: 416423, 29: 416423, 30: 416423, 31: 416423}",{},"{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x08\x00\x00\x00'), 3: bytearray(b'\x01\x00\x00\x00'), 4: bytearray(b'\x01\x00\x00\x00'), 5: bytearray(b'AA'), 6: bytearray(b'\x01\x00\x00\x00'), 7: bytearray(b'7819A'), 8: bytearray(b'ABE'), 9: bytearray(b'ABE'), 10: bytearray(b'\x01\x00\x00\x00'), 11: bytearray(b'\x01\x00\x00\x00'), 12: bytearray(b'\xd6\xff\xff\xff'), 13: bytearray(b'\x01\x00\x00\x00'), 14: bytearray(b'\x01\x00\x00\x00'), 15: bytearray(b'\x14\x00\x00\x00'), 16: bytearray(b'\x10\x00\x00\x00'), 17: bytearray(b'\x08\x00\x00\x00'), 18: bytearray(b'\x1f\x00\x00\x00'), 19: bytearray(b'\x01\x00\x00\x00'), 20: bytearray(b'\x01\x00\x00\x00'), 21: bytearray(b'\x01\x00\x00\x00'), 22: bytearray(b'\x01\x00\x00\x00'), 23: bytearray(b'\xb0\xff\xff\xff'), 24: bytearray(b'\x00\x00\x00\x00'), 25: bytearray(b'\x00\x00\x00\x00'), 26: bytearray(b'A'), 27: bytearray(b'\x00\x00\x00\x00'), 28: bytearray(b'\x00\x00\x00\x00'), 29: bytearray(b'\x00\x00\x00\x00'), 30: bytearray(b'\x00\x00\x00\x00'), 31: bytearray(b'\x00\x00\x00\x00')}","{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x08\x00\x00\x00'), 3: bytearray(b'\x1f\x00\x00\x00'), 4: bytearray(b'\x07\x00\x00\x00'), 5: bytearray(b'WN'), 6: bytearray(b'\x7f&\x00\x00'), 7: bytearray(b'N9EAMQ'), 8: bytearray(b'YUM'), 9: bytearray(b'YUM'), 10: bytearray(b'7\t\x00\x00'), 11: bytearray(b'`\t\x00\x00'), 12: bytearray(b'V\x07\x00\x00'), 13: bytearray(b'\xaa\x00\x00\x00'), 14: bytearray(b'`\t\x00\x00'), 15: bytearray(b'\x8c\x02\x00\x00'), 16: bytearray(b'\xc4\x02\x00\x00'), 17: bytearray(b'\x8b\x02\x00\x00'), 18: bytearray(b'w\x13\x00\x00'), 19: bytearray(b'`\t\x00\x00'), 20: bytearray(b'\xab\x00\x00\x00'), 21: bytearray(b'7\t\x00\x00'), 22: bytearray(b'`\t\x00\x00'), 23: bytearray(b'j\x07\x00\x00'), 24: bytearray(b'\x01\x00\x00\x00'), 25: bytearray(b'\x01\x00\x00\x00'), 26: bytearray(b'D'), 27: bytearray(b'n\x04\x00\x00'), 28: bytearray(b'\xdd\x00\x00\x00'), 29: bytearray(b'V\x07\x00\x00'), 30: bytearray(b""\'\x03\x00\x00""), 31: bytearray(b'\x80\x03\x00\x00')}",,[4],,0,"Row(air_system_delay=Row(column_size=129727, value_count=510536, null_value_count=416423, nan_value_count=None, lower_bound=0, upper_bound=1134), air_time=Row(column_size=616805, value_count=510536, null_value_count=6580, nan_value_count=None, lower_bound=8, upper_bound=651), airline=Row(column_size=145875, value_count=510536, null_value_count=0, nan_value_count=None, lower_bound='AA', upper_bound='WN'), airline_delay=Row(column_size=133533, value_count=510536, null_value_count=416423, nan_value_count=None, lower_bound=0, upper_bound=1878), arrival_delay=Row(column_size=548835, value_count=510536, null_value_count=6580, nan_value_count=None, lower_bound=-80, upper_bound=1898), arrival_time=Row(column_size=717844, value_count=510536, null_value_count=5218, nan_value_count=None, lower_bound=1, upper_bound=2400), cancellation_reason=Row(column_size=12960, value_count=510536, null_value_count=505484, nan_value_count=None, lower_bound='A', upper_bound='D'), cancelled=Row(column_size=11762, value_count=510536, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), day=Row(column_size=1392, value_count=510536, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=31), day_of_week=Row(column_size=1370, value_count=510536, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=7), departure_delay=Row(column_size=515325, value_count=510536, null_value_count=4719, nan_value_count=None, lower_bound=-42, upper_bound=1878), departure_time=Row(column_size=717036, value_count=510536, null_value_count=4719, nan_value_count=None, lower_bound=1, upper_bound=2400), destination_airport=Row(column_size=501076, value_count=510536, null_value_count=0, nan_value_count=None, lower_bound='ABE', upper_bound='YUM'), distance=Row(column_size=706923, value_count=510536, null_value_count=0, nan_value_count=None, lower_bound=31, upper_bound=4983), diverted=Row(column_size=5713, value_count=510536, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), elapsed_time=Row(column_size=618275, value_count=510536, null_value_count=6580, nan_value_count=None, lower_bound=16, upper_bound=708), flight_number=Row(column_size=845610, value_count=510536, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=9855), late_aircraft_delay=Row(column_size=131834, value_count=510536, null_value_count=416423, nan_value_count=None, lower_bound=0, upper_bound=807), month=Row(column_size=1228, value_count=510536, null_value_count=0, nan_value_count=None, lower_bound=8, upper_bound=8), origin_airport=Row(column_size=528633, value_count=510536, null_value_count=0, nan_value_count=None, lower_bound='ABE', upper_bound='YUM'), scheduled_arrival=Row(column_size=706981, value_count=510536, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_departure=Row(column_size=165804, value_count=510536, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_time=Row(column_size=577470, value_count=510536, null_value_count=0, nan_value_count=None, lower_bound=20, upper_bound=652), security_delay=Row(column_size=61063, value_count=510536, null_value_count=416423, nan_value_count=None, lower_bound=0, upper_bound=221), tail_number=Row(column_size=840104, value_count=510536, null_value_count=809, nan_value_count=None, lower_bound='7819A', upper_bound='N9EAMQ'), taxi_in=Row(column_size=317878, value_count=510536, null_value_count=5218, nan_value_count=None, lower_bound=1, upper_bound=171), taxi_out=Row(column_size=338544, value_count=510536, null_value_count=5004, nan_value_count=None, lower_bound=1, upper_bound=170), weather_delay=Row(column_size=75641, value_count=510536, null_value_count=416423, nan_value_count=None, lower_bound=0, upper_bound=896), wheels_off=Row(column_size=717441, value_count=510536, null_value_count=5004, nan_value_count=None, lower_bound=1, upper_bound=2400), wheels_on=Row(column_size=717848, value_count=510536, null_value_count=5218, nan_value_count=None, lower_bound=1, upper_bound=2400), year=Row(column_size=1228, value_count=510536, null_value_count=0, nan_value_count=None, lower_bound=2015, upper_bound=2015))"
0,s3://warehouse/db/flights/data/month=7/00008-947-22846555-129f-4460-bd05-fe6cf2fc1357-0-00001.parquet,PARQUET,0,Row(month=7),520718,11755348,"{1: 1271, 2: 1271, 3: 1436, 4: 1416, 5: 147021, 6: 862091, 7: 856490, 8: 536054, 9: 517051, 10: 166903, 11: 731819, 12: 518151, 13: 343134, 14: 732234, 15: 588960, 16: 634629, 17: 629355, 18: 720974, 19: 732625, 20: 379857, 21: 721046, 22: 732627, 23: 553566, 24: 5795, 25: 11952, 26: 13254, 27: 140558, 28: 63615, 29: 155884, 30: 147988, 31: 78205}","{1: 520718, 2: 520718, 3: 520718, 4: 520718, 5: 520718, 6: 520718, 7: 520718, 8: 520718, 9: 520718, 10: 520718, 11: 520718, 12: 520718, 13: 520718, 14: 520718, 15: 520718, 16: 520718, 17: 520718, 18: 520718, 19: 520718, 20: 520718, 21: 520718, 22: 520718, 23: 520718, 24: 520718, 25: 520718, 26: 520718, 27: 520718, 28: 520718, 29: 520718, 30: 520718, 31: 520718}","{1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 723, 8: 0, 9: 0, 10: 0, 11: 4507, 12: 4507, 13: 4756, 14: 4756, 15: 0, 16: 6334, 17: 6334, 18: 0, 19: 4947, 20: 4947, 21: 0, 22: 4947, 23: 6334, 24: 0, 25: 0, 26: 515912, 27: 413091, 28: 413091, 29: 413091, 30: 413091, 31: 413091}",{},"{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x07\x00\x00\x00'), 3: bytearray(b'\x01\x00\x00\x00'), 4: bytearray(b'\x01\x00\x00\x00'), 5: bytearray(b'AA'), 6: bytearray(b'\x01\x00\x00\x00'), 7: bytearray(b'7819A'), 8: bytearray(b'ABE'), 9: bytearray(b'ABE'), 10: bytearray(b'\x03\x00\x00\x00'), 11: bytearray(b'\x01\x00\x00\x00'), 12: bytearray(b'\xd7\xff\xff\xff'), 13: bytearray(b'\x01\x00\x00\x00'), 14: bytearray(b'\x01\x00\x00\x00'), 15: bytearray(b'\x12\x00\x00\x00'), 16: bytearray(b'\x0e\x00\x00\x00'), 17: bytearray(b'\x07\x00\x00\x00'), 18: bytearray(b'\x1f\x00\x00\x00'), 19: bytearray(b'\x01\x00\x00\x00'), 20: bytearray(b'\x01\x00\x00\x00'), 21: bytearray(b'\x01\x00\x00\x00'), 22: bytearray(b'\x01\x00\x00\x00'), 23: bytearray(b'\xb7\xff\xff\xff'), 24: bytearray(b'\x00\x00\x00\x00'), 25: bytearray(b'\x00\x00\x00\x00'), 26: bytearray(b'A'), 27: bytearray(b'\x00\x00\x00\x00'), 28: bytearray(b'\x00\x00\x00\x00'), 29: bytearray(b'\x00\x00\x00\x00'), 30: bytearray(b'\x00\x00\x00\x00'), 31: bytearray(b'\x00\x00\x00\x00')}","{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\x07\x00\x00\x00'), 3: bytearray(b'\x1f\x00\x00\x00'), 4: bytearray(b'\x07\x00\x00\x00'), 5: bytearray(b'WN'), 6: bytearray(b'\x0e\x1d\x00\x00'), 7: bytearray(b'N9EAMQ'), 8: bytearray(b'YUM'), 9: bytearray(b'YUM'), 10: bytearray(b'7\t\x00\x00'), 11: bytearray(b'`\t\x00\x00'), 12: bytearray(b'Y\x06\x00\x00'), 13: bytearray(b'\xa8\x00\x00\x00'), 14: bytearray(b'`\t\x00\x00'), 15: bytearray(b'\x8c\x02\x00\x00'), 16: bytearray(b'\xb8\x02\x00\x00'), 17: bytearray(b'\x7f\x02\x00\x00'), 18: bytearray(b'w\x13\x00\x00'), 19: bytearray(b'`\t\x00\x00'), 20: bytearray(b'\xa4\x00\x00\x00'), 21: bytearray(b'7\t\x00\x00'), 22: bytearray(b'`\t\x00\x00'), 23: bytearray(b'd\x06\x00\x00'), 24: bytearray(b'\x01\x00\x00\x00'), 25: bytearray(b'\x01\x00\x00\x00'), 26: bytearray(b'D'), 27: bytearray(b'\x10\x03\x00\x00'), 28: bytearray(b'\x00\x01\x00\x00'), 29: bytearray(b'Y\x06\x00\x00'), 30: bytearray(b'e\x03\x00\x00'), 31: bytearray(b'D\x03\x00\x00')}",,[4],,0,"Row(air_system_delay=Row(column_size=140558, value_count=520718, null_value_count=413091, nan_value_count=None, lower_bound=0, upper_bound=784), air_time=Row(column_size=629355, value_count=520718, null_value_count=6334, nan_value_count=None, lower_bound=7, upper_bound=639), airline=Row(column_size=147021, value_count=520718, null_value_count=0, nan_value_count=None, lower_bound='AA', upper_bound='WN'), airline_delay=Row(column_size=155884, value_count=520718, null_value_count=413091, nan_value_count=None, lower_bound=0, upper_bound=1625), arrival_delay=Row(column_size=553566, value_count=520718, null_value_count=6334, nan_value_count=None, lower_bound=-73, upper_bound=1636), arrival_time=Row(column_size=732627, value_count=520718, null_value_count=4947, nan_value_count=None, lower_bound=1, upper_bound=2400), cancellation_reason=Row(column_size=13254, value_count=520718, null_value_count=515912, nan_value_count=None, lower_bound='A', upper_bound='D'), cancelled=Row(column_size=11952, value_count=520718, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), day=Row(column_size=1436, value_count=520718, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=31), day_of_week=Row(column_size=1416, value_count=520718, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=7), departure_delay=Row(column_size=518151, value_count=520718, null_value_count=4507, nan_value_count=None, lower_bound=-41, upper_bound=1625), departure_time=Row(column_size=731819, value_count=520718, null_value_count=4507, nan_value_count=None, lower_bound=1, upper_bound=2400), destination_airport=Row(column_size=517051, value_count=520718, null_value_count=0, nan_value_count=None, lower_bound='ABE', upper_bound='YUM'), distance=Row(column_size=720974, value_count=520718, null_value_count=0, nan_value_count=None, lower_bound=31, upper_bound=4983), diverted=Row(column_size=5795, value_count=520718, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), elapsed_time=Row(column_size=634629, value_count=520718, null_value_count=6334, nan_value_count=None, lower_bound=14, upper_bound=696), flight_number=Row(column_size=862091, value_count=520718, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=7438), late_aircraft_delay=Row(column_size=147988, value_count=520718, null_value_count=413091, nan_value_count=None, lower_bound=0, upper_bound=869), month=Row(column_size=1271, value_count=520718, null_value_count=0, nan_value_count=None, lower_bound=7, upper_bound=7), origin_airport=Row(column_size=536054, value_count=520718, null_value_count=0, nan_value_count=None, lower_bound='ABE', upper_bound='YUM'), scheduled_arrival=Row(column_size=721046, value_count=520718, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_departure=Row(column_size=166903, value_count=520718, null_value_count=0, nan_value_count=None, lower_bound=3, upper_bound=2359), scheduled_time=Row(column_size=588960, value_count=520718, null_value_count=0, nan_value_count=None, lower_bound=18, upper_bound=652), security_delay=Row(column_size=63615, value_count=520718, null_value_count=413091, nan_value_count=None, lower_bound=0, upper_bound=256), tail_number=Row(column_size=856490, value_count=520718, null_value_count=723, nan_value_count=None, lower_bound='7819A', upper_bound='N9EAMQ'), taxi_in=Row(column_size=379857, value_count=520718, null_value_count=4947, nan_value_count=None, lower_bound=1, upper_bound=164), taxi_out=Row(column_size=343134, value_count=520718, null_value_count=4756, nan_value_count=None, lower_bound=1, upper_bound=168), weather_delay=Row(column_size=78205, value_count=520718, null_value_count=413091, nan_value_count=None, lower_bound=0, upper_bound=836), wheels_off=Row(column_size=732234, value_count=520718, null_value_count=4756, nan_value_count=None, lower_bound=1, upper_bound=2400), wheels_on=Row(column_size=732625, value_count=520718, null_value_count=4947, nan_value_count=None, lower_bound=1, upper_bound=2400), year=Row(column_size=1271, value_count=520718, null_value_count=0, nan_value_count=None, lower_bound=2015, upper_bound=2015))"
0,s3://warehouse/db/flights/data/month=10/00009-948-22846555-129f-4460-bd05-fe6cf2fc1357-0-00001.parquet,PARQUET,0,Row(month=10),486165,10651549,"{1: 1178, 2: 1178, 3: 1340, 4: 1321, 5: 131738, 6: 805649, 7: 794000, 8: 504873, 9: 483405, 10: 156733, 11: 678743, 12: 443788, 13: 313010, 14: 678961, 15: 549965, 16: 580107, 17: 575527, 18: 661533, 19: 679374, 20: 320549, 21: 673433, 22: 679368, 23: 491517, 24: 3875, 25: 6922, 26: 7550, 27: 89852, 28: 50736, 29: 105428, 30: 96483, 31: 56779}","{1: 486165, 2: 486165, 3: 486165, 4: 486165, 5: 486165, 6: 486165, 7: 486165, 8: 486165, 9: 486165, 10: 486165, 11: 486165, 12: 486165, 13: 486165, 14: 486165, 15: 486165, 16: 486165, 17: 486165, 18: 486165, 19: 486165, 20: 486165, 21: 486165, 22: 486165, 23: 486165, 24: 486165, 25: 486165, 26: 486165, 27: 486165, 28: 486165, 29: 486165, 30: 486165, 31: 486165}","{1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 354, 8: 0, 9: 0, 10: 0, 11: 2339, 12: 2339, 13: 2435, 14: 2435, 15: 0, 16: 3287, 17: 3287, 18: 0, 19: 2571, 20: 2571, 21: 0, 22: 2571, 23: 3287, 24: 0, 25: 0, 26: 483711, 27: 426086, 28: 426086, 29: 426086, 30: 426086, 31: 426086}",{},"{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\n\x00\x00\x00'), 3: bytearray(b'\x01\x00\x00\x00'), 4: bytearray(b'\x01\x00\x00\x00'), 5: bytearray(b'AA'), 6: bytearray(b'\x01\x00\x00\x00'), 7: bytearray(b'N001AA'), 8: bytearray(b'10135'), 9: bytearray(b'10135'), 10: bytearray(b'\x05\x00\x00\x00'), 11: bytearray(b'\x01\x00\x00\x00'), 12: bytearray(b'\xd7\xff\xff\xff'), 13: bytearray(b'\x01\x00\x00\x00'), 14: bytearray(b'\x01\x00\x00\x00'), 15: bytearray(b'\x16\x00\x00\x00'), 16: bytearray(b'\x0e\x00\x00\x00'), 17: bytearray(b'\x07\x00\x00\x00'), 18: bytearray(b'\x1f\x00\x00\x00'), 19: bytearray(b'\x01\x00\x00\x00'), 20: bytearray(b'\x01\x00\x00\x00'), 21: bytearray(b'\x01\x00\x00\x00'), 22: bytearray(b'\x01\x00\x00\x00'), 23: bytearray(b'\xaf\xff\xff\xff'), 24: bytearray(b'\x00\x00\x00\x00'), 25: bytearray(b'\x00\x00\x00\x00'), 26: bytearray(b'A'), 27: bytearray(b'\x00\x00\x00\x00'), 28: bytearray(b'\x00\x00\x00\x00'), 29: bytearray(b'\x00\x00\x00\x00'), 30: bytearray(b'\x00\x00\x00\x00'), 31: bytearray(b'\x00\x00\x00\x00')}","{1: bytearray(b'\xdf\x07\x00\x00'), 2: bytearray(b'\n\x00\x00\x00'), 3: bytearray(b'\x1f\x00\x00\x00'), 4: bytearray(b'\x07\x00\x00\x00'), 5: bytearray(b'WN'), 6: bytearray(b'\x0e\x1d\x00\x00'), 7: bytearray(b'N9EAMQ'), 8: bytearray(b'16218'), 9: bytearray(b'16218'), 10: bytearray(b'7\t\x00\x00'), 11: bytearray(b'`\t\x00\x00'), 12: bytearray(b'%\x06\x00\x00'), 13: bytearray(b'\xb1\x00\x00\x00'), 14: bytearray(b'`\t\x00\x00'), 15: bytearray(b'\xa4\x02\x00\x00'), 16: bytearray(b'\xb4\x02\x00\x00'), 17: bytearray(b'\x95\x02\x00\x00'), 18: bytearray(b'w\x13\x00\x00'), 19: bytearray(b'`\t\x00\x00'), 20: bytearray(b'\x92\x00\x00\x00'), 21: bytearray(b'7\t\x00\x00'), 22: bytearray(b'`\t\x00\x00'), 23: bytearray(b'\x13\x06\x00\x00'), 24: bytearray(b'\x01\x00\x00\x00'), 25: bytearray(b'\x01\x00\x00\x00'), 26: bytearray(b'C'), 27: bytearray(b'A\x02\x00\x00'), 28: bytearray(b'Y\x00\x00\x00'), 29: bytearray(b'\x13\x06\x00\x00'), 30: bytearray(b'0\x03\x00\x00'), 31: bytearray(b'\xe3\x03\x00\x00')}",,[4],,0,"Row(air_system_delay=Row(column_size=89852, value_count=486165, null_value_count=426086, nan_value_count=None, lower_bound=0, upper_bound=577), air_time=Row(column_size=575527, value_count=486165, null_value_count=3287, nan_value_count=None, lower_bound=7, upper_bound=661), airline=Row(column_size=131738, value_count=486165, null_value_count=0, nan_value_count=None, lower_bound='AA', upper_bound='WN'), airline_delay=Row(column_size=105428, value_count=486165, null_value_count=426086, nan_value_count=None, lower_bound=0, upper_bound=1555), arrival_delay=Row(column_size=491517, value_count=486165, null_value_count=3287, nan_value_count=None, lower_bound=-81, upper_bound=1555), arrival_time=Row(column_size=679368, value_count=486165, null_value_count=2571, nan_value_count=None, lower_bound=1, upper_bound=2400), cancellation_reason=Row(column_size=7550, value_count=486165, null_value_count=483711, nan_value_count=None, lower_bound='A', upper_bound='C'), cancelled=Row(column_size=6922, value_count=486165, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), day=Row(column_size=1340, value_count=486165, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=31), day_of_week=Row(column_size=1321, value_count=486165, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=7), departure_delay=Row(column_size=443788, value_count=486165, null_value_count=2339, nan_value_count=None, lower_bound=-41, upper_bound=1573), departure_time=Row(column_size=678743, value_count=486165, null_value_count=2339, nan_value_count=None, lower_bound=1, upper_bound=2400), destination_airport=Row(column_size=483405, value_count=486165, null_value_count=0, nan_value_count=None, lower_bound='10135', upper_bound='16218'), distance=Row(column_size=661533, value_count=486165, null_value_count=0, nan_value_count=None, lower_bound=31, upper_bound=4983), diverted=Row(column_size=3875, value_count=486165, null_value_count=0, nan_value_count=None, lower_bound=0, upper_bound=1), elapsed_time=Row(column_size=580107, value_count=486165, null_value_count=3287, nan_value_count=None, lower_bound=14, upper_bound=692), flight_number=Row(column_size=805649, value_count=486165, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=7438), late_aircraft_delay=Row(column_size=96483, value_count=486165, null_value_count=426086, nan_value_count=None, lower_bound=0, upper_bound=816), month=Row(column_size=1178, value_count=486165, null_value_count=0, nan_value_count=None, lower_bound=10, upper_bound=10), origin_airport=Row(column_size=504873, value_count=486165, null_value_count=0, nan_value_count=None, lower_bound='10135', upper_bound='16218'), scheduled_arrival=Row(column_size=673433, value_count=486165, null_value_count=0, nan_value_count=None, lower_bound=1, upper_bound=2359), scheduled_departure=Row(column_size=156733, value_count=486165, null_value_count=0, nan_value_count=None, lower_bound=5, upper_bound=2359), scheduled_time=Row(column_size=549965, value_count=486165, null_value_count=0, nan_value_count=None, lower_bound=22, upper_bound=676), security_delay=Row(column_size=50736, value_count=486165, null_value_count=426086, nan_value_count=None, lower_bound=0, upper_bound=89), tail_number=Row(column_size=794000, value_count=486165, null_value_count=354, nan_value_count=None, lower_bound='N001AA', upper_bound='N9EAMQ'), taxi_in=Row(column_size=320549, value_count=486165, null_value_count=2571, nan_value_count=None, lower_bound=1, upper_bound=146), taxi_out=Row(column_size=313010, value_count=486165, null_value_count=2435, nan_value_count=None, lower_bound=1, upper_bound=177), weather_delay=Row(column_size=56779, value_count=486165, null_value_count=426086, nan_value_count=None, lower_bound=0, upper_bound=995), wheels_off=Row(column_size=678961, value_count=486165, null_value_count=2435, nan_value_count=None, lower_bound=1, upper_bound=2400), wheels_on=Row(column_size=679374, value_count=486165, null_value_count=2571, nan_value_count=None, lower_bound=1, upper_bound=2400), year=Row(column_size=1178, value_count=486165, null_value_count=0, nan_value_count=None, lower_bound=2015, upper_bound=2015))"


In [17]:
%%sql
SELECT
    SUM(file_size_in_bytes)
FROM airline.db.flights.files

sum(file_size_in_bytes)
130081273


In [19]:
%%sql
SELECT 
    partition
FROM 
    airline.db.flights.files
WHERE 
    null_value_counts['7'] > 0
GROUP BY 
    partition

partition
Row(month=12)
Row(month=1)
Row(month=6)
Row(month=3)
Row(month=5)
Row(month=9)
Row(month=4)
Row(month=8)
Row(month=7)
Row(month=10)
