In [0]:
%fs ls dbfs:/databricks-datasets/airlines/

path,name,size,modificationTime
dbfs:/databricks-datasets/airlines/README.md,README.md,1089,1454697889000
dbfs:/databricks-datasets/airlines/_SUCCESS,_SUCCESS,0,1436493184000
dbfs:/databricks-datasets/airlines/part-00000,part-00000,67108879,1436493184000
dbfs:/databricks-datasets/airlines/part-00001,part-00001,67108862,1436493185000
dbfs:/databricks-datasets/airlines/part-00002,part-00002,67108930,1436493185000
dbfs:/databricks-datasets/airlines/part-00003,part-00003,67108804,1436493186000
dbfs:/databricks-datasets/airlines/part-00004,part-00004,67108908,1436493186000
dbfs:/databricks-datasets/airlines/part-00005,part-00005,67108890,1436493187000
dbfs:/databricks-datasets/airlines/part-00006,part-00006,67108825,1436493187000
dbfs:/databricks-datasets/airlines/part-00007,part-00007,67108880,1436493187000


In [0]:
airlines_schema = spark.\
    read.\
    csv(
        'dbfs:/databricks-datasets/airlines/part-00000',
        header=True,
        inferSchema=True
    ).schema

airlines_schema

Out[9]: StructType([StructField('Year', IntegerType(), True), StructField('Month', IntegerType(), True), StructField('DayofMonth', IntegerType(), True), StructField('DayOfWeek', IntegerType(), True), StructField('DepTime', StringType(), True), StructField('CRSDepTime', IntegerType(), True), StructField('ArrTime', StringType(), True), StructField('CRSArrTime', IntegerType(), True), StructField('UniqueCarrier', StringType(), True), StructField('FlightNum', IntegerType(), True), StructField('TailNum', StringType(), True), StructField('ActualElapsedTime', StringType(), True), StructField('CRSElapsedTime', IntegerType(), True), StructField('AirTime', StringType(), True), StructField('ArrDelay', StringType(), True), StructField('DepDelay', StringType(), True), StructField('Origin', StringType(), True), StructField('Dest', StringType(), True), StructField('Distance', StringType(), True), StructField('TaxiIn', StringType(), True), StructField('TaxiOut', StringType(), True), StructField('Cancel

In [0]:
airlines_df = spark\
    .read\
    .csv(
        'dbfs:/databricks-datasets/airlines/part-*',
        header=True,
        schema=airlines_schema
    )

In [0]:
airlines_df.count()

Out[11]: 1235347771

In [0]:
airlines_df.columns

Out[12]: ['Year',
 'Month',
 'DayofMonth',
 'DayOfWeek',
 'DepTime',
 'CRSDepTime',
 'ArrTime',
 'CRSArrTime',
 'UniqueCarrier',
 'FlightNum',
 'TailNum',
 'ActualElapsedTime',
 'CRSElapsedTime',
 'AirTime',
 'ArrDelay',
 'DepDelay',
 'Origin',
 'Dest',
 'Distance',
 'TaxiIn',
 'TaxiOut',
 'Cancelled',
 'CancellationCode',
 'Diverted',
 'CarrierDelay',
 'WeatherDelay',
 'NASDelay',
 'SecurityDelay',
 'LateAircraftDelay',
 'IsArrDelayed',
 'IsDepDelayed']

In [0]:
from pyspark.sql.functions import *

In [0]:
flights_by_month = airlines_df\
    .groupBy(concat_ws('-', 'Year', 'Month').alias('FlightMOnth'))\
    .agg(count('*').alias('FlightCount'))

In [0]:
flights_by_month.explain()

== Physical Plan ==
AdaptiveSparkPlan isFinalPlan=false
+- HashAggregate(keys=[_groupingexpression#498], functions=[finalmerge_count(merge count#491L) AS count(1)#478L])
   +- Exchange hashpartitioning(_groupingexpression#498, 200), ENSURE_REQUIREMENTS, [plan_id=225]
      +- HashAggregate(keys=[_groupingexpression#498], functions=[partial_count(1) AS count#491L])
         +- Project [concat_ws(-, cast(Year#348 as string), cast(Month#349 as string)) AS _groupingexpression#498]
            +- FileScan csv [Year#348,Month#349] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1920 paths)[dbfs:/databricks-datasets/airlines/part-00000, dbfs:/databricks-dat..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<Year:int,Month:int>




In [0]:
airlines_df\
    .groupBy(concat_ws('-', 'Year', 'Month').alias('FlightMOnth'))\
    .agg(count('*').alias('FlightCount'))\
    .orderBy('FlightMOnth')\
    .show(5)

+-----------+-----------+
|FlightMOnth|FlightCount|
+-----------+-----------+
|    1987-10|    4486191|
|    1987-11|    4228020|
|    1987-12|    4404028|
|     1988-1|    4369492|
|    1988-10|    4416691|
+-----------+-----------+
only showing top 5 rows



In [0]:
airlines_df\
    .write\
        .partitionBy('Year', 'Month')\
            .mode('overwrite')\
                    .save('dbfs:/FileStore/airlines')

In [0]:
# airlines_df\
#     .write\
#         .partitionBy('Year', 'Month')\
#             .mode('overwrite')\
#                 .format('delta')\
#                     .save('dbfs:/FileStore/airlines')

In [0]:
 %fs ls dbfs:/FileStore/airlines/Year=2008/Month=1

path,name,size,modificationTime
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00129-9d1c2c30-3151-499f-8484-0f7f66f89458.c000.snappy.parquet,part-00129-9d1c2c30-3151-499f-8484-0f7f66f89458.c000.snappy.parquet,1233620,1689178795397
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00213-d3cbc623-f855-4051-a7c5-d153a8d70655.c000.snappy.parquet,part-00213-d3cbc623-f855-4051-a7c5-d153a8d70655.c000.snappy.parquet,203828,1689179412908
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00253-53008225-7622-485f-a73e-8656d21d129d.c000.snappy.parquet,part-00253-53008225-7622-485f-a73e-8656d21d129d.c000.snappy.parquet,2530846,1689179702317
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00315-d689740a-8780-47d4-b9ae-be1db1063f9b.c000.snappy.parquet,part-00315-d689740a-8780-47d4-b9ae-be1db1063f9b.c000.snappy.parquet,10433858,1689180147825
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00345-c2115a43-8e7b-4375-8d6c-20cd208805f8.c000.snappy.parquet,part-00345-c2115a43-8e7b-4375-8d6c-20cd208805f8.c000.snappy.parquet,462411,1689180362777
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00375-0f48580a-89ae-45f8-87e5-4bd7df2277aa.c000.snappy.parquet,part-00375-0f48580a-89ae-45f8-87e5-4bd7df2277aa.c000.snappy.parquet,10274871,1689180573572
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00399-29e5fd0f-db61-4310-a56c-f5cf3782b5c9.c000.snappy.parquet,part-00399-29e5fd0f-db61-4310-a56c-f5cf3782b5c9.c000.snappy.parquet,8215217,1689180750098
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00552-e0ba6685-f459-470b-8857-922c7e828317.c000.snappy.parquet,part-00552-e0ba6685-f459-470b-8857-922c7e828317.c000.snappy.parquet,9644336,1689181834282
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00674-fceb3044-5681-4ae7-af90-ffb49c86bb48.c000.snappy.parquet,part-00674-fceb3044-5681-4ae7-af90-ffb49c86bb48.c000.snappy.parquet,983222,1689182694444
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00720-3c8a3892-3cf6-4876-a8b6-789fc38deae6.c000.snappy.parquet,part-00720-3c8a3892-3cf6-4876-a8b6-789fc38deae6.c000.snappy.parquet,1505406,1689183021887


In [0]:
# %fs rm -r dbfs:/FileStore/airlines/

In [0]:
 %fs ls dbfs:/FileStore

path,name,size,modificationTime
dbfs:/FileStore/airlines/,airlines/,0,0


In [0]:
 %fs ls dbfs:/FileStore/airlines/Year=2008/Month=12/

path,name,size,modificationTime
dbfs:/FileStore/airlines/Year=2008/Month=12/part-00250-af12fa57-8271-47a6-9ad5-b0ffe7a3f3b3.c000.snappy.parquet,part-00250-af12fa57-8271-47a6-9ad5-b0ffe7a3f3b3.c000.snappy.parquet,721571,1689179682588
dbfs:/FileStore/airlines/Year=2008/Month=12/part-00306-965c247f-c29a-43a1-8496-3f6e070d6174.c000.snappy.parquet,part-00306-965c247f-c29a-43a1-8496-3f6e070d6174.c000.snappy.parquet,9739058,1689180080218
dbfs:/FileStore/airlines/Year=2008/Month=12/part-00393-ef88655e-1eb1-4033-8e90-24f491530fa2.c000.snappy.parquet,part-00393-ef88655e-1eb1-4033-8e90-24f491530fa2.c000.snappy.parquet,10170456,1689180708137
dbfs:/FileStore/airlines/Year=2008/Month=12/part-00536-33a4bb61-b0c8-410b-9b37-ee9b6540ba2e.c000.snappy.parquet,part-00536-33a4bb61-b0c8-410b-9b37-ee9b6540ba2e.c000.snappy.parquet,10170456,1689181715947
dbfs:/FileStore/airlines/Year=2008/Month=12/part-00874-c8cb7615-d7fb-4e67-bf88-e89a1df22020.c000.snappy.parquet,part-00874-c8cb7615-d7fb-4e67-bf88-e89a1df22020.c000.snappy.parquet,9983651,1689184143143
dbfs:/FileStore/airlines/Year=2008/Month=12/part-00895-f644c99c-2252-4cf1-97ed-c8bc78c62917.c000.snappy.parquet,part-00895-f644c99c-2252-4cf1-97ed-c8bc78c62917.c000.snappy.parquet,449107,1689184303067
dbfs:/FileStore/airlines/Year=2008/Month=12/part-01278-5738d18a-951d-4a17-b823-5a9f6faf6603.c000.snappy.parquet,part-01278-5738d18a-951d-4a17-b823-5a9f6faf6603.c000.snappy.parquet,10170456,1689187060118
dbfs:/FileStore/airlines/Year=2008/Month=12/part-01469-b502025f-9db6-451e-8fc6-62b260f03140.c000.snappy.parquet,part-01469-b502025f-9db6-451e-8fc6-62b260f03140.c000.snappy.parquet,10170456,1689188413821
dbfs:/FileStore/airlines/Year=2008/Month=12/part-01499-a3bdb712-b424-482b-a9c7-ddb15ea77849.c000.snappy.parquet,part-01499-a3bdb712-b424-482b-a9c7-ddb15ea77849.c000.snappy.parquet,9223588,1689188635915
dbfs:/FileStore/airlines/Year=2008/Month=12/part-01509-f97036e8-2490-4c00-872d-ed20d2b98019.c000.snappy.parquet,part-01509-f97036e8-2490-4c00-872d-ed20d2b98019.c000.snappy.parquet,192742,1689188692967


In [0]:
files = []
for f in dbutils.fs.ls('dbfs:/FileStore/airlines'):
    if f.name.startswith('Year='):
        for yf in dbutils.fs.ls(f.path):
            if yf.name.startswith('Month='):
                for mf in dbutils.fs.ls(yf.path):
                    if mf.name.endswith('snappy.parquet'):
                        files.append((mf.name, mf.size))

[0;31m---------------------------------------------------------------------------[0m
[0;31mTypeError[0m                                 Traceback (most recent call last)
File [0;32m<command-1040304042039994>:9[0m
[1;32m      7[0m                     [38;5;28;01mif[39;00m mf[38;5;241m.[39mname[38;5;241m.[39mendswith([38;5;124m'[39m[38;5;124msnappy.parquet[39m[38;5;124m'[39m):
[1;32m      8[0m                         files[38;5;241m.[39mappend((mf[38;5;241m.[39mname, mf[38;5;241m.[39msize))
[0;32m----> 9[0m size [38;5;241m=[39m [38;5;28msum[39m(f[[38;5;241m1[39m] [38;5;28;01mfor[39;00m f [38;5;129;01min[39;00m files) [38;5;241m/[39m ([38;5;241m1024[39m [38;5;241m*[39m [38;5;241m1024[39m [38;5;241m*[39m [38;5;241m1024[39m)

File [0;32m/databricks/spark/python/pyspark/sql/utils.py:164[0m, in [0;36mtry_remote_functions.<locals>.wrapped[0;34m(*args, **kwargs)[0m
[1;32m    162[0m     [38;5;28;01mreturn[39;00m [38;5;28mgetattr[39m(

In [0]:
df = spark.createDataFrame(files, ["name", "size"])
total_size = df.select(sum(df["size"])).collect()[0][0] / (1024 * 1024 * 1024)
total_size
# size = sum(f[1] for f in files) / (1024 * 1024 * 1024)

Out[26]: 14.353810276836157

In [0]:
total_size

Out[31]: 14.353810276836157

In [0]:
airlines_df_1 = spark.read.parquet('dbfs:/FileStore/airlines')

[0;31m---------------------------------------------------------------------------[0m
[0;31mAnalysisException[0m                         Traceback (most recent call last)
File [0;32m<command-2655898470138028>:1[0m
[0;32m----> 1[0m airlines_df_1 [38;5;241m=[39m [43mspark[49m[38;5;241;43m.[39;49m[43mread[49m[38;5;241;43m.[39;49m[43mparquet[49m[43m([49m[38;5;124;43m'[39;49m[38;5;124;43mdbfs:/FileStore/airlines[39;49m[38;5;124;43m'[39;49m[43m)[49m

File [0;32m/databricks/spark/python/pyspark/instrumentation_utils.py:48[0m, in [0;36m_wrap_function.<locals>.wrapper[0;34m(*args, **kwargs)[0m
[1;32m     46[0m start [38;5;241m=[39m time[38;5;241m.[39mperf_counter()
[1;32m     47[0m [38;5;28;01mtry[39;00m:
[0;32m---> 48[0m     res [38;5;241m=[39m [43mfunc[49m[43m([49m[38;5;241;43m*[39;49m[43margs[49m[43m,[49m[43m [49m[38;5;241;43m*[39;49m[38;5;241;43m*[39;49m[43mkwargs[49m[43m)[49m
[1;32m     49[0m     logger[38;5;241m.[39ml

In [0]:
airlines_df_1 = spark.read.format("delta").load('dbfs:/FileStore/airlines')

In [0]:
airlines_df_1.count()

Out[29]: 1235347771

In [0]:
airlines_df_1\
    .filter('Year = 2008')\
    .groupBy(concat_ws('-', 'Year', 'Month').alias('FlightMonth'))\
    .agg(count('*').alias('FlightCOunt'))\
    .orderBy('FlightMonth')\
    .explain()

== Physical Plan ==
AdaptiveSparkPlan isFinalPlan=false
+- Sort [FlightMonth#170888 ASC NULLS FIRST], true, 0
   +- Exchange rangepartitioning(FlightMonth#170888 ASC NULLS FIRST, 200), ENSURE_REQUIREMENTS, [plan_id=802]
      +- LocalTableScan [FlightMonth#170888, FlightCOunt#170921L]




In [0]:
airlines_df_1\
    .filter('Year = 2008')\
    .groupBy(concat_ws('-', 'Year', 'Month').alias('FlightMonth'))\
    .agg(count('*').alias('FlightCOunt'))\
    .orderBy('FlightMonth')\
    .show()

+-----------+-----------+
|FlightMonth|FlightCOunt|
+-----------+-----------+
|     2008-1|    6057640|
|    2008-10|    5562040|
|    2008-11|    5232715|
|    2008-12|    5449575|
|     2008-2|    5692350|
|     2008-3|    6160890|
|     2008-4|    5981250|
|     2008-5|    6062920|
|     2008-6|    6086640|
|     2008-7|    6279300|
|     2008-8|    6122780|
|     2008-9|    5409070|
+-----------+-----------+



In [0]:
airlines_df_1.filter("Year = '2008'").count()

Out[32]: 70097170

In [0]:
 %fs ls dbfs:/FileStore/airlines/Year=2008/Month=1

path,name,size,modificationTime
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00129-9d1c2c30-3151-499f-8484-0f7f66f89458.c000.snappy.parquet,part-00129-9d1c2c30-3151-499f-8484-0f7f66f89458.c000.snappy.parquet,1233620,1689178795397
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00213-d3cbc623-f855-4051-a7c5-d153a8d70655.c000.snappy.parquet,part-00213-d3cbc623-f855-4051-a7c5-d153a8d70655.c000.snappy.parquet,203828,1689179412908
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00253-53008225-7622-485f-a73e-8656d21d129d.c000.snappy.parquet,part-00253-53008225-7622-485f-a73e-8656d21d129d.c000.snappy.parquet,2530846,1689179702317
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00315-d689740a-8780-47d4-b9ae-be1db1063f9b.c000.snappy.parquet,part-00315-d689740a-8780-47d4-b9ae-be1db1063f9b.c000.snappy.parquet,10433858,1689180147825
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00345-c2115a43-8e7b-4375-8d6c-20cd208805f8.c000.snappy.parquet,part-00345-c2115a43-8e7b-4375-8d6c-20cd208805f8.c000.snappy.parquet,462411,1689180362777
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00375-0f48580a-89ae-45f8-87e5-4bd7df2277aa.c000.snappy.parquet,part-00375-0f48580a-89ae-45f8-87e5-4bd7df2277aa.c000.snappy.parquet,10274871,1689180573572
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00399-29e5fd0f-db61-4310-a56c-f5cf3782b5c9.c000.snappy.parquet,part-00399-29e5fd0f-db61-4310-a56c-f5cf3782b5c9.c000.snappy.parquet,8215217,1689180750098
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00552-e0ba6685-f459-470b-8857-922c7e828317.c000.snappy.parquet,part-00552-e0ba6685-f459-470b-8857-922c7e828317.c000.snappy.parquet,9644336,1689181834282
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00674-fceb3044-5681-4ae7-af90-ffb49c86bb48.c000.snappy.parquet,part-00674-fceb3044-5681-4ae7-af90-ffb49c86bb48.c000.snappy.parquet,983222,1689182694444
dbfs:/FileStore/airlines/Year=2008/Month=1/part-00720-3c8a3892-3cf6-4876-a8b6-789fc38deae6.c000.snappy.parquet,part-00720-3c8a3892-3cf6-4876-a8b6-789fc38deae6.c000.snappy.parquet,1505406,1689183021887


In [0]:
airlines_df_1 = spark.read.format("delta").load('dbfs:/FileStore/airlines')

In [0]:
 %fs ls dbfs:/FileStore/airlines/Year=__HIVE_DEFAULT_PARTITION__/

In [0]:
airlines_df_1.count()

Out[2]: 1235347771

In [0]:
%sql

-- drop database if exists itversity_retail_db

In [0]:
%sql

use itversity_retail_db

In [0]:
%sql

select current_database()

current_database()
itversity_retail_db


In [0]:
%sql

CREATE OR REPLACE TEMPORARY VIEW airline_v
USING parquet
OPTIONS (
  path = 'dbfs:/FileStore/airlines'
);


In [0]:
%sql

describe airline_v

col_name,data_type,comment
DayofMonth,int,
DayOfWeek,int,
DepTime,string,
CRSDepTime,int,
ArrTime,string,
CRSArrTime,int,
UniqueCarrier,string,
FlightNum,int,
TailNum,string,
ActualElapsedTime,string,


In [0]:
%sql

show tables

database,tableName,isTemporary
itversity_retail_db,order_items,False
itversity_retail_db,orders,False
,airline_v,True


In [0]:
%sql

select count(*) 
from airline_v 
where Year = 2008

count(1)
70097170


In [0]:
%sql

DROP TABLE IF EXISTS airline_2008;

In [0]:
%sql

create table airline_2008
as
select distinct *
from airline_v 
where Year = 2008

num_affected_rows,num_inserted_rows


In [0]:
%sql

select count(*)
from airline_2008

count(1)
7009724


In [0]:
%sql

create table airline_lax
as
select distinct *
from airline_v 
where Origin = 'LAX'

num_affected_rows,num_inserted_rows


In [0]:
%sql

select count(*)
from airline_lax

count(1)
4089002


In [0]:
%sql

show tables

database,tableName,isTemporary
itversity_retail_db,airline_2008,False
itversity_retail_db,airline_lax,False
itversity_retail_db,order_items,False
itversity_retail_db,orders,False
,airline_v,True


In [0]:
%sql

SELECT DISTINCT *
FROM airline_v
WHERE CONCAT_WS('-', Year, LPAD(Month, 2, '0'), LPAD(DayOfMonth, 2, '0')) BETWEEN '2007-12-01' AND '2008-02-29'
LIMIT 5;

DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay,IsArrDelayed,IsDepDelayed,Year,Month
6,7,756,800,908,910,WN,2101,N416WN,72,70,56,-2,-4,ISP,BWI,220,5,11,0,,0,,,,,,NO,NO,2008,1
6,7,1200,1110,1617,1510,WN,43,N330SW,137,120,100,67,50,LAS,MAF,796,4,33,0,,0,6.0,0.0,17.0,0.0,44.0,YES,YES,2008,1
6,7,1055,1035,1522,1505,WN,574,N659SW,147,150,125,17,20,LAS,TUL,1076,3,19,0,,0,0.0,0.0,0.0,0.0,17.0,YES,YES,2008,1
6,7,1838,1725,2041,1940,WN,109,N243WN,63,75,50,61,73,LAX,PHX,370,4,9,0,,0,20.0,0.0,0.0,0.0,41.0,YES,YES,2008,1
6,7,2056,2010,2330,2250,WN,1076,N736SA,274,280,259,40,46,MDW,OAK,1844,6,9,0,,0,7.0,0.0,0.0,0.0,33.0,YES,YES,2008,1


In [0]:
%sql

SELECT COUNT(*)
FROM
(
  SELECT DISTINCT *
  FROM airline_v
  WHERE CONCAT_WS('-', Year, LPAD(Month, 2, '0'), LPAD(DayOfMonth, 2, '0')) BETWEEN '2007-12-01' AND '2008-02-29'
) AS subquery;


count(1)
1789134


In [0]:
%sql

create table airline_20071201_20080229
as
select distinct *
from airline_v 
where CONCAT_WS('-', Year, LPAD(Month, 2, '0'), LPAD(DayOfMonth, 2, '0')) BETWEEN '2007-12-01' AND '2008-02-29'

num_affected_rows,num_inserted_rows


In [0]:
dbutils.widgets.text('date_lower_bound', '2007-12-01', 'Enter Data Lower Bound: ')

In [0]:
dbutils.widgets.text('date_upper_bound', '2007-12-31', 'Enter Data Upper Bound: ')

In [0]:
%sql

create table airline_test
as
select distinct *
from airline_v 
where CONCAT_WS('-', Year, LPAD(Month, 2, '0'), LPAD(DayOfMonth, 2, '0')) 
BETWEEN '${date_lower_bound}' AND '${date_upper_bound}'

num_affected_rows,num_inserted_rows


In [0]:
%sql

select *
from airline_test

DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay,IsArrDelayed,IsDepDelayed,Year,Month
2,7,939.0,945,1221.0,1250,WN,2368,N213WN,162.0,185.0,144.0,-29.0,-6.0,PVD,MCO,1073,10,8,0,,0,0,0,0,0,0,NO,NO,2007,12
2,7,1032.0,1025,1140.0,1135,WN,213,N514SW,68.0,70.0,51.0,5.0,7.0,SAN,LAS,258,5,12,0,,0,0,0,0,0,0,YES,YES,2007,12
2,7,,645,,1035,WN,154,0,,170.0,,,,SEA,PHX,1107,0,0,1,A,0,0,0,0,0,0,YES,YES,2007,12
3,1,912.0,915,958.0,1025,WN,3502,N238WN,166.0,190.0,154.0,-27.0,-3.0,AUS,SAN,1164,2,10,0,,0,0,0,0,0,0,NO,NO,2007,12
3,1,1735.0,1735,1855.0,1905,WN,570,N528SW,80.0,90.0,68.0,-10.0,0.0,BNA,MSY,471,4,8,0,,0,0,0,0,0,0,NO,NO,2007,12
3,1,656.0,700,806.0,815,WN,3483,N720WN,70.0,75.0,58.0,-9.0,-4.0,BUR,OAK,325,3,9,0,,0,0,0,0,0,0,NO,NO,2007,12
3,1,648.0,650,749.0,755,WN,134,N446WN,61.0,65.0,52.0,-6.0,-2.0,BWI,BUF,281,2,7,0,,0,0,0,0,0,0,NO,NO,2007,12
3,1,1645.0,1635,1912.0,1920,WN,2795,N680AA,207.0,225.0,195.0,-8.0,10.0,BWI,HOU,1246,4,8,0,,0,0,0,0,0,0,NO,YES,2007,12
3,1,1327.0,1330,1549.0,1555,WN,426,N613SW,142.0,145.0,130.0,-6.0,-3.0,BWI,MCO,787,3,9,0,,0,0,0,0,0,0,NO,NO,2007,12
3,1,1852.0,1850,2103.0,2105,WN,1685,N328SW,131.0,135.0,117.0,-2.0,2.0,CMH,TPA,829,4,10,0,,0,0,0,0,0,0,NO,YES,2007,12


In [0]:
%sql 

drop table airline_test2

In [0]:
%sql

create table airline_test2
as
select distinct *
from airline_v 
where CONCAT_WS('-', Year, LPAD(Month, 2, '0'), LPAD(DayOfMonth, 2, '0')) 
BETWEEN '${date_lower_bound}' AND '${date_upper_bound}'
and Year BETWEEN year('&{date_lower_bound}') and year('&{date_upper_bound}')

num_affected_rows,num_inserted_rows


In [0]:
%sql

select *
from airline_test2

DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay,IsArrDelayed,IsDepDelayed,Year,Month


In [0]:
%sql 

show tables

database,tableName,isTemporary
itversity_retail_db,airline_20071201_20080229,False
itversity_retail_db,airline_2008,False
itversity_retail_db,airline_lax,False
itversity_retail_db,airline_test,False
itversity_retail_db,airline_test2,False
itversity_retail_db,order_items,False
itversity_retail_db,orders,False
,airline_v,True
