# Create Hive Table for Yellow Taxi Data - Part 3

Create a Hive external table over the Parquet files that the [Part 1 ingest notebook](./yellow_taxi_data_pt1_ingest_to_s3.ipynb) uploaded to S3, then verify that the data can be queried through Trino.

<div class="alert alert-block alert-info">
💡 <b>NOTE</b>
<br/>
<ul>
<li>The <b>hive3x</b> and <b>trino</b> projects need to be running for this notebook.</li>
<li>You can run projects with <code>cd the-project &amp;&amp; docker compose up -d --wait</code>.</li>
</ul>
</div>


## Create Hive Table

In [1]:
import urllib3

# Silence urllib3's InsecureRequestWarning. The Trino connection opened later
# in this notebook passes "verify": false, so presumably each HTTPS request
# would otherwise trigger this warning and clutter the cell output.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [2]:
%load_ext sql

In [3]:
%config SqlMagic.named_parameters = "enabled"

In [4]:
# Open the Trino connection used by every %%sql cell: user "admin", default
# catalog "hive", HTTPS on port 8443, TLS certificate verification disabled
# ("verify": false).
# NOTE(review): ${DOCKER_HOST_OR_IP} is presumably expanded from the kernel's
# environment before connecting - confirm it is set where the notebook runs.
%sql trino://admin@${DOCKER_HOST_OR_IP}:8443/hive --connection_arguments '{"http_scheme":"https", "verify": false}'

In [5]:
%%sql

-- List the catalogs exposed by the connected Trino server.
SHOW CATALOGS

Catalog
hive
iceberg
system
tpcds
vast


In [6]:
import os

# Both values come from the environment of the Jupyter kernel.
# S3A_BUCKET is presumably the bucket name - it is not used in the cells shown here.
S3A_BUCKET = os.getenv('S3A_BUCKET')
# Location of the taxi Parquet files; templated into external_location of the
# CREATE TABLE cell further down.
S3_HIVE_TAXI_URI = os.getenv("S3A_HIVE_TAXI_URI")

# Fail fast: os.getenv returns None for an unset variable, and that would be
# rendered into the CREATE TABLE statement as the literal text 'None'.
if not S3_HIVE_TAXI_URI:
    raise RuntimeError(
        "Environment variable S3A_HIVE_TAXI_URI is not set; "
        "it must hold the S3A URI of the yellow taxi Parquet files."
    )

In [7]:
# Show the location that the CREATE TABLE cell will use as external_location.
# Printed from Python so the value is never interpreted by a shell.
print(S3_HIVE_TAXI_URI)

s3a://csnow-bucket/nyt/


In [8]:
%%sql

-- Create the nyt schema in the hive catalog. IF NOT EXISTS keeps this cell safe to re-run.
CREATE SCHEMA IF NOT EXISTS hive.nyt

In [9]:
%%sql

-- List the schemas of the current catalog to confirm that nyt now exists.
SHOW SCHEMAS

Schema
default
information_schema
nyt
social_media


In [10]:
%%sql

-- Remove any table definition left by an earlier run so that the CREATE TABLE cell can be re-run.
-- NOTE(review) the table is created with an external_location, so this drop presumably
-- leaves the Parquet files in S3 untouched - confirm for the connector in use.
DROP TABLE IF EXISTS nyt.yellow_tripdata

In [11]:
%%sql

-- External Parquet table over the location held in the S3_HIVE_TAXI_URI notebook variable.
-- NOTE(review) the declared types presumably mirror the schema of the Parquet files
-- written by the ingest notebook - confirm before changing any of them.
CREATE TABLE nyt.yellow_tripdata (
    -- vendor and pickup / dropoff times (stored as text)
    vendorid                VARCHAR,
    tpep_pickup_datetime    VARCHAR,
    tpep_dropoff_datetime   VARCHAR,

    -- trip attributes
    passenger_count         BIGINT,
    trip_distance           DOUBLE,
    ratecodeid              BIGINT,
    store_and_fwd_flag      DOUBLE,
    pulocationid            INTEGER,
    dolocationid            INTEGER,
    payment_type            VARCHAR,

    -- fare components and totals
    fare_amount             DOUBLE,
    extra                   DOUBLE,
    mta_tax                 DOUBLE,
    tip_amount              DOUBLE,
    tolls_amount            DOUBLE,
    improvement_surcharge   DOUBLE,
    total_amount            DOUBLE,
    congestion_surcharge    DOUBLE,
    airport_fee             DOUBLE,

    -- pickup / dropoff coordinates
    dropoff_latitude        DOUBLE,
    dropoff_longitude       DOUBLE,
    pickup_latitude         DOUBLE,
    pickup_longitude        DOUBLE,

    surcharge               DOUBLE
)
WITH (
    external_location = '{{S3_HIVE_TAXI_URI}}',
    format = 'PARQUET'
)

In [12]:
%%sql

-- Count every row of the table and format the number with digit grouping.
SELECT
    format('%,d', COUNT(*)) AS row_count
FROM nyt.yellow_tripdata

row_count
1763456499


In [13]:
%%sql

-- Preview up to 100 rows with every column listed in table order.
-- There is no ORDER BY, so which rows come back is arbitrary.
SELECT
    vendorid,
    tpep_pickup_datetime,
    tpep_dropoff_datetime,
    passenger_count,
    trip_distance,
    ratecodeid,
    store_and_fwd_flag,
    pulocationid,
    dolocationid,
    payment_type,
    fare_amount,
    extra,
    mta_tax,
    tip_amount,
    tolls_amount,
    improvement_surcharge,
    total_amount,
    congestion_surcharge,
    airport_fee,
    dropoff_latitude,
    dropoff_longitude,
    pickup_latitude,
    pickup_longitude,
    surcharge
FROM nyt.yellow_tripdata
LIMIT 100

vendorid,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,ratecodeid,store_and_fwd_flag,pulocationid,dolocationid,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount,congestion_surcharge,airport_fee,dropoff_latitude,dropoff_longitude,pickup_latitude,pickup_longitude,surcharge
CMT,2009-08-04 11:15:56,2009-08-04 11:23:03,2,0.6999999999999998,,0.0,-999,-999,Credit,5.7,-999.0,,0.8499999999999999,0.0,-999.0,6.55,-999.0,-999.0,40.741813,-74.00710599999998,40.736314,-73.99410899999998,0.0
CMT,2009-08-04 11:52:40,2009-08-04 12:03:14,1,1.0,,0.0,-999,-999,Cash,7.3,-999.0,,0.0,0.0,-999.0,7.3,-999.0,-999.0,40.751205,-73.994009,40.744782,-74.006208,0.0
CMT,2009-08-03 12:43:57,2009-08-03 12:53:35,1,0.8,,0.0,-999,-999,Cash,6.5,-999.0,,0.0,0.0,-999.0,6.5,-999.0,-999.0,40.759362,-73.969178,40.753566,-73.978063,0.0
DDS,2009-08-04 08:33:33,2009-08-04 08:46:08,1,0.9,,,-999,-999,CASH,7.7,-999.0,,0.0,0.0,-999.0,7.7,-999.0,-999.0,40.755724,-74.001698,40.750092,-73.987059,0.0
CMT,2009-08-04 12:16:11,2009-08-04 12:20:45,1,0.5,,0.0,-999,-999,Cash,4.5,-999.0,,0.0,0.0,-999.0,4.5,-999.0,-999.0,40.757863,-73.97730199999998,40.764543,-73.972915,0.0
CMT,2009-08-04 15:31:12,2009-08-04 15:40:27,1,1.9,,0.0,-999,-999,Cash,7.3,-999.0,,0.0,0.0,-999.0,7.3,-999.0,-999.0,40.769455,-73.95173599999998,40.764198,-73.97701999999998,0.0
CMT,2009-08-04 10:39:12,2009-08-04 10:47:45,1,1.1,,0.0,-999,-999,Cash,6.1,-999.0,,0.0,0.0,-999.0,6.1,-999.0,-999.0,40.767754,-73.981223,40.764596,-73.966614,0.0
CMT,2009-08-04 12:19:23,2009-08-04 12:42:24,2,2.7,,0.0,-999,-999,Cash,12.9,-999.0,,0.0,0.0,-999.0,12.9,-999.0,-999.0,40.749406,-73.99171599999998,40.783421,-73.978965,0.0
CMT,2009-08-04 15:31:10,2009-08-04 16:11:48,1,7.8,,0.0,-999,-999,Credit,25.3,-999.0,,3.79,0.0,-999.0,29.09,-999.0,-999.0,40.768251,-73.98935,40.774019,-73.87455699999998,0.0
CMT,2009-08-04 18:30:22,2009-08-04 18:48:47,1,2.4,,1.0,-999,-999,Credit,10.9,-999.0,,2.38,0.0,-999.0,13.28,-999.0,-999.0,40.769092,-73.985055,40.742206,-73.993323,0.0


In [14]:
%%sql

-- Display the DDL that Trino stored for the table, to check the column types
-- and the external location the template variable resolved to.
SHOW CREATE TABLE nyt.yellow_tripdata

Create Table
"CREATE TABLE hive.nyt.yellow_tripdata (  vendorid varchar,  tpep_pickup_datetime varchar,  tpep_dropoff_datetime varchar,  passenger_count bigint,  trip_distance double,  ratecodeid bigint,  store_and_fwd_flag double,  pulocationid integer,  dolocationid integer,  payment_type varchar,  fare_amount double,  extra double,  mta_tax double,  tip_amount double,  tolls_amount double,  improvement_surcharge double,  total_amount double,  congestion_surcharge double,  airport_fee double,  dropoff_latitude double,  dropoff_longitude double,  pickup_latitude double,  pickup_longitude double,  surcharge double ) WITH (  external_location = 's3a://csnow-bucket/nyt',  format = 'PARQUET' )"
