# Create Hive Table for Yellow Taxi Data - Part 3

Create a Hive external table over the Parquet files uploaded to S3 by the [ingest notebook](./yellow_taxi_data_pt1_ingest_to_s3.ipynb), then verify that the data can be queried.

<div class="alert alert-block alert-info">
💡 <b>NOTE</b>
<br/>
<ul>
<li>The <b>hive3x</b> and <b>trino</b> projects need to be running for this notebook.</li>
<li>You can run projects with <code>cd the-project &amp;&amp; docker compose up -d --wait</code>.</li>
</ul>
</div>


## Create Hive Table

In [1]:
import urllib3

# The Trino connection opened later in this notebook passes "verify": false,
# i.e. it talks HTTPS without certificate verification. Silence urllib3's
# InsecureRequestWarning category so those warnings do not clutter cell output.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [2]:
# Load the IPython extension named "sql"; the %sql / %%sql magics used in the
# remaining cells come from it.
%load_ext sql

In [3]:
# Set the SqlMagic option named_parameters to "enabled".
# NOTE(review): presumably this lets SQL cells bind notebook variables by name;
# confirm against the JupySQL documentation.
%config SqlMagic.named_parameters = "enabled"

In [4]:
# Open the Trino connection used by the SQL cells: user "admin", port 8443,
# default catalog "hive", HTTPS with certificate verification turned off.
# NOTE(review): the host is taken from DOCKER_HOST_OR_IP; confirm it is set
# before running this cell.
%sql trino://admin@${DOCKER_HOST_OR_IP}:8443/hive --connection_arguments '{"http_scheme":"https", "verify": false}'

In [5]:
%%sql 

-- List the catalogs visible through this connection as a connectivity check.
SHOW CATALOGS

Catalog
hive
iceberg
system
tpcds
vast


In [6]:
import os

# Both settings are read from the process environment; a value is None when
# its variable is not set. S3_HIVE_TAXI_URI is later substituted into the
# external_location of the CREATE TABLE statement.
S3A_BUCKET = os.environ.get("S3A_BUCKET")
S3_HIVE_TAXI_URI = os.environ.get("S3A_HIVE_TAXI_URI")

In [7]:
# Print the resolved location through the shell as a quick sanity check
# before it is used as the table's external_location.
! echo {S3_HIVE_TAXI_URI}

s3a://csnow-bucket/nyt/


In [8]:
%%sql

-- Create the nyt schema in the hive catalog. IF NOT EXISTS keeps the cell safe to re-run.
CREATE SCHEMA IF NOT EXISTS hive.nyt

In [9]:
%%sql

-- List the schemas of the current catalog to confirm that nyt is present.
SHOW SCHEMAS

Schema
default
information_schema
nyt
social_media


In [10]:
%%sql

-- Remove any earlier definition so the CREATE TABLE cell that follows starts clean.
-- The catalog is spelled out, as in CREATE SCHEMA hive.nyt, so this destructive
-- statement never depends on the default catalog of the session.
DROP TABLE IF EXISTS hive.nyt.yellow_tripdata

In [11]:
%%sql

-- Hive table over the Parquet files found at the S3_HIVE_TAXI_URI location.
-- The name is fully qualified as hive.nyt.yellow_tripdata, matching the schema
-- created as hive.nyt, so the DDL does not depend on the default catalog.
-- NOTE(review) the column types presumably mirror the Parquet files written by
-- the ingest notebook, which is why the pickup and dropoff times are VARCHAR and
-- store_and_fwd_flag is DOUBLE. Confirm against the files before tightening a type.
CREATE TABLE hive.nyt.yellow_tripdata (
  -- trip details
  vendorid VARCHAR,
  tpep_pickup_datetime VARCHAR,
  tpep_dropoff_datetime VARCHAR,
  passenger_count BIGINT,
  trip_distance DOUBLE,
  ratecodeid BIGINT,
  store_and_fwd_flag DOUBLE,
  -- pickup and dropoff location ids
  pulocationid INT,
  dolocationid INT,
  -- payment and charges
  payment_type VARCHAR,
  fare_amount DOUBLE,
  extra DOUBLE,
  mta_tax DOUBLE,
  tip_amount DOUBLE,
  tolls_amount DOUBLE,
  improvement_surcharge DOUBLE,
  total_amount DOUBLE,
  congestion_surcharge DOUBLE,
  airport_fee DOUBLE,
  -- coordinates and surcharge, empty or zero in some rows of the sample output
  dropoff_latitude DOUBLE,
  dropoff_longitude DOUBLE,
  pickup_latitude DOUBLE,
  pickup_longitude DOUBLE,
  surcharge DOUBLE
)
WITH (
  format = 'PARQUET',
  -- filled in from the notebook variable S3_HIVE_TAXI_URI before the statement runs
  external_location = '{{S3_HIVE_TAXI_URI}}'
)

In [12]:
%%sql
    
-- Count every row of the table and render the total with the %,d pattern.
SELECT
    format('%,d', COUNT(*)) AS row_count
FROM nyt.yellow_tripdata

row_count
1763456499


In [13]:
%%sql

-- Preview up to 100 rows with every column. There is no ORDER BY, so which
-- rows come back is not deterministic.
SELECT trips.*
FROM nyt.yellow_tripdata AS trips
LIMIT 100

vendorid,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,ratecodeid,store_and_fwd_flag,pulocationid,dolocationid,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount,congestion_surcharge,airport_fee,dropoff_latitude,dropoff_longitude,pickup_latitude,pickup_longitude,surcharge
CMT,2009-04-28 21:25:57,2009-04-28 21:46:40,1,9.1,,0.0,,,Cash,22.9,,,0.0,4.15,,27.05,,,0.0,0.0,0.0,0.0,0.0
CMT,2009-04-28 23:49:05,2009-04-28 23:51:01,1,0.4,,0.0,,,Credit,3.3,,,1.0,0.0,,4.3,,,40.759182,-73.992046,40.762336,-73.997803,0.0
CMT,2009-04-28 15:18:02,2009-04-28 15:21:24,1,0.5999999999999999,,0.0,,,Cash,4.1,,,0.0,0.0,,4.1,,,40.715115,-74.010192,40.721387,-74.00860799999998,0.0
CMT,2009-04-30 11:29:09,2009-04-30 11:39:20,1,1.4,,0.0,,,Credit,7.3,,,0.6999999999999998,0.0,,8.0,,,40.74249,-74.006963,40.745998,-73.988595,0.0
CMT,2009-04-30 10:53:58,2009-04-30 11:12:56,1,2.1,,0.0,,,Cash,10.9,,,0.0,0.0,,10.9,,,40.748495,-74.00586199999998,40.752556,-73.978407,0.0
CMT,2009-04-24 20:58:06,2009-04-24 21:23:18,2,6.1,,0.0,,,Credit,18.1,,,3.72,0.0,,21.82,,,40.820638,-73.95458999999998,40.75593,-73.96780699999998,0.0
CMT,2009-04-24 22:32:45,2009-04-24 22:38:27,1,0.9,,0.0,,,Cash,5.3,,,0.0,0.0,,5.3,,,40.749672,-73.98903,40.737387,-73.996772,0.0
CMT,2009-04-24 23:08:02,2009-04-24 23:17:10,1,1.6,,0.0,,,Credit,6.9,,,1.48,0.0,,8.38,,,40.774008,-73.982127,40.757296,-73.97187599999998,0.0
VTS,2009-04-15 15:32:00,2009-04-15 15:44:00,5,1.13,,,,,CASH,7.3,,,0.0,0.0,,7.3,,,40.746697,-73.98160699999998,40.739257,-73.99468199999998,0.0
DDS,2009-04-18 22:27:40,2009-04-18 22:31:55,1,0.8,,,,,CASH,4.5,,,0.0,0.0,,5.0,,,40.801879,-73.96500399999998,40.793807,-73.972229,0.5


In [14]:
%%sql

-- Print the stored definition to confirm the columns, format and external location.
SHOW CREATE TABLE nyt.yellow_tripdata

Create Table
"CREATE TABLE hive.nyt.yellow_tripdata (  vendorid varchar,  tpep_pickup_datetime varchar,  tpep_dropoff_datetime varchar,  passenger_count bigint,  trip_distance double,  ratecodeid bigint,  store_and_fwd_flag double,  pulocationid integer,  dolocationid integer,  payment_type varchar,  fare_amount double,  extra double,  mta_tax double,  tip_amount double,  tolls_amount double,  improvement_surcharge double,  total_amount double,  congestion_surcharge double,  airport_fee double,  dropoff_latitude double,  dropoff_longitude double,  pickup_latitude double,  pickup_longitude double,  surcharge double ) WITH (  external_location = 's3a://csnow-bucket/nyt',  format = 'PARQUET' )"
