Skip to content

Commit

Permalink
Add testing example
Browse files Browse the repository at this point in the history
  • Loading branch information
seddonm1 committed Jan 20, 2020
1 parent 4255e7b commit 132f51c
Show file tree
Hide file tree
Showing 7 changed files with 611 additions and 1 deletion.
3 changes: 2 additions & 1 deletion .gitignore
Expand Up @@ -10,4 +10,5 @@
.metals
.vscode
examples/tutorial/**/output
spark-warehouse
spark-warehouse
**.DS_Store
1 change: 1 addition & 0 deletions examples/README.md
Expand Up @@ -4,4 +4,5 @@
|-------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| fuzzy_match | This example demonstrates the use of the [SimilarityJoinTransform](https://arc.tripl.ai/transform/#similarityjointransform) stage to perform [Approximate string matching](https://en.wikipedia.org/wiki/Approximate_string_matching) (a.k.a. Fuzzy Matching) to compare two datasets for similar records.|
| movie_graph | This example demonstrates the use of SQL and Cypher to generate and query a graph structure. This could be used to add friends-of-friends type features to a fraud detection machine learning algorithm. |
| testing | This example demonstrates the use of the `environments` argument to facilitate automated testing of business logic. |
| tutorial | This example is used in the [Tutorial](https://arc.tripl.ai/tutorial) and contains subversions for the various stages of the [Tutorial](https://arc.tripl.ai/tutorial). |
1 change: 1 addition & 0 deletions examples/testing/.gitignore
@@ -0,0 +1 @@
*.parquet
296 changes: 296 additions & 0 deletions examples/testing/green_tripdata0.json
@@ -0,0 +1,296 @@
[
{
"id": "f457e562-5c7a-4215-a754-ab749509f3fb",
"name": "vendor_id",
"description": "A code indicating the LPEP provider that provided the record.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "integer",
"nullableValues": [
"",
"null"
]
},
{
"id": "d61934ed-e32e-406b-bd18-8d6b7296a8c0",
"name": "lpep_pickup_datetime",
"description": "The date and time when the meter was engaged.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "timestamp",
"formatters": [
"uuuu-MM-dd HH:mm:ss"
],
"timezoneId": "America/New_York",
"nullableValues": [
"",
"null"
]
},
{
"id": "d61934ed-e32e-406b-bd18-8d6b7296a8c0",
"name": "lpep_dropoff_datetime",
"description": "The date and time when the meter was disengaged.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "timestamp",
"formatters": [
"uuuu-MM-dd HH:mm:ss"
],
"timezoneId": "America/New_York",
"nullableValues": [
"",
"null"
]
},
{
"id": "aa315986-9fa9-4aa2-a72e-411196648351",
"name": "store_and_fwd_flag",
"description": "This flag indicates whether the trip record was held in vehicle memory before sending to the vendor, aka 'store and forward', because the vehicle did not have a connection to the server.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "boolean",
"nullableValues": [
"",
"null"
],
"trueValues": [
"Y"
],
"falseValues": [
"N"
]
},
{
"id": "ce66288c-65c1-45b7-83b4-5de3f38f89b7",
"name": "rate_code_id",
"description": "The final rate code in effect at the end of the trip.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "integer",
"nullableValues": [
"",
"null"
]
},
{
"id": "2d7b4a53-5203-4273-bd4a-3bbc742539ec",
"name": "pickup_longitude",
"description": "Longitude where the meter was engaged.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "decimal",
"nullableValues": [
"0"
],
"precision": 18,
"scale": 14
},
{
"id": "a183ecd0-6169-429c-8bc0-0df4f08526e8",
"name": "pickup_latitude",
"description": "Latitude where the meter was engaged.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "decimal",
"nullableValues": [
"0"
],
"precision": 18,
"scale": 14
},
{
"id": "a3d6135c-202f-4ba6-ab25-93fa6c28bc97",
"name": "dropoff_longitude",
"description": "Longitude where the meter was disengaged.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "decimal",
"nullableValues": [
"0"
],
"precision": 18,
"scale": 14
},
{
"id": "77160ee6-5040-4444-a731-45902b32911f",
"name": "dropoff_latitude",
"description": "Latitude where the meter was disengaged.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "decimal",
"nullableValues": [
"0"
],
"precision": 18,
"scale": 14
},
{
"id": "ef1fe668-7850-4ef5-966b-0813d2024c32",
"name": "passenger_count",
"description": "The number of passengers in the vehicle. This is a driver-entered value.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "integer",
"nullableValues": [
"",
"null"
]
},
{
"id": "77160ee6-5040-4444-a731-45902b32911f",
"name": "trip_distance",
"description": "The elapsed trip distance in miles reported by the taximeter.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "decimal",
"nullableValues": [
"0",
"null"
],
"precision": 18,
"scale": 15
},
{
"id": "e71597c1-67ae-4176-9ae3-ae4dbe0886b9",
"name": "fare_amount",
"description": "The time-and-distance fare calculated by the meter.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "decimal",
"nullableValues": [
"",
"null"
],
"precision": 10,
"scale": 2
},
{
"id": "77d91cb6-22e4-4dba-883a-eee0c8690f31",
"name": "extra",
"description": "Miscellaneous extras and surcharges. Currently, this only includes the $0.50 and $1 rush hour and overnight charges.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "decimal",
"nullableValues": [
"",
"null"
],
"precision": 10,
"scale": 2
},
{
"id": "aebe7970-91dc-4155-b9a9-78dbcf836ac8",
"name": "mta_tax",
"description": "$0.50 MTA tax that is automatically triggered based on the metered rate in use.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "decimal",
"nullableValues": [
"",
"null"
],
"precision": 10,
"scale": 2
},
{
"id": "3630c209-a88c-4dd7-ab43-276234f04252",
"name": "tip_amount",
"description": "Tip amount – This field is automatically populated for credit card tips. Cash tips are not included.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "decimal",
"nullableValues": [
"",
"null"
],
"precision": 10,
"scale": 2
},
{
"id": "9d10371c-c08c-461a-a1a9-e5cd0c46655c",
"name": "tolls_amount",
"description": "Total amount of all tolls paid in trip.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "decimal",
"nullableValues": [
"",
"null"
],
"precision": 10,
"scale": 2
},
{
"id": "f59aba58-2a8c-40f9-830b-f1abafe80b7f",
"name": "ehail_fee",
"description": "Fee for allowing passengers to 'e-hail' a New York City taxicab via downloadable smartphone applications.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "decimal",
"nullableValues": [
"",
"null"
],
"precision": 10,
"scale": 2
},
{
"id": "1414fd4b-32ed-430c-a4b0-a569e7144bbb",
"name": "total_amount",
"description": "The total amount charged to passengers. Does not include cash tips.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "decimal",
"nullableValues": [
"",
"null"
],
"precision": 10,
"scale": 2
},
{
"id": "5b43ec13-dc16-40bd-8af5-4e2f85285e15",
"name": "payment_type",
"description": "A numeric code signifying how the passenger paid for the trip.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "integer",
"nullableValues": [
"",
"null"
]
},
{
"id": "bccf357f-6671-4168-998a-c991fdcf7fe0",
"name": "trip_type",
"description": "A code indicating whether the trip was a street-hail or a dispatch that is automatically assigned based on the metered rate in use but can be altered by the driver.",
"trim": true,
"nullable": true,
"primaryKey": false,
"type": "integer",
"nullableValues": [
"",
"null"
]
}
]
27 changes: 27 additions & 0 deletions examples/testing/payment_type_over_time.sql
@@ -0,0 +1,27 @@
-- This query calculates, per month, the share of trips paid with each payment method,
-- which could be used by a business to track whether to keep accepting cash etc.
--
-- Output columns (names and order are relied on by downstream stages):
--   payment_type  human-readable payment method label
--   month         pickup timestamp truncated to the start of its month
--   percent       label's trip count divided by the month's trip count
--                 (a ratio, not multiplied by 100; Spark SQL `/` is fractional division)

-- get a count of all records per month so the monthly percentage can be calculated
-- (COUNT(payment_type) skips rows where payment_type is NULL)
WITH green_tripdata_monthly_trips AS (
  SELECT
    DATE_TRUNC('MM', lpep_pickup_datetime) AS month
    ,COUNT(payment_type) AS green_tripdata_count
  FROM green_tripdata0
  -- group by the expression itself rather than the alias so the grouping does not
  -- depend on alias resolution rules
  GROUP BY DATE_TRUNC('MM', lpep_pickup_datetime)
)
-- translate the numeric payment code into its label once, so the final aggregation
-- groups on the label. Code 5 and any unrecognised code both map to 'Unknown';
-- grouping on the raw code would emit several 'Unknown' rows for the same month.
,green_tripdata_labelled AS (
  SELECT
    -- payment_type is declared as an integer column, so compare against integer
    -- literals instead of relying on implicit string-to-number coercion
    CASE
      WHEN payment_type = 1 THEN 'Credit card'
      WHEN payment_type = 2 THEN 'Cash'
      WHEN payment_type = 3 THEN 'No charge'
      WHEN payment_type = 4 THEN 'Dispute'
      WHEN payment_type = 5 THEN 'Unknown'
      WHEN payment_type = 6 THEN 'Voided trip'
      ELSE 'Unknown'
    END AS payment_type_label
    -- keep the raw code so NULL codes are still excluded from the count below
    ,payment_type AS payment_type_code
    ,DATE_TRUNC('MM', lpep_pickup_datetime) AS month
  FROM green_tripdata0
)
-- use the count to calculate percentages
SELECT
  labelled.payment_type_label AS payment_type
  ,labelled.month
  ,COUNT(labelled.payment_type_code) / monthly.green_tripdata_count AS percent
FROM green_tripdata_labelled AS labelled
-- the inner join drops rows whose month is NULL (NULL never equals NULL)
INNER JOIN green_tripdata_monthly_trips AS monthly
  ON labelled.month = monthly.month
GROUP BY labelled.payment_type_label, labelled.month, monthly.green_tripdata_count
ORDER BY labelled.payment_type_label, labelled.month
16 changes: 16 additions & 0 deletions examples/testing/sqlvalidate_errors.sql
@@ -0,0 +1,16 @@
-- Summarises row-level errors in ${inputView} into a single row:
--   valid    TRUE when no row has a non-empty `_errors` collection
--   message  JSON string with the number of rows inspected ('count') and the
--            number of rows that have errors ('errors')
-- NOTE(review): SUM over zero rows is NULL, so for an empty ${inputView} `valid`
-- is NULL rather than TRUE - confirm that is the intended outcome for empty input.
WITH row_error_flags AS (
  -- 1 when the row carries at least one entry in `_errors`, otherwise 0
  SELECT
    CASE
      WHEN SIZE(_errors) > 0 THEN 1
      ELSE 0
    END AS row_has_error
  FROM ${inputView}
)
SELECT
  SUM(row_has_error) = 0 AS valid
  ,TO_JSON(
    NAMED_STRUCT(
      'count', COUNT(row_has_error),
      'errors', SUM(row_has_error)
    )
  ) AS message
FROM row_error_flags

0 comments on commit 132f51c

Please sign in to comment.