# Tutorial

In [2]:
import great_expectations as gx
from great_expectations.checkpoint import Checkpoint

In [3]:
# Obtain the Great Expectations Data Context; the cells below go through it
# to read data, build checkpoints, and view validation results.
context = gx.get_context()

In [4]:
# Sample taxi-trip CSV hosted in the Great Expectations tutorials repository on GitHub
sample_csv_url = "https://raw.githubusercontent.com/great-expectations/gx_tutorials/main/data/yellow_tripdata_sample_2019-01.csv"

# Read the CSV through the default pandas datasource; the returned object is
# the validator on which the expectations below are declared.
validator = context.sources.pandas_default.read_csv(sample_csv_url)

In [5]:
# Declare two expectations on the validator:
#   1. "pickup_datetime" must not contain nulls
#   2. "passenger_count" must lie between 1 and 6
validator.expect_column_values_to_not_be_null(column="pickup_datetime")
validator.expect_column_values_to_be_between(
    column="passenger_count",
    min_value=1,
    max_value=6,
)

# Persist the expectations collected by the validator
validator.save_expectation_suite()

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

In [6]:
# Create the checkpoint "my_quickstart_checkpoint" from the validator, or
# update it if a checkpoint with that name already exists.
checkpoint = context.add_or_update_checkpoint(
    name="my_quickstart_checkpoint",
    validator=validator,
)

In [7]:
# Run the checkpoint and keep its result so it can be viewed in the next cell
checkpoint_result = checkpoint.run()

Calculating Metrics:   0%|          | 0/15 [00:00<?, ?it/s]

In [8]:
# View the validation result produced by the checkpoint run
context.view_validation_result(checkpoint_result)

# Apply to our use case

In [9]:
import great_expectations as gx
from great_expectations.checkpoint import Checkpoint

In [10]:
# Path (relative to this notebook) of the CSV file validated in this section
csv_file_path = "../data/test_part_1.csv"


In [11]:
# Obtain the Data Context again for the use-case section
context = gx.get_context()

In [12]:
# Read the project CSV through the default pandas datasource to get a validator
validator = context.sources.pandas_default.read_csv(csv_file_path)

In [13]:
# Column names the CSV is expected to have, in this exact order
expected_columns = [
    "Store", "Dept", "Date", "Weekly_Sales", "Temperature", "Fuel_Price",
    "MarkDown1", "MarkDown2", "MarkDown3", "MarkDown4", "MarkDown5", "CPI",
    "Unemployment", "IsHoliday", "Type", "Size",
]

# Declare a single expectation (ordered column list) and save it with the validator
validator.expect_table_columns_to_match_ordered_list(column_list=expected_columns)
validator.save_expectation_suite()

Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

In [14]:
# Create (or update) the checkpoint "my_test_checkpoint" from the validator,
# run it, and view the validation result.
checkpoint = context.add_or_update_checkpoint(
    name="my_test_checkpoint",
    validator=validator,
)
checkpoint_result = checkpoint.run()
context.view_validation_result(checkpoint_result)

Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

Checking multiple conditions at once:

In [16]:
# Re-read the CSV to start from a new validator for the multi-expectation example
validator = context.sources.pandas_default.read_csv(csv_file_path)

In [17]:
# Column names the CSV is expected to have, in this exact order
expected_columns = [
    "Store", "Dept", "Date", "Weekly_Sales", "Temperature", "Fuel_Price",
    "MarkDown1", "MarkDown2", "MarkDown3", "MarkDown4", "MarkDown5", "CPI",
    "Unemployment", "IsHoliday", "Type", "Size",
]

# Two expectations: the ordered column list, and no nulls in "Date"
validator.expect_table_columns_to_match_ordered_list(column_list=expected_columns)
validator.expect_column_values_to_not_be_null(column="Date")

# Persist the expectations collected by the validator
validator.save_expectation_suite()

Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

In [18]:
# Create (or update) the checkpoint "test_checkpoint" from the validator,
# run it, and view the validation result.
checkpoint = context.add_or_update_checkpoint(
    name="test_checkpoint",
    validator=validator,
)
checkpoint_result = checkpoint.run()
context.view_validation_result(checkpoint_result)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

In [None]:
# Whole workflow in a single cell: imports, data loading, expectations, checkpoint
import great_expectations as gx
from great_expectations.checkpoint import Checkpoint

csv_file_path = "../data/test_part_1.csv"

# Column names the CSV is expected to have, in this exact order
expected_columns = [
    "Store", "Dept", "Date", "Weekly_Sales", "Temperature", "Fuel_Price",
    "MarkDown1", "MarkDown2", "MarkDown3", "MarkDown4", "MarkDown5", "CPI",
    "Unemployment", "IsHoliday", "Type", "Size",
]

# Data Context, then a validator over the CSV
context = gx.get_context()
validator = context.sources.pandas_default.read_csv(csv_file_path)

# Two expectations (ordered column list, no nulls in "Date"), saved with the validator
validator.expect_table_columns_to_match_ordered_list(column_list=expected_columns)
validator.expect_column_values_to_not_be_null(column="Date")
validator.save_expectation_suite()

# Create (or update) the checkpoint, run it, and view the validation result
checkpoint = context.add_or_update_checkpoint(
    name="my_test_checkpoint",
    validator=validator,
)
checkpoint_result = checkpoint.run()
context.view_validation_result(checkpoint_result)

## Using suite to test multiple conditions at once and save them

In [19]:
import great_expectations as gx
from great_expectations.checkpoint import Checkpoint
import pandas as pd

In [20]:
# Path (relative to this notebook) of the CSV file validated in this section
csv_file_path = "../data/test_part_1.csv"

In [21]:
# Obtain the Data Context and read the CSV through the default pandas
# datasource; the validator is reused by the checkpoint cell at the end
# of this section.
context = gx.get_context()
validator = context.sources.pandas_default.read_csv(csv_file_path)

In [24]:
# Create the "test" expectation suite, or replace it when it already exists.
# `add_expectation_suite` raises if a suite with this name is already stored,
# which makes the cell fail on a second run; the add-or-update variant keeps
# the cell re-runnable.
suite = context.add_or_update_expectation_suite(expectation_suite_name="test")

In [25]:
from great_expectations.core.expectation_configuration import ExpectationConfiguration

# Expected column names, in order.
# NOTE(review): this list starts with "Dept", "Store", whereas the earlier
# cells of this notebook use "Store", "Dept" — confirm which order is intended.
expected_column_order = [
    "Dept", "Store", "Date", "Weekly_Sales", "Temperature", "Fuel_Price",
    "MarkDown1", "MarkDown2", "MarkDown3", "MarkDown4", "MarkDown5", "CPI",
    "Unemployment", "IsHoliday", "Type", "Size",
]

# Expectation configuration: `expectation_type` names the expectation and
# `kwargs` carries its arguments (here, the ordered column list).
column_order_expectation = ExpectationConfiguration(
    expectation_type="expect_table_columns_to_match_ordered_list",
    kwargs={"column_list": expected_column_order},
)

# Add the expectation to the suite; as the cell's last expression, the
# call's return value is shown as the cell output.
suite.add_expectation(
    expectation_configuration=column_order_expectation,
    overwrite_existing=False,
)

{"expectation_type": "expect_table_columns_to_match_ordered_list", "kwargs": {"column_list": ["Dept", "Store", "Date", "Weekly_Sales", "Temperature", "Fuel_Price", "MarkDown1", "MarkDown2", "MarkDown3", "MarkDown4", "MarkDown5", "CPI", "Unemployment", "IsHoliday", "Type", "Size"]}, "meta": {}}

In [26]:
# Expectation configuration: "Date" must not contain nulls.
# mostly=1.0 means 100% of the values are expected to be non-null.
date_not_null = ExpectationConfiguration(
    expectation_type="expect_column_values_to_not_be_null",
    kwargs=dict(column="Date", mostly=1.0),
)

# Add the expectation to the suite (last expression, so its result is displayed)
suite.add_expectation(
    expectation_configuration=date_not_null,
    overwrite_existing=False,
)

{"expectation_type": "expect_column_values_to_not_be_null", "kwargs": {"column": "Date", "mostly": 1.0}, "meta": {}}

In [27]:
# Expectation configuration: "CPI" must not contain nulls.
# mostly=1.0 means 100% of the values are expected to be non-null.
cpi_not_null = ExpectationConfiguration(
    expectation_type="expect_column_values_to_not_be_null",
    kwargs=dict(column="CPI", mostly=1.0),
)

# Add the expectation to the suite (last expression, so its result is displayed)
suite.add_expectation(
    expectation_configuration=cpi_not_null,
    overwrite_existing=False,
)

{"expectation_type": "expect_column_values_to_not_be_null", "kwargs": {"column": "CPI", "mostly": 1.0}, "meta": {}}

In [28]:
# Persist the suite through the Data Context; the cell output shows the
# path of the JSON file it was written to.
context.save_expectation_suite(expectation_suite=suite)

'/Users/julien/Documents/EPITA/S2/DSP/dsp-project-JPS/gx/expectations/test.json'

In [29]:
# Validate the CSV batch against the "test" suite built in this section.
# `validator` was created before that suite existed and is never bound to it,
# so passing `validator=validator` would not check the expectations added to
# `suite`. The batch request and the suite name are therefore given explicitly.
checkpoint = context.add_or_update_checkpoint(
    name="xyz",
    validations=[
        {
            "batch_request": validator.active_batch.batch_request,
            "expectation_suite_name": suite.expectation_suite_name,
        },
    ],
)

# Run the checkpoint and view the validation result
checkpoint_result = checkpoint.run()
context.view_validation_result(checkpoint_result)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]