In [1]:
import pandas as pd
import great_expectations as gx
from great_expectations.checkpoint import Checkpoint

In [2]:
# Obtain the Great Expectations data context that every later cell works through.
# NOTE(review): the Data Docs URL printed by build_data_docs() further down points
# into a temp directory, which suggests this resolves to an ephemeral (non-persisted)
# context — confirm that losing the suite/checkpoint with the kernel is acceptable.
context = gx.get_context()

In [3]:
# Register the pandas datasource under a fixed name.
# `add_or_update_pandas` is used (instead of `add_pandas`) so that re-running this
# cell does not fail on the already-registered datasource name; this mirrors the
# `add_or_update_*` calls used for the expectation suite and checkpoint below.
datasource = context.sources.add_or_update_pandas(name="final_project")

In [4]:
# Load the e-commerce dataset from a CSV file located relative to the working directory.
dataframe = pd.read_csv('ecommerce_data_cleaned1.csv')

In [5]:
# Convert both date columns to pandas datetimes (read_csv above leaves them unparsed).
for date_col in ("order_date", "ship_date"):
    dataframe[date_col] = pd.to_datetime(dataframe[date_col])


In [6]:
# Preview the first rows to confirm the load and the date conversion.
dataframe.head()

Unnamed: 0,order_id,order_date,ship_date,aging,ship_mode,product_category,product,sales,quantity,discount,profit,shipping_cost,order_priority,customer_id,customer_name,city,state,country,region,months
0,AU-2015-1,2015-11-09,2015-11-17,8,First Class,Auto & Accessories,Car Media Players,140,2,5,46,4,Medium,LS-001,Lane Daniels,Brisbane,Queensland,Australia,Oceania,Nov
1,AU-2015-2,2015-06-30,2015-07-02,2,First Class,Auto & Accessories,Car Speakers,211,3,3,112,11,Medium,IZ-002,Alvarado Kriz,Berlin,Berlin,Germany,Central,Jun
2,AU-2015-3,2015-12-05,2015-12-13,8,First Class,Auto & Accessories,Car Body Covers,117,5,1,31,3,Critical,EN-003,Moon Weien,Porirua,Wellington,New Zealand,Oceania,Dec
3,AU-2015-4,2015-05-09,2015-05-16,7,First Class,Auto & Accessories,Car & Bike Care,118,2,5,26,2,High,AN-004,Sanchez Bergman,Kabul,Kabul,Afghanistan,Central Asia,May
4,AU-2015-5,2015-07-09,2015-07-18,9,First Class,Auto & Accessories,Tyre,250,1,4,160,16,Critical,ON-005,Rowe Jackson,Townsville,Queensland,Australia,Oceania,Jul


In [7]:
# Name under which the in-memory DataFrame is registered as a data asset.
name = "ecommerce_final_project"
# Add a DataFrame asset to the pandas datasource created earlier.
data_asset = datasource.add_dataframe_asset(name=name)
# Batch request bound to the loaded DataFrame; the checkpoint further down uses it.
my_batch_request = data_asset.build_batch_request(dataframe=dataframe)

In [8]:
# Create (or reset) the expectation suite and open a validator on the batch.
expectation_suite_name = "hacktiv8_final_project"
context.add_or_update_expectation_suite(expectation_suite_name=expectation_suite_name)

# Reuse the batch request built in the previous cell instead of constructing a
# second, identical one, so the validator and the checkpoint share the same request.
validator = context.get_validator(
    batch_request=my_batch_request,
    expectation_suite_name=expectation_suite_name,
)

# Bare last expression -> rich DataFrame rendering instead of a wrapped text dump.
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

    order_id order_date  ship_date  aging    ship_mode    product_category  \
0  AU-2015-1 2015-11-09 2015-11-17      8  First Class  Auto & Accessories   
1  AU-2015-2 2015-06-30 2015-07-02      2  First Class  Auto & Accessories   
2  AU-2015-3 2015-12-05 2015-12-13      8  First Class  Auto & Accessories   
3  AU-2015-4 2015-05-09 2015-05-16      7  First Class  Auto & Accessories   
4  AU-2015-5 2015-07-09 2015-07-18      9  First Class  Auto & Accessories   

             product  sales  quantity  discount  profit  shipping_cost  \
0  Car Media Players    140         2         5      46              4   
1       Car Speakers    211         3         3     112             11   
2    Car Body Covers    117         5         1      31              3   
3    Car & Bike Care    118         2         5      26              2   
4               Tyre    250         1         4     160             16   

  order_priority customer_id    customer_name        city       state  \
0         Med

In [9]:
# 1. Unique order_id
validator.expect_column_values_to_be_unique(column="order_id")

# 2. Non-negative values
for col in ["sales", "quantity", "profit", "shipping_cost"]:
    validator.expect_column_values_to_be_between(column=col, min_value=0)

# 3. Discount Range
validator.expect_column_values_to_be_between(column="discount", min_value=0, max_value=100)

# 4. Consistency Checks
ship_modes = dataframe["ship_mode"].unique()
validator.expect_column_values_to_be_in_set(column="ship_mode", value_set=list(ship_modes))

product_categories = dataframe["product_category"].unique()
validator.expect_column_values_to_be_in_set(column="product_category", value_set=list(product_categories))

order_priorities = dataframe["order_priority"].unique()
validator.expect_column_values_to_be_in_set(column="order_priority", value_set=list(order_priorities))

regions = dataframe["region"].unique()
validator.expect_column_values_to_be_in_set(column="region", value_set=list(regions))

# 5. Missing Values
validator.expect_column_values_to_not_be_null(column="order_id")


Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 51275,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [10]:
# Save the suite built above; discard_failed_expectations=False keeps expectations
# in the suite even if they did not pass on this batch.
validator.save_expectation_suite(discard_failed_expectations=False)

In [11]:
# Checkpoint that validates the DataFrame batch against the saved expectation suite.
my_checkpoint_name = "hacktiv8_final_project"

# Actions executed after validation: store the result, then refresh Data Docs.
checkpoint_actions = [
    {"name": "store_validation_result", "action": {"class_name": "StoreValidationResultAction"}},
    {"name": "update_data_docs", "action": {"class_name": "UpdateDataDocsAction"}},
]

checkpoint = Checkpoint(
    name=my_checkpoint_name,
    run_name_template="%Y%m%d-%H%M%S-hacktiv8_M3_checkpoint",
    data_context=context,
    batch_request=my_batch_request,
    expectation_suite_name=expectation_suite_name,
    action_list=checkpoint_actions,
)

In [12]:
# Register the checkpoint with the context (add or overwrite by name); the returned
# checkpoint configuration is displayed as the cell output.
context.add_or_update_checkpoint(checkpoint=checkpoint)

{
  "action_list": [
    {
      "name": "store_validation_result",
      "action": {
        "class_name": "StoreValidationResultAction"
      }
    },
    {
      "name": "update_data_docs",
      "action": {
        "class_name": "UpdateDataDocsAction"
      }
    }
  ],
  "batch_request": {
    "datasource_name": "final_project",
    "data_asset_name": "ecommerce_final_project",
    "options": {}
  },
  "class_name": "Checkpoint",
  "config_version": 1.0,
  "evaluation_parameters": {},
  "expectation_suite_name": "hacktiv8_final_project",
  "module_name": "great_expectations.checkpoint",
  "name": "hacktiv8_final_project",
  "profilers": [],
  "run_name_template": "%Y%m%d-%H%M%S-hacktiv8_M3_checkpoint",
  "runtime_configuration": {},
  "validations": []
}

In [13]:
# Execute the checkpoint: validate the batch against the suite and run its actions.
checkpoint_result = checkpoint.run()

# Show the overall pass/fail outcome so a failed validation is visible in the
# notebook itself rather than only in the generated Data Docs.
checkpoint_result.success

Calculating Metrics:   0%|          | 0/76 [00:00<?, ?it/s]

In [14]:
# Build the Data Docs site; the returned mapping of site name -> index URL is displayed.
context.build_data_docs()

{'local_site': 'file://C:\\Users\\user\\AppData\\Local\\Temp\\tmpnvpg4600\\index.html'}

In [15]:
# Open the generated Data Docs for interactive review of the validation results.
context.open_data_docs()