In [1]:
# Install the library

!pip install -q great-expectations

In [1]:
# Great Expectations entry point for a file-backed project
from great_expectations.data_context import FileDataContext

# Create the data context, using the current directory as the project root
context = FileDataContext.create(
    project_root_dir='./',
)

In [3]:
# Register a pandas datasource in the data context.
# add_or_update_* keeps this cell re-runnable: the file-backed context
# persists the datasource, so a plain add_pandas fails on the second run.
datasource_name = 'csv-data-sc'
datasource = context.sources.add_or_update_pandas(datasource_name)

# Register the cleaned CSV file as a data asset of that datasource.
# A forward slash is used because '\P' is an invalid escape sequence in a
# normal string literal and a backslash separator only resolves on Windows.
asset_name = 'supply_chain'
path_to_data = 'dags/P2M3_agus_susanto_data_clean.csv'  # Update the path to the local file
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Build the batch request that the validator will read its data from
batch_request = asset.build_batch_request()

In [5]:
# Create the expectation suite that will collect the checks defined below.
# add_or_update_* keeps this cell re-runnable: once the suite exists in the
# context, a plain add_expectation_suite raises on the next run.
expectation_suite_name = 'expectation-scs-dataset'
context.add_or_update_expectation_suite(
    expectation_suite_name=expectation_suite_name
)

# Bind the batch of data to the suite through a validator
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=expectation_suite_name
)

# Preview the first rows to confirm the validator loaded the data
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0.1,Unnamed: 0,type,days_for_shipping_real,days_for_shipment_scheduled,benefit_per_order,sales_per_customer,delivery_status,late_delivery_risk,category_id,category_name,...,order_zipcode,product_card_id,product_category_id,product_description,product_image,product_name,product_price,product_status,shipping_date_dateorders,shipping_mode
0,0,CASH,2,1,183.990005,399.980011,Late delivery,1,45,Fishing,...,-,1004,45,-,http://images.acmesports.sports/Field+%26+Stre...,Field & Stream Sportsman 16 Gun Fire Safe,399.980011,0,8/18/2015 11:46,First Class
1,1,CASH,2,1,-166.309998,395.980011,Late delivery,1,45,Fishing,...,-,1004,45,-,http://images.acmesports.sports/Field+%26+Stre...,Field & Stream Sportsman 16 Gun Fire Safe,399.980011,0,9/23/2015 2:57,First Class
2,2,CASH,2,1,29.700001,296.980011,Late delivery,1,43,Camping & Hiking,...,-,957,43,-,http://images.acmesports.sports/Diamondback+Wo...,Diamondback Women's Serene Classic Comfort Bi,299.980011,0,9/9/2015 7:32,First Class
3,3,CASH,2,1,182.149994,395.980011,Late delivery,1,45,Fishing,...,-,1004,45,-,http://images.acmesports.sports/Field+%26+Stre...,Field & Stream Sportsman 16 Gun Fire Safe,399.980011,0,8/14/2015 5:07,First Class
4,4,CASH,2,1,-779.049988,293.980011,Late delivery,1,43,Camping & Hiking,...,-,957,43,-,http://images.acmesports.sports/Diamondback+Wo...,Diamondback Women's Serene Classic Comfort Bi,299.980011,0,6/29/2017 9:56,First Class


# **to be unique**

In [8]:
# Expect every value in the order_item_id column to be unique
validator.expect_column_values_to_be_unique(
    column='order_item_id'
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 180519,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

# **to be between min_value and max_value**

In [10]:
# Expect every value in the sales column to lie in the range 0 to 1,000,000
# (the previous comment said 100000, which did not match the enforced bound).
validator.expect_column_values_to_be_between(
    column='sales',
    min_value=0,
    max_value=1000000,
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 180519,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

# **to be in set**

In [12]:
# Expect every value in the order_status column to be one of the known statuses
validator.expect_column_values_to_be_in_set(
    column='order_status',
    value_set=[
        'CLOSED',
        'PENDING_PAYMENT',
        'PAYMENT_REVIEW',
        'COMPLETE',
        'ON_HOLD',
        'PENDING',
        'PROCESSING',
        'SUSPECTED_FRAUD',
        'CANCELED',
    ],
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 180519,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

# **to be in type list**

In [13]:
# Expect the product_price column to hold an integer or float type.
# The pandas backend resolves type names as numpy dtypes or native Python
# types; 'integer' is neither, so it was silently ignored and only 'float'
# took effect. The names below all resolve, covering both integer and float.
validator.expect_column_values_to_be_in_type_list(
    'product_price',
    ['int', 'int64', 'float', 'float64'],
)

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "float64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

# **to be Datetime**

In [15]:
# Expect every value in the order_date_dateorders column to parse with the
# month/day/year hour:minute datetime format
validator.expect_column_values_to_match_strftime_format(
    column='order_date_dateorders',
    strftime_format='%m/%d/%Y %H:%M',
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 180519,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

# **to be strings that match a given regular expression**

In [18]:
# Expect every value in the shipping_date_dateorders column to look like
# "M/D/YYYY H:MM" (one- or two-digit month, day and hour; four-digit year)
validator.expect_column_values_to_match_regex(
    regex=r'^\d{1,2}/\d{1,2}/\d{4} \d{1,2}:\d{2}$',
    column='shipping_date_dateorders',
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 180519,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

# **to be strings that do NOT match a negative number**

In [20]:
# Expect no value in the product_price column to match the pattern of a
# negative number (a leading minus sign followed by digits, optional decimals)
validator.expect_column_values_to_not_match_regex(
    regex=r'^-[0-9]+(?:\.[0-9]+)?$',
    column='product_price',
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 180519,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}