==============================================================================================

Nama : Evan Juanto  

Batch : BSD-006

==============================================================================================

# Great-Expectations
Memvalidasi data menggunakan Great Expectations.

In [1]:
#import library
from great_expectations.data_context import FileDataContext
# Create a file-backed Great Expectations data context, using the current
# working directory as the project root.
context = FileDataContext.create(project_root_dir="./")

In [2]:
# Name of the Datasource. This name must be unique between Datasources.
datasource_name = 'milestone3'
# Use add_or_update_* (as the expectation-suite cell does) instead of a plain
# add: the file-backed context persists its configuration, so a plain add is
# expected to fail on the already-registered name when the notebook is re-run.
datasource = context.sources.add_or_update_pandas(datasource_name)

# Name of the data asset and the CSV file it reads from
asset_name = 'test_m3'
path_to_data = 'P2M3_Evan_Juanto_data_cleaned.csv'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Build the batch request that the validator uses to load the asset's data
batch_request = asset.build_batch_request()

## Expectation

In [3]:
# Create (or update) the expectation suite that will collect the checks below
expectation_suite_name = 'expectation-m3-dataset'
context.add_or_update_expectation_suite(expectation_suite_name)

# Bind the batch request to that suite through a validator
validator = context.get_validator(batch_request=batch_request, expectation_suite_name=expectation_suite_name)

# Preview the first rows to confirm the validator can read the data
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,order_id,agent_age,agent_rating,store_latitude,store_longitude,drop_latitude,drop_longitude,order_date,order_time,pickup_time,weather,traffic,vehicle,area,delivery_time,category
0,ialx566343618,37,4.9,22.745049,75.892471,22.765049,75.912471,2022-03-19,11:30:00,11:45:00,Sunny,High,motorcycle,Urban,120,Clothing
1,akqg208421122,34,4.5,12.913041,77.683237,13.043041,77.813237,2022-03-25,19:45:00,19:50:00,Stormy,Jam,scooter,Metropolitian,165,Electronics
2,njpu434582536,23,4.4,12.914264,77.6784,12.924264,77.6884,2022-03-19,08:30:00,08:45:00,Sandstorms,Low,motorcycle,Urban,130,Sports
3,rjto796129700,38,4.7,11.003669,76.976494,11.053669,77.026494,2022-04-05,18:00:00,18:10:00,Sunny,Medium,motorcycle,Metropolitian,105,Cosmetics
4,zguw716275638,32,4.6,12.972793,80.249982,13.012793,80.289982,2022-03-26,13:30:00,13:45:00,Cloudy,High,scooter,Metropolitian,150,Toys


### 1. `order_id` to be unique

In [4]:
# Every value in `order_id` must be unique.
validator.expect_column_values_to_be_unique(column="order_id")

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 43739,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### 2. `agent_rating` to be between 0 and 6

In [5]:
# Each `agent_rating` value must lie between 0 and 6.
validator.expect_column_values_to_be_between(
    column="agent_rating",
    min_value=0,
    max_value=6,
)


Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 43739,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### 3. `vehicle` values to be in set


In [6]:
# Each `vehicle` value must be one of the allowed vehicle labels.
# NOTE(review): three of the labels end with a trailing space. The recorded run
# passes with 0 unexpected values, so they presumably mirror the raw values in
# the CSV -- confirm against the data before trimming them.
validator.expect_column_values_to_be_in_set(
    column="vehicle",
    value_set=["motorcycle ", "scooter ", "van", "bicycle "],
)


Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 43739,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### 4. `agent_rating` values to be of type 'float'

In [10]:
# The `agent_rating` column must be of type 'float'
# (the recorded run observed float64).
validator.expect_column_values_to_be_of_type(column="agent_rating", type_="float")


Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "float64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### 5. `weather` value lengths to be at least 3

In [11]:
# Each `weather` string must have a length of 3 or more; no upper bound is set.
# In the recorded run the 91 missing values are reported separately and are not
# counted as unexpected.
validator.expect_column_value_lengths_to_be_between(column="weather", min_value=3)


Calculating Metrics:   0%|          | 0/9 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 43739,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 91,
    "missing_percent": 0.20805231029515994,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### 6. `agent_age` min to be between 15 and 65

In [14]:
# The minimum of `agent_age` must lie between 15 and 65. This checks the
# column's minimum only, not every individual value.
validator.expect_column_min_to_be_between(
    column="agent_age",
    min_value=15,
    max_value=65,
)

Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": 15
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

### 7. `category` column distinct values to be in set

In [20]:
# The distinct values found in `category` must all belong to this set of
# product categories.
validator.expect_column_distinct_values_to_be_in_set(
    column="category",
    value_set=[
        "Clothing", "Electronics", "Sports", "Cosmetics",
        "Toys", "Snacks", "Shoes", "Apparel",
        "Jewelry", "Outdoors", "Grocery", "Books",
        "Kitchen", "Home", "Pet Supplies", "Skincare",
    ],
)


Calculating Metrics:   0%|          | 0/4 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": [
      "Apparel",
      "Books",
      "Clothing",
      "Cosmetics",
      "Electronics",
      "Grocery",
      "Home",
      "Jewelry",
      "Kitchen",
      "Outdoors",
      "Pet Supplies",
      "Shoes",
      "Skincare",
      "Snacks",
      "Sports",
      "Toys"
    ],
    "details": {
      "value_counts": [
        {
          "value": "Apparel",
          "count": 2726
        },
        {
          "value": "Books",
          "count": 2824
        },
        {
          "value": "Clothing",
          "count": 2667
        },
        {
          "value": "Cosmetics",
          "count": 2677
        },
        {
          "value": "Electronics",
          "count": 2849
        },
        {
          "value": "Grocery",
          "count": 2691
        },
        {
          "value": "Home",
          "count": 2685
        },
        {
          "value": "Jewelry",
          "count": 2802
        },
        {
        