In [82]:
# Optional one-time setup: uncomment to install Great Expectations into this kernel's environment.
# NOTE(review): the install is unpinned; the cells below rely on `context.sources` and
# `great_expectations.checkpoint.Checkpoint` — confirm which release provides them and pin it.
#%pip install great-expectations

In [103]:
#import libraries
import great_expectations as gx
import pandas as pd
from great_expectations.checkpoint import Checkpoint

In [104]:
# Get the Great Expectations data context that the following cells operate on.
context = gx.get_context()

In [85]:
# Register the pandas datasource on the context.
# add_or_update_pandas (rather than add_pandas) keeps this cell re-runnable:
# a datasource with this name left over from an earlier run is replaced
# instead of causing a name collision.
datasource = context.sources.add_or_update_pandas(name="my_pandas_datasource1")

In [86]:
# Read the CSV file into a pandas DataFrame.
csv_path = "P2M3_Kenneth_Vincentius_data_clean.csv"
dataframe = pd.read_csv(csv_path)

In [88]:
# Name the data asset, register it on the datasource as a DataFrame asset,
# and build a batch request from the loaded DataFrame.
name = "heart_atk"
data_asset = datasource.add_dataframe_asset(name=name)
my_batch_request = data_asset.build_batch_request(dataframe=dataframe)

In [89]:
# Create (or update) the expectation suite and bind a validator to it.
expectation_suite_name = "Milestone3"
context.add_or_update_expectation_suite(expectation_suite_name=expectation_suite_name)

# Reuse the batch request built earlier instead of building an identical one again.
validator = context.get_validator(
    batch_request=my_batch_request,
    expectation_suite_name=expectation_suite_name,
)

# Leave the preview as the cell's last expression so the notebook renders it
# as a table rather than printing plain text.
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

   age     sex                cp  trtbps  chol    fbs        restecg  \
0   63  female      asymptomatic     145   233   True         Normal   
1   37  female  non-anginal pain     130   250  False  ST-T Abnormal   
2   41    male   atypical angina     130   204  False         Normal   
3   56  female   atypical angina     120   236  False  ST-T Abnormal   
4   57    male    typical angina     120   354  False  ST-T Abnormal   

   thalachh   exng  oldpeak               slp  caa              thall  \
0       150  False      2.3           Sloping    0       Fixed Defect   
1       187  False      3.5           Sloping    0  Reversible Defect   
2       172  False      1.4  Upward Diagnosis    0  Reversible Defect   
3       178  False      0.8  Upward Diagnosis    0  Reversible Defect   
4       163   True      0.6  Upward Diagnosis    0  Reversible Defect   

   heart_attack  
0          True  
1          True  
2          True  
3          True  
4          True  


In [90]:
# Expect the `heart_attack` column to contain no missing values.
validator.expect_column_values_to_not_be_null(column="heart_attack")

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 302,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [91]:
# Expect the `exng` column to contain no missing values.
validator.expect_column_values_to_not_be_null(column="exng")

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 302,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [92]:
# Expect every value in the `age` column to lie between 29 and 77.
validator.expect_column_values_to_be_between(
    column="age", min_value=29, max_value=77
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 302,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [93]:
# Expect the `trtbps` column to contain no missing values.
validator.expect_column_values_to_not_be_null(column="trtbps")

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 302,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [94]:
# Expect every value in the `chol` column to lie between 126 and 564.
validator.expect_column_values_to_be_between(
    column="chol", min_value=126, max_value=564
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 302,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [95]:
# Expect the `sex` column to contain only the values "male" and "female".
# Arguments are passed by keyword, matching the other expectation cells.
validator.expect_column_values_to_be_in_set(
    column="sex", value_set=["male", "female"]
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 302,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [96]:
# Expect a column named `cp` to exist in the data.
validator.expect_column_to_exist(column="cp")

Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

{
  "success": true,
  "result": {},
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [97]:
# Save the expectations tried above to the suite; failed expectations are kept
# as well (discard_failed_expectations=False).
validator.save_expectation_suite(discard_failed_expectations=False)

In [98]:
# Define the checkpoint that pairs the batch request with the expectation suite.
my_checkpoint_name = "Milestone3"

# Actions configured on the checkpoint, referenced by class name.
checkpoint_actions = [
    {
        "name": "store_validation_result",
        "action": {"class_name": "StoreValidationResultAction"},
    },
    {
        "name": "update_data_docs",
        "action": {"class_name": "UpdateDataDocsAction"},
    },
]

checkpoint = Checkpoint(
    name=my_checkpoint_name,
    run_name_template="%Y%m%d-%H%M%S-Milestone3",
    data_context=context,
    batch_request=my_batch_request,
    expectation_suite_name=expectation_suite_name,
    action_list=checkpoint_actions,
)

In [99]:
# Register the checkpoint on the context (add it, or update an existing one).
context.add_or_update_checkpoint(checkpoint=checkpoint)

{
  "action_list": [
    {
      "name": "store_validation_result",
      "action": {
        "class_name": "StoreValidationResultAction"
      }
    },
    {
      "name": "update_data_docs",
      "action": {
        "class_name": "UpdateDataDocsAction"
      }
    }
  ],
  "batch_request": {
    "datasource_name": "my_pandas_datasource",
    "data_asset_name": "heart_atk",
    "options": {}
  },
  "class_name": "Checkpoint",
  "config_version": 1.0,
  "evaluation_parameters": {},
  "expectation_suite_name": "hacktiv8_car_assignment",
  "module_name": "great_expectations.checkpoint",
  "name": "hacktiv8_cars_checkpoint",
  "profilers": [],
  "run_name_template": "%Y%m%d-%H%M%S-hacktiv8_cars_checkpoint",
  "runtime_configuration": {},
  "validations": []
}

In [100]:
# Run the checkpoint and keep its result.
checkpoint_result = checkpoint.run()

# Show the overall pass/fail flag so the validation outcome is visible in the
# notebook instead of being silently discarded.
checkpoint_result.success

Calculating Metrics:   0%|          | 0/39 [00:00<?, ?it/s]

In [101]:
# Build the Data Docs (the HTML index) for the context's local site.
context.build_data_docs()

{'local_site': 'file://c:\\Users\\kenne\\Pribadi\\Hackt8\\Phase 2\\ms_phase2\\dags\\gx\\uncommitted/data_docs/local_site/index.html'}

In [102]:
# Open the generated Great Expectations HTML docs (call left commented out).
#context.open_data_docs()