In [1]:
import great_expectations as gx

In [2]:
# Membuat data context
context = gx.get_context(project_root_dir='./')

In [3]:
# membuat nama untuk datasource
datasource_name = 'csv-data-dim-country-economic'
datasource = context.data_sources.add_pandas(datasource_name)

# membuat nama untuk data asset
asset_name = 'dim_country_economic_table'
path_to_data = 'df_country_economic.csv'
asset = datasource.add_csv_asset(name=asset_name, filepath_or_buffer=path_to_data)

# Membuat batch request
batch_request = asset.build_batch_request()

In [4]:
# membuat expectation suite
expectation_suite_name = 'expectation-country-economic-dataset'
suite = gx.ExpectationSuite(name=expectation_suite_name)

# memasukkan Expectation Suite ke dalam Data Context
suite = context.suites.add(suite)
suite = context.suites.get(name=expectation_suite_name)

In [5]:
# membuat validator sesuai dengan expectation suite
validator = context.get_validator(
    batch_request = batch_request,
    expectation_suite_name = expectation_suite_name
)

# mengecek validator
validator.head()

Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 199.96it/s]


Unnamed: 0,country_name,year,gdp_growth,gdp_per_capita
0,Afghanistan,2000,0.0,0.0
1,Afghanistan,2001,0.0,0.0
2,Afghanistan,2002,0.0,179.426579
3,Afghanistan,2003,8.832278,190.683814
4,Afghanistan,2004,1.414118,211.382074


In [7]:
# Expectation 1 : kolom `country_name` tidak boleh memiliki missing value

validator.expect_column_values_to_not_be_null('country_name')

Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 140.56it/s]


{
  "success": true,
  "expectation_config": {
    "type": "expect_column_values_to_not_be_null",
    "kwargs": {
      "batch_id": "csv-data-dim-country-economic-dim_country_economic_table",
      "column": "country_name"
    },
    "meta": {}
  },
  "result": {
    "element_count": 3649,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [8]:
# Expectation 2 : tipe data pada kolom 'gdp_growth' harus integer atau float

validator.expect_column_values_to_be_in_type_list('gdp_growth', ['integer', 'float'])

Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 140.12it/s]


{
  "success": true,
  "expectation_config": {
    "type": "expect_column_values_to_be_in_type_list",
    "kwargs": {
      "batch_id": "csv-data-dim-country-economic-dim_country_economic_table",
      "column": "gdp_growth",
      "type_list": [
        "integer",
        "float"
      ]
    },
    "meta": {}
  },
  "result": {
    "observed_value": "float64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [9]:
# Expectation 3 : kolom 'year' harus ada

validator.expect_column_to_exist(column='year')

Calculating Metrics: 100%|██████████| 2/2 [00:00<00:00, 162.38it/s]


{
  "success": true,
  "expectation_config": {
    "type": "expect_column_to_exist",
    "kwargs": {
      "batch_id": "csv-data-dim-country-economic-dim_country_economic_table",
      "column": "year"
    },
    "meta": {}
  },
  "result": {},
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# Expectation 4 : minimum value di kolom 'year' adalah 2000

validator.expect_column_min_to_be_between('year', 2000)

Calculating Metrics: 100%|██████████| 4/4 [00:00<00:00, 79.53it/s]


{
  "success": true,
  "expectation_config": {
    "type": "expect_column_min_to_be_between",
    "kwargs": {
      "batch_id": "csv-data-dim-country-economic-dim_country_economic_table",
      "column": "year",
      "min_value": 2000.0
    },
    "meta": {}
  },
  "result": {
    "observed_value": 2000
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}