In [1]:
import great_expectations as gx

In [2]:
# Membuat data context
context = gx.get_context(project_root_dir='./')

In [3]:
# membuat nama untuk datasource
datasource_name = 'csv-data-fact'
datasource = context.data_sources.add_pandas(datasource_name)

# membuat nama untuk data asset
asset_name = 'fact_table'
path_to_data = 'df_fact.csv'
asset = datasource.add_csv_asset(name=asset_name, filepath_or_buffer=path_to_data)

# Membuat batch request
batch_request = asset.build_batch_request()

In [4]:
# membuat expectation suite
expectation_suite_name = 'expectation-fact-dataset'
suite = gx.ExpectationSuite(name=expectation_suite_name)

# memasukkan Expectation Suite ke dalam Data Context
suite = context.suites.add(suite)
suite = context.suites.get(name=expectation_suite_name)

In [5]:
# membuat validator sesuai dengan expectation suite
validator = context.get_validator(
    batch_request = batch_request,
    expectation_suite_name = expectation_suite_name
)

# mengecek validator
validator.head()

Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 166.75it/s]


Unnamed: 0,country_name,year,access_electricity,access_clean_fuels,renewable_electricity,financial_flows,renewable_energy_share,electricity_fossil_fuels,electricity_nuclear,electricity_renewables,low_carbon_electricity,primary_energy_consumption,energy_intensity_level,value_co2_emissions,renewables
0,Afghanistan,2000,1.613591,6.2,9.22,20000.0,44.99,0.16,0.0,0.31,65.95744,302.59482,1.64,760.0,0.0
1,Afghanistan,2001,4.074574,7.2,8.86,130000.0,45.6,0.09,0.0,0.5,84.745766,236.89185,1.74,730.0,0.0
2,Afghanistan,2002,9.409158,8.2,8.47,3950000.0,37.83,0.13,0.0,0.56,81.159424,210.86215,1.4,1029.999971,0.0
3,Afghanistan,2003,14.738506,9.5,8.09,25970000.0,36.66,0.31,0.0,0.63,67.02128,229.96822,1.4,1220.000029,0.0
4,Afghanistan,2004,20.064968,10.9,7.75,0.0,44.24,0.33,0.0,0.56,62.92135,204.23125,1.2,1029.999971,0.0


In [None]:
# Expectation 1 : kolom `country_name` tidak boleh memiliki missing value

validator.expect_column_values_to_not_be_null('country_name')

Calculating Metrics: 100%|██████████| 6/6 [00:00<00:00, 146.41it/s]


{
  "success": true,
  "expectation_config": {
    "type": "expect_column_values_to_not_be_null",
    "kwargs": {
      "batch_id": "csv-data-fact-fact_table",
      "column": "country_name"
    },
    "meta": {}
  },
  "result": {
    "element_count": 3649,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# Expectation 2 : tipe data pada kolom 'access_electricity' harus integer atau float

validator.expect_column_values_to_be_in_type_list('access_electricity', ['integer', 'float'])

Calculating Metrics: 100%|██████████| 1/1 [00:00<00:00, 151.40it/s]


{
  "success": true,
  "expectation_config": {
    "type": "expect_column_values_to_be_in_type_list",
    "kwargs": {
      "batch_id": "csv-data-fact-fact_table",
      "column": "access_electricity",
      "type_list": [
        "integer",
        "float"
      ]
    },
    "meta": {}
  },
  "result": {
    "observed_value": "float64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# Expectation 3 : kolom 'year' harus ada

validator.expect_column_to_exist(column='year')

Calculating Metrics: 100%|██████████| 2/2 [00:00<00:00, 142.87it/s]


{
  "success": true,
  "expectation_config": {
    "type": "expect_column_to_exist",
    "kwargs": {
      "batch_id": "csv-data-fact-fact_table",
      "column": "year"
    },
    "meta": {}
  },
  "result": {},
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [None]:
# Expectation 4 : maksimum value di kolom 'access_electricity' adalah 100

validator.expect_column_max_to_be_between('access_electricity', 0, 100)

Calculating Metrics: 100%|██████████| 4/4 [00:00<00:00, 190.60it/s]


{
  "success": true,
  "expectation_config": {
    "type": "expect_column_max_to_be_between",
    "kwargs": {
      "batch_id": "csv-data-fact-fact_table",
      "column": "access_electricity",
      "min_value": 0.0,
      "max_value": 100.0
    },
    "meta": {}
  },
  "result": {
    "observed_value": 100.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}