# Configure the Profiler

In [58]:
import great_expectations as ge
from ruamel.yaml import YAML
from great_expectations.rule_based_profiler.rule_based_profiler import RuleBasedProfiler
from great_expectations.core.batch import BatchRequest

yaml = YAML()

# context = gx.get_context()
data_context = ge.data_context.DataContext()

In [43]:
profiler_config = """
    name: My Profiler
    config_version: 1.0

    variables:
      false_positive_rate: 0.01
      mostly: 1.0

    rules:
      row_count_rule:
        domain_builder:
            class_name: TableDomainBuilder
        parameter_builders:
          - name: row_count_range
            class_name: NumericMetricRangeMultiBatchParameterBuilder
            metric_name: table.row_count
            metric_domain_kwargs: $domain.domain_kwargs
            false_positive_rate: $variables.false_positive_rate
            truncate_values:
              lower_bound: 0
            round_decimals: 0
        expectation_configuration_builders:
          - expectation_type: expect_table_row_count_to_be_between
            class_name: DefaultExpectationConfigurationBuilder
            module_name: great_expectations.rule_based_profiler.expectation_configuration_builder
            min_value: $parameter.row_count_range.value[0]
            max_value: $parameter.row_count_range.value[1]
            mostly: $variables.mostly
            meta:
              profiler_details: $parameter.row_count_range.details
"""

# Run the Profiler

First we'll do some basic set-up - set up a Data Context and parse our YAML

In [44]:


full_profiler_config_dict= yaml.load(profiler_config)

Next, we'll instantiate our Profiler, passing in our config and our Data Context

In [45]:
rule_based_profiler = RuleBasedProfiler(
    name=full_profiler_config_dict["name"],
    config_version=full_profiler_config_dict["config_version"],
    rules=full_profiler_config_dict["rules"],
    variables=full_profiler_config_dict["variables"],
    data_context=data_context,
)

Finally, we'll run the profiler and save the result to a variable.

In [59]:
batch_request = {
    "datasource_name": "rule_based_datasource",
    "data_connector_name": "default_inferred_data_connector_name",
    "data_asset_name": "my_reports",
    "data_connector_query": {
        "index": "-6:-1",
    },
}



In [57]:
result = rule_based_profiler.run(batch_request=batch_request)




Generating Expectations:   0%|          | 0/1 [00:00<?, ?it/s]

Profiling Dataset:         0%|          | 0/1 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

In [60]:
expectation_suite_name = "rule_based_expectations"
validator = context.get_validator(
    batch_request=BatchRequest(**batch_request),
    expectation_suite_name=expectation_suite_name,
)
validator.head(n_rows=5, fetch_all=False)

# validator.expectation_suite = result.get_expectation_suite(
#     expectation_suite_name=expectation_suite_name
# )

DataContextError: expectation_suite rule_based_expectations not found