# Edit Your Expectation Suite
Use this notebook to recreate and modify your expectation suite:

**Expectation Suite Name**: `small_clean_suite`


In [1]:
import datetime

import pandas as pd

import great_expectations as gx
import great_expectations.jupyter_ux
from great_expectations.core.batch import BatchRequest
from great_expectations.checkpoint import SimpleCheckpoint
from great_expectations.exceptions import DataContextError

# Obtain the Great Expectations data context used by every cell below.
context = gx.get_context()

# Sample batch the validator runs against. If you modify this batch request, you may
# save the new version as a .json file to pass in later via the --batch-request option.
batch_request = {
    "datasource_name": "small_clean",
    "data_connector_name": "default_inferred_data_connector_name",
    "data_asset_name": "small_clean.csv",
    "limit": 1000,
}

# Feel free to change the name of your suite here. Renaming this will not remove the other one.
expectation_suite_name = "small_clean_suite"

# Load the suite if the context already has it; otherwise create it under that name.
try:
    suite = context.get_expectation_suite(expectation_suite_name=expectation_suite_name)
except DataContextError:
    suite = context.add_expectation_suite(expectation_suite_name=expectation_suite_name)
    print(f'Created ExpectationSuite "{suite.expectation_suite_name}".')
else:
    print(f'Loaded ExpectationSuite "{suite.expectation_suite_name}" containing {len(suite.expectations)} expectations.')

# Bind the sample batch and the suite together in a validator.
sample_batch_request = BatchRequest(**batch_request)
validator = context.get_validator(
    batch_request=sample_batch_request,
    expectation_suite_name=expectation_suite_name,
)

# List the batch's column names, each wrapped in double quotes.
column_names = [f'"{column_name}"' for column_name in validator.columns()]
print(f"Columns: {', '.join(column_names)}.")

# Kept as the cell's final expression so the five-row preview is rendered as its output.
validator.head(n_rows=5, fetch_all=False)

2023-04-21T00:39:04-0400 - INFO - Great Expectations logging enabled at 20 level by JupyterUX module.
2023-04-21T00:39:04-0400 - INFO - FileDataContext loading fluent config
2023-04-21T00:39:04-0400 - INFO - Loading 'datasources' ->
[]
2023-04-21T00:39:04-0400 - INFO - Loaded 'datasources' ->
[]
Loaded ExpectationSuite "small_clean_suite" containing 0 expectations.


Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

Columns: "borough", "bin__", "house__", "street_name", "job__", "job_doc___", "job_type", "self_cert", "block", "lot", "community_board", "zip_code", "bldg_type", "residential", "special_district_1", "special_district_2", "work_type", "permit_status", "filing_status", "permit_type", "permit_sequence__", "permit_subtype", "oil_gas", "site_fill", "filing_date", "issuance_date", "expiration_date", "job_start_date", "permittee_s_first_name", "permittee_s_last_name", "permittee_s_business_name", "permittee_s_phone__", "permittee_s_license_type", "permittee_s_license__", "act_as_superintendent", "permittee_s_other_title", "hic_license", "site_safety_mgr_s_first_name", "site_safety_mgr_s_last_name", "site_safety_mgr_business_name", "superintendent_first___last_name", "superintendent_business_name", "owner_s_business_type", "non_profit", "owner_s_business_name", "owner_s_first_name", "owner_s_last_name", "owner_s_house__", "owner_s_house_street_name", "city", "state", "owner_s_zip_code", "owne

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,borough,bin__,house__,street_name,job__,job_doc___,job_type,self_cert,block,lot,community_board,zip_code,bldg_type,residential,special_district_1,special_district_2,work_type,permit_status,filing_status,permit_type,permit_sequence__,permit_subtype,oil_gas,site_fill,filing_date,issuance_date,expiration_date,job_start_date,permittee_s_first_name,permittee_s_last_name,permittee_s_business_name,permittee_s_phone__,permittee_s_license_type,permittee_s_license__,act_as_superintendent,permittee_s_other_title,hic_license,site_safety_mgr_s_first_name,site_safety_mgr_s_last_name,site_safety_mgr_business_name,superintendent_first___last_name,superintendent_business_name,owner_s_business_type,non_profit,owner_s_business_name,owner_s_first_name,owner_s_last_name,owner_s_house__,owner_s_house_street_name,city,state,owner_s_zip_code,owner_s_phone__,dobrundate,permit_si_no,gis_latitude,gis_longitude,gis_council_district,gis_census_tract,gis_nta_name,house_id,location_id,owner_id
0,MANHATTAN,1088749,1,MADISON AVE,141008987,1,A3,Y,853,2,105,10010.0,2.0,N,Not Available,Not Available,EQ,ISSUED,INITIAL,EQ,1,OT,N,Not Available,05/10/2022,05/10/2022,05/10/2023,05/10/2022,FRANKIE,COLLETTA,John Doe Inc,2122441000.0,GC,0,N,Not Available,Not Available,Not Available,Not Available,Not Available,John Doe,John Doe Inc,CORPORATION,N,John Doe Inc,JOHN,KRUSH,-1,Missing street name,Not Available,Not Available,0.0,2125943000.0,2022-05-11,3905851,40.740909,-73.987947,2.0,56.0,Hudson Yards-Chelsea-Flatiron-Union Square,1_40.740909_-73.987947,10010.0-853-2,JOHN-KRUSH-2125942700.0
1,STATEN ISLAND,5076937,87,BOYLAN STREET,540218539,1,A2,Y,5687,66,503,10312.0,1.0,YES,SRD,Not Available,OT,ISSUED,INITIAL,EW,1,OT,N,NOT APPLICABLE,05/10/2022,05/10/2022,10/01/2022,05/12/2022,PAUL,JOYCE,John Doe Inc,8055408000.0,GC,0,N,Not Available,Not Available,Not Available,Not Available,Not Available,John Doe,John Doe Inc,INDIVIDUAL,N,John Doe Inc,STEVEN,MONTANA,-1,Missing street name,Not Available,Not Available,0.0,9174202000.0,2022-05-11,3905852,40.563654,-74.179584,51.0,17008.0,Arden Heights,87_40.563654_-74.179584,10312.0-5687-66,STEVEN-MONTANA-9174201655.0
2,STATEN ISLAND,5001506,217,LAFAYETTE AVENUE,540218575,1,A2,Y,64,22,501,10301.0,1.0,YES,HS,Not Available,OT,ISSUED,INITIAL,EW,1,OT,N,NOT APPLICABLE,05/10/2022,05/10/2022,10/01/2022,05/15/2022,PAUL,JOYCE,John Doe Inc,8055408000.0,GC,0,N,Not Available,Not Available,Not Available,Not Available,Not Available,John Doe,John Doe Inc,INDIVIDUAL,N,John Doe Inc,SANTO,CEBALLOS,-1,Missing street name,Not Available,Not Available,0.0,7188126000.0,2022-05-11,3905853,40.639633,-74.094169,49.0,81.0,West New Brighton-New Brighton-St. George,217_40.639633_-74.094169,10301.0-64-22,SANTO-CEBALLOS-7188125847.0
3,STATEN ISLAND,5067021,170,OAKDALE STREET,540218600,1,A2,Y,5260,1,503,10308.0,1.0,YES,SRD,Not Available,OT,ISSUED,INITIAL,EW,1,OT,N,NOT APPLICABLE,05/10/2022,05/10/2022,10/01/2022,05/15/2022,PAUL,JOYCE,John Doe Inc,8055408000.0,GC,0,N,Not Available,Not Available,Not Available,Not Available,Not Available,John Doe,John Doe Inc,INDIVIDUAL,N,John Doe Inc,JOANNA,GIANGRANDE,-1,Missing street name,Not Available,Not Available,0.0,3478576000.0,2022-05-11,3905854,40.544597,-74.157153,51.0,15601.0,Great Kills,170_40.544597_-74.157153,10308.0-5260-1,JOANNA-GIANGRANDE-3478575846.0
4,STATEN ISLAND,5058036,273,10 STREET,540218628,1,A2,Y,4242,45,502,10306.0,1.0,YES,Not Available,Not Available,OT,ISSUED,INITIAL,EW,1,OT,N,NOT APPLICABLE,05/10/2022,05/10/2022,11/15/2022,05/24/2022,ALEXANDER,YACKERY,John Doe Inc,7183982000.0,GC,0,N,Not Available,Not Available,Not Available,Not Available,Not Available,John Doe,John Doe Inc,INDIVIDUAL,N,John Doe Inc,ARKADLY,FRIDMAN,-1,Missing street name,Not Available,Not Available,0.0,7186196000.0,2022-05-11,3905855,40.566798,-74.119726,50.0,134.0,New Dorp-Midland Beach,273_40.566798_-74.119726,10306.0-4242-45,ARKADLY-FRIDMAN-7186195891.0


## Create & Edit Expectations


Add expectations by calling specific expectation methods on the `validator` object. They all begin with `.expect_`, which makes them easy to autocomplete with Tab.

Because you selected interactive mode, you are now creating or editing an Expectation Suite with validator feedback from the sample batch of data that you specified (see `batch_request`).

Note that if you select manual mode you may still create or edit an Expectation Suite directly, without feedback from the `validator`. See our documentation for more info and examples: [How to create a new Expectation Suite without a sample batch](https://docs.greatexpectations.io/docs/guides/expectations/how_to_create_and_edit_expectations_based_on_domain_knowledge_without_inspecting_data_directly).



You can see all the available expectations in the **[expectation gallery](https://greatexpectations.io/expectations)**.

### Table Expectation(s)

No table-level expectations are in this suite. Feel free to add some here.

They all begin with `validator.expect_table_...`.


### Column Expectation(s)

No column-level expectations were in this suite when this notebook was generated. The cell below adds some; feel free to add more here.

They all begin with `validator.expect_column_...`.


In [2]:
# Columns that must not contain null values. The order is kept exactly as the
# expectations were originally written, so the suite lists them in the same sequence.
NOT_NULL_COLUMNS = [
    "bin__",
    "borough",
    "house__",
    "street_name",
    "job__",
    "job_doc___",
    "job_type",
    "self_cert",
    "block",
    "lot",
    "community_board",
    "zip_code",
    "bldg_type",
    "residential",
    "special_district_1",
    "special_district_2",
    "work_type",
    "permit_status",
    "filing_status",
    "permit_type",
    "permit_sequence__",
    "permit_subtype",
    "oil_gas",
    "site_fill",
    "filing_date",
    "issuance_date",
    "expiration_date",
    "job_start_date",
    "permittee_s_first_name",
    "permittee_s_last_name",
    "permittee_s_business_name",
    "permittee_s_phone__",
    "permittee_s_license_type",
    "permittee_s_license__",
    "act_as_superintendent",
    "permittee_s_other_title",
    "hic_license",
    "site_safety_mgr_s_first_name",
    "site_safety_mgr_s_last_name",
    "site_safety_mgr_business_name",
    "superintendent_first___last_name",
    "superintendent_business_name",
    "owner_s_business_type",
    "non_profit",
    "owner_s_business_name",
    "owner_s_first_name",
    "owner_s_last_name",
    "owner_s_house__",
    "owner_s_house_street_name",
    "city",
    "state",
    "owner_s_zip_code",
    "owner_s_phone__",
    "dobrundate",
    "permit_si_no",
    "gis_latitude",
    "gis_longitude",
    "gis_council_district",
    "gis_census_tract",
    "gis_nta_name",
    "house_id",
    "location_id",
    "owner_id",
]

# One not-null expectation per column. A single loop replaces 63 copy-pasted
# calls that differed only in the column name.
for not_null_column in NOT_NULL_COLUMNS:
    validator.expect_column_values_to_not_be_null(column=not_null_column)

# Categorical columns restricted to an explicit set of allowed values.
validator.expect_column_values_to_be_in_set(
    column="borough",
    value_set=["MANHATTAN", "BRONX", "QUEENS", "BROOKLYN", "STATEN ISLAND"],
)
validator.expect_column_values_to_be_in_set(
    column="job_type",
    value_set=["A1", "A2", "A3", "NB", "DM", "SG"],
)
validator.expect_column_values_to_be_in_set(
    column="filing_status",
    value_set=["INITIAL", "RENEWAL"],
)
validator.expect_column_values_to_be_in_set(
    column="permit_status",
    value_set=["IN PROCESS", "ISSUED", "RE-ISSUED", "REVOKED"],
)

# Kept as the cell's final bare expression so the notebook still displays this
# expectation's validation result as the cell output.
validator.expect_column_values_to_be_in_set(column="bldg_type", value_set=[1.0, 2.0])


Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "meta": {},
  "success": true,
  "result": {
    "element_count": 1000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  }
}

## Review & Save Your Expectations

Let's save the expectation suite as a JSON file in the `great_expectations/expectations` directory of your project.

Let's now rebuild your Data Docs, which helps you communicate about your data with both machines and humans.

In [3]:
# Print the suite held by the validator, then save it. Both calls pass
# discard_failed_expectations=False.
print(validator.get_expectation_suite(discard_failed_expectations=False))
validator.save_expectation_suite(discard_failed_expectations=False)

# The checkpoint runs one validation: the batch request from the first cell
# against the suite named there.
suite_validation = {
    "batch_request": batch_request,
    "expectation_suite_name": expectation_suite_name,
}
checkpoint_config = {
    "class_name": "SimpleCheckpoint",
    "validations": [suite_validation],
}

# Name the checkpoint after the validated data asset and the suite.
checkpoint_name = f"{validator.active_batch_definition.data_asset_name}_{expectation_suite_name}"
checkpoint = SimpleCheckpoint(checkpoint_name, context, **checkpoint_config)
checkpoint_result = checkpoint.run()

# Rebuild Data Docs after the checkpoint run.
context.build_data_docs()

# Open Data Docs at the first validation result the checkpoint reported.
validation_result_identifier = checkpoint_result.list_validation_result_identifiers()[0]
context.open_data_docs(resource_identifier=validation_result_identifier)

2023-04-21T00:39:39-0400 - INFO - 	68 expectation(s) included in expectation_suite.
{
  "ge_cloud_id": null,
  "expectations": [
    {
      "expectation_type": "expect_column_values_to_not_be_null",
      "kwargs": {
        "column": "bin__"
      },
      "meta": {}
    },
    {
      "expectation_type": "expect_column_values_to_not_be_null",
      "kwargs": {
        "column": "borough"
      },
      "meta": {}
    },
    {
      "expectation_type": "expect_column_values_to_not_be_null",
      "kwargs": {
        "column": "house__"
      },
      "meta": {}
    },
    {
      "expectation_type": "expect_column_values_to_not_be_null",
      "kwargs": {
        "column": "street_name"
      },
      "meta": {}
    },
    {
      "expectation_type": "expect_column_values_to_not_be_null",
      "kwargs": {
        "column": "job__"
      },
      "meta": {}
    },
    {
      "expectation_type": "expect_column_values_to_not_be_null",
      "kwargs": {
        "column": "job_doc___"
 

2023-04-21T00:39:39-0400 - INFO - 	68 expectation(s) included in expectation_suite.


Calculating Metrics:   0%|          | 0/343 [00:00<?, ?it/s]