In [1]:
import os
import re

import great_expectations as ge
import numpy as np
import pandas as pd
import rad_pipeline.rad_pipeline as rp
import rad_pipeline.zipcodes as zc

## ASHP

In [2]:
# Load the ASHP table via the pipeline loader, then wrap it in a Great
# Expectations pandas dataset so the expect_* checks below can be called on it.
ashp_raw = rp.load_ashp()
ashp = ge.from_pandas(ashp_raw)

In [3]:
# Every column named in the ASHP field map must exist in the loaded data.
# The expectation result does not say which column was checked, so on failure
# print the logical field key and the column name it maps to alongside it;
# this lets a missing column be traced back to its rp.FIELDS entry.
field_map = rp.FIELDS['Air-source Heat Pumps']
for key_col, column_name in field_map.items():
    result = ashp.expect_column_to_exist(column_name)
    if not result["success"]:
        print("Failure!", repr(key_col), "->", repr(column_name))
        print(result)

In [4]:
# Expect zipcode to be mostly numbers and mostly populated

In [5]:
# Null check on the zip column. No `mostly` threshold is passed here.
zip_column = field_map['zip']
ashp.expect_column_values_to_not_be_null(zip_column)

{
  "meta": {},
  "result": {
    "element_count": 19964,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "partial_unexpected_list": []
  },
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [6]:
# Zip values are accepted as int, float, or str; anything else is unexpected.
zip_types = ['int', 'float', 'str']
ashp.expect_column_values_to_be_in_type_list(field_map['zip'], zip_types)

{
  "meta": {},
  "result": {
    "element_count": 19964,
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0,
    "partial_unexpected_list": []
  },
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [7]:
# Accepted zip shape, with optional surrounding whitespace:
#   - 3 to 5 digits
#   - optionally followed by a literal ".0"
#   - optionally followed by "-" plus 4 digits, or by a bare trailing "-"
# At least 98% of values must match for the expectation to succeed.
zip_pattern = r"^\s*([0-9]{3,5})(?:[.]0)?(?:-([0-9]{4})|-)?\s*$"
ashp.expect_column_values_to_match_regex(field_map['zip'], zip_pattern, mostly=0.98)

{
  "meta": {},
  "result": {
    "element_count": 19964,
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_count": 3,
    "unexpected_percent": 0.015027048687637747,
    "unexpected_percent_total": 0.015027048687637747,
    "unexpected_percent_nonmissing": 0.015027048687637747,
    "partial_unexpected_list": [
      20,
      "019081047",
      "0212y"
    ]
  },
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [8]:
# Expect rebate and cost to be mostly numbers

In [9]:
# Rebate values should be numeric (int or float) in at least 99% of rows.
rebate_types = ['int', 'float']
ashp.expect_column_values_to_be_in_type_list(field_map['rebate'], rebate_types, mostly=0.99)

{
  "meta": {},
  "result": {
    "element_count": 19964,
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_count": 1,
    "unexpected_percent": 0.005009016229212583,
    "unexpected_percent_total": 0.005009016229212583,
    "unexpected_percent_nonmissing": 0.005009016229212583,
    "partial_unexpected_list": [
      "Not Applicable"
    ]
  },
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [10]:
# Cost values should be numeric (int or float) in at least 99% of rows.
cost_types = ['int', 'float']
ashp.expect_column_values_to_be_in_type_list(field_map['cost'], cost_types, mostly=0.99)

{
  "meta": {},
  "result": {
    "element_count": 19964,
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_count": 19,
    "unexpected_percent": 0.09517130835503908,
    "unexpected_percent_total": 0.09517130835503908,
    "unexpected_percent_nonmissing": 0.09517130835503908,
    "partial_unexpected_list": [
      "1000-labor only",
      "1200-labor only",
      "31900-with boiler",
      "925-labor only",
      "18668.75-with 5C42 unit",
      "1000-labor only",
      "25490-with Bosch",
      "57814.74-with Trane",
      "22725-with Lennox equip",
      "17800-with other ac system",
      "903.01-labor only",
      "425-labor only",
      "12330-with Lennox thing",
      "53000-with furnace ",
      "30500-with Luxiare equip",
      "25091-with Carrier equip",
      "41430-with additional multi head",
      " $ 9000.00",
      "$19.524.15"
    ]
  },
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "excep

In [11]:
# Write the expectation suite held by `ashp` to the shared expectations
# directory. The path is relative to the notebook's working directory.
suite_path = "../data/expectations/ashp_raw_expectations.json"
ashp.save_expectation_suite(suite_path)

## GSHP

## Solar

## Electric Vehicles (EVs)