In [10]:
from sibyl.core import Sibyl
from mongoengine import disconnect

In [11]:
database = "housing"

In [12]:
disconnect()
config = {
    "mongodb": {
        "db": database,
        "host": "localhost",
        "port": 27017,
        "username": None,
        "password": None,
    },
    "log_filename": "test.csv",
    "feature_distribution_location": None,
    "flask": {},
}
explorer = Sibyl(config, docker=False)
app = explorer._init_flask_app("test")
client = app.test_client()

In [13]:
# TEST ENTITY APIs
eid = None

response = client.get("/api/v1/entities/")
assert response.status_code == 200
assert "entities" in response.json
print("Number of entities:", len(response.json["entities"]))

if eid is None:
    eid = response.json["entities"][0]["eid"]
eids = [response.json["entities"][i]["eid"] for i in range(0, 3)]

response = client.get("/api/v1/entities/" + eid + "/")
assert response.status_code == 200
assert response.json["eid"] == eid
assert "features" in response.json
assert "property" in response.json
sample_entity = response.json

Number of entities: 1456


In [14]:
# TEST FEATURE APIs

response = client.get("/api/v1/features/")
assert response.status_code == 200
assert "features" in response.json
print("Number of features:", len(response.json["features"]))
sample_feature = response.json["features"][0]

response = client.get("/api/v1/features/" + sample_feature["name"] + "/")
assert response.status_code == 200
assert "name" in response.json
print("Sample feature:", response.json)

response = client.get("/api/v1/categories/")
assert response.status_code == 200
assert "categories" in response.json
print("Sample category", response.json["categories"][0])

Number of features: 79
Sample feature: {'name': 'MSSubClass', 'description': ' Identifies the type of dwelling involved in the sale.', 'type': 'category', 'negated_description': None, 'category': 'general'}
Sample category {'name': 'general', 'color': None, 'abbreviation': None}


In [15]:
# TEST MODEL APIs
response = client.get("/api/v1/models/")
assert response.status_code == 200
print("Number of models:", len(response.json["models"]))
sample_model = response.json["models"][0]
model_id = sample_model["id"]

response = client.get("/api/v1/models/" + model_id + "/")
assert response.status_code == 200
print("Sample model:", response.json)

response = client.get("/api/v1/importance/?model_id=" + model_id)
assert response.status_code == 200
assert "importances" in response.json
print("Sample importance", next(iter(response.json["importances"].items())))

response = client.get("/api/v1/prediction/?model_id=" + model_id + "&eid=" + eid)
assert response.status_code == 200
print("Sample prediction:", response.json)
prediction = response.json["output"]

Number of models: 1
Sample model: {'id': '64f8d56cb15bf0bb27c103ea', 'name': 'placeholder', 'description': 'placeholder', 'performance': 'placeholder'}
Sample importance ('MSSubClass', 1783.8738895418035)
Sample prediction: {'output': 136805.84817939159}


In [16]:
# TEST CONTEXTS

response = client.get("/api/v1/contexts/")
assert response.status_code == 200
context_id = response.json["contexts"][0]["id"]

response = client.get("/api/v1/context/" + context_id + "/")
assert response.status_code == 200
print("Sample context:", response.json)

Sample context: {'context': {'id': '64f8d567b15bf0bb27c0fe38', 'terms': {'Positive': 'Beneficial', 'Negative': 'Detrimental', 'Feature': 'Factor', 'Prediction': 'Predicted Price', 'Feature Contributions': 'Factor Contributions', 'Counterfactuals': 'Sandbox', 'Feature Importance': 'Factor Importance', 'Feature Distributions': 'Factor Distributions', 'Entity': 'House'}, 'gui_preset': 'profit_usd', 'gui_config': {'pred_type': 'numeric', 'model_pred_bad_outcome': False, 'pred_format_string': '${:,.2f}'}}}


In [17]:
# TEST COMPUTING
import pandas as pd

response = client.post("/api/v1/contributions/", json={"eid": eid, "model_id": model_id})
assert response.status_code == 200
assert "contributions" in response.json
print("Sample contribution", next(iter(response.json["contributions"].items())))

response = client.post("/api/v1/multi_contributions/", json={"eids": eids, "model_id": model_id})
assert response.status_code == 200
assert "contributions" in response.json
for eid in response.json["contributions"]:
    try:
        pd.read_json(response.json["contributions"][eid], orient="index")
    except Exception:
        print("Error with multi-contributions - wrong format")
print("Multi-contributions validated")

row_ids = list(sample_entity["features"].keys())
features = list(sample_entity["features"][row_ids[0]].keys())

changes = {
    features[0]: sample_entity["features"][row_ids[0]][features[0]],
    features[1]: sample_entity["features"][row_ids[0]][features[1]],
}
response = client.post(
    "/api/v1/modified_prediction/", json={"eid": eid, "model_id": model_id, "changes": changes}
)
assert response.status_code == 200
assert "prediction" in response.json
print("Sample modified prediction:", response.json)

response = client.post(
    "/api/v1/single_change_predictions/",
    json={"eid": eid, "model_id": model_id, "changes": changes},
)
assert response.status_code == 200
assert "predictions" in response.json
print("Sample predictions:", response.json["predictions"])

Sample contribution ('MSSubClass', 1719.226755355572)
Multi-contributions validated
Sample modified prediction: {'prediction': 183246.9701684981}
Sample predictions: [['MSSubClass', 183246.9701684981], ['MSZoning', 183246.9701684981]]


In [18]:
# TEST COMPUTING - PART 2
response = client.post(
    "/api/v1/modified_contribution/",
    json={"eid": eid, "model_id": model_id, "changes": changes},
)
assert response.status_code == 200
assert "contribution" in response.json
try:
    df = pd.read_json(response.json["contribution"], orient="index")
    print("Sample contribution object:", df[df.columns[0]])
except Exception:
    print("Error with modified-contributions - wrong format")

response = client.post("/api/v1/similar_entities/", json={"eids": eids, "model_id": model_id})
assert response.status_code == 200
assert "similar_entities" in response.json
print_flag = True
for eid in response.json["similar_entities"]:
    assert "X" in response.json["similar_entities"][eid]
    try:
        X = pd.read_json(response.json["similar_entities"][eid]["X"], orient="index").head()
        if print_flag:
            print("Sample similar entity:\n", X)
    except Exception:
        print("Error with similar entities - wrong X format")
    assert "y" in response.json["similar_entities"][eid]
    try:
        y = pd.read_json(response.json["similar_entities"][eid]["y"], orient="index").head()
        if print_flag:
            print(y)
            print_flag = False
    except Exception:
        print("Error with similar entities - wrong y format")

Sample contribution object: MSSubClass           20
LotFrontage        80.0
LotArea            9600
OverallQual           6
OverallCond           8
                  ...  
PoolQC               Fa
Fence             MnPrv
MiscFeature        Shed
SaleType             WD
SaleCondition    Normal
Name: Feature Value, Length: 79, dtype: object
Sample similar entity:
      MSSubClass MSZoning  LotFrontage  LotArea Street  Alley LotShape  \
0            20       RL           75     9937   Pave    NaN      Reg   
202          20       RL           75    10125   Pave    NaN      Reg   
562          20       RL           77    10010   Pave    NaN      Reg   

    LandContour Utilities LotConfig  ... ScreenPorch PoolArea PoolQC  Fence  \
0           Lvl    AllPub    Inside  ...           0        0    NaN   None   
202         Lvl    AllPub    Inside  ...           0        0    NaN  MnPrv   
562         Lvl    AllPub    Inside  ...           0        0    NaN   None   

    MiscFeature MiscVal  Mo