In [1]:
from sibyl.core import Sibyl
from mongoengine import disconnect

In [2]:
database = "housing"

In [3]:
disconnect()
config = {
    "mongodb": {
        "db": database,
        "host": "localhost",
        "port": 27017,
        "username": None,
        "password": None,
    },
    "log_filename": "test.csv",
    "feature_distribution_location": None,
    "flask": {},
}
explorer = Sibyl(config, docker=False)
app = explorer._init_flask_app("test")
client = app.test_client()

In [4]:
# TEST ENTITY APIs
eid = None

response = client.get("/api/v1/entities/")
assert response.status_code == 200
assert "entities" in response.json
print("Number of entities:", len(response.json["entities"]))

if eid is None:
    eid = response.json["entities"][0]["eid"]
eids = [response.json["entities"][i]["eid"] for i in range(0, 3)]

response = client.get("/api/v1/entities/" + eid + "/")
assert response.status_code == 200
assert response.json["eid"] == eid
assert "features" in response.json
assert "property" in response.json
sample_entity = response.json

Number of entities: 1456


In [5]:
# TEST FEATURE APIs

response = client.get("/api/v1/features/")
assert response.status_code == 200
assert "features" in response.json
sample_features = response.json["features"]
print("Number of features:", len(sample_features))
sample_feature = sample_features[0]

response = client.get("/api/v1/features/" + sample_feature["name"] + "/")
assert response.status_code == 200
assert "name" in response.json
print("Sample feature:", response.json)

response = client.get("/api/v1/categories/")
assert response.status_code == 200
assert "categories" in response.json
print("Sample category", response.json["categories"][0])

Number of features: 79
Sample feature: {'name': 'MSSubClass', 'description': ' Identifies the type of dwelling involved in the sale.', 'type': 'categorical', 'category': 'general', 'values': ['20', '60', '70', '50', '190', '45', '90', '120', '30', '85', '80', '160', '75', '180', '40'], 'negated_description': None}
Sample category {'name': 'size', 'color': None, 'abbreviation': None}


In [6]:
# TEST MODEL APIs
response = client.get("/api/v1/models/")
assert response.status_code == 200
print("Number of models:", len(response.json["models"]))
sample_model = response.json["models"][0]
model_id = sample_model["model_id"]

response = client.get("/api/v1/models/" + model_id + "/")
assert response.status_code == 200
print("Sample model:", response.json)

response = client.get("/api/v1/importance/?model_id=" + model_id)
assert response.status_code == 200
assert "importances" in response.json
print("Sample importance", next(iter(response.json["importances"].items())))

response = client.get("/api/v1/prediction/?model_id=" + model_id + "&eid=" + eid)
assert response.status_code == 200
print("Sample prediction:", response.json)
prediction = response.json["output"]

Number of models: 1
Sample model: {'model_id': 'housing_model', 'description': '', 'performance': ''}
Sample importance ('MSSubClass', 1783.8738895418035)


Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)


Sample prediction: {'output': 157585.2499694067}


In [7]:
# TEST CONTEXTS

response = client.get("/api/v1/contexts/")
assert response.status_code == 200
context_id = response.json["contexts"][0]["context_id"]

response = client.get("/api/v1/context/" + context_id + "/")
assert response.status_code == 200
print("Sample context:", response.json)

Sample context: {'context': {'context_id': 'context', 'config': {'output_preset': 'profit_usd', 'output_type': 'numeric', 'output_pos_label': None, 'output_neg_label': None, 'output_format_string': '${:,.2f}', 'output_sentiment_is_negative': False, 'terms': {'entity': 'House', 'feature': 'Factor', 'prediction': 'Sale Price', 'positive': 'Beneficial', 'negative': 'Detrimental'}}}}


In [8]:
# TEST COMPUTING
import pandas as pd

response = client.post("/api/v1/contributions/", json={"eid": eid, "model_id": model_id})
assert response.status_code == 200
assert "result" in response.json
print("Sample contribution", next(iter(response.json["result"].items())))

response = client.post("/api/v1/multi_contributions/", json={"eids": eids, "model_id": model_id})
assert response.status_code == 200
assert "contributions" in response.json
for eid in response.json["contributions"]:
    try:
        contributions_df = pd.DataFrame.from_dict(
            response.json["contributions"][eid], orient="index"
        )
        values_df = pd.DataFrame.from_dict(response.json["values"][eid], orient="index")
    except Exception as e:
        print("Error with multi-contributions - wrong format")
        raise e
    assert contributions_df.shape == values_df.shape
print("Multi-contributions validated")

row_ids = list(sample_entity["features"].keys())

changes = {
    sample_features[0]["name"]: sample_entity["features"][row_ids[0]][sample_features[0]["name"]],
    sample_features[1]["name"]: sample_entity["features"][row_ids[0]][sample_features[1]["name"]],
}
response = client.post(
    "/api/v1/modified_prediction/", json={"eid": eid, "model_id": model_id, "changes": changes}
)
assert response.status_code == 200
assert "prediction" in response.json
print("Sample modified prediction:", response.json)

response = client.post(
    "/api/v1/single_change_predictions/",
    json={"eid": eid, "model_id": model_id, "changes": changes},
)
assert response.status_code == 200
assert "predictions" in response.json
print("Sample predictions:", response.json["predictions"])

Sample contribution ('MSSubClass', {'Feature Value': 20, 'Contribution': 1719.226755355572, 'Average/Mode': 20.0})
Multi-contributions validated
Sample modified prediction: {'prediction': 200785.20760564454}
Sample predictions: [['MSSubClass', 200785.20760564454], ['MSZoning', 200785.20760564454]]


In [9]:
# TEST COMPUTING - PART 2
response = client.post(
    "/api/v1/modified_contribution/",
    json={"eid": eid, "model_id": model_id, "changes": changes},
)
assert response.status_code == 200
assert "contributions" in response.json
assert "values" in response.json
try:
    contribution_df = pd.DataFrame.from_dict(response.json["contributions"], orient="index")
    print("Sample contribution object:\n", contribution_df[contribution_df.columns[0]])
    value_df = pd.DataFrame.from_dict(response.json["values"], orient="index")
except Exception as e:
    print("Error with modified-contributions - wrong format")
    raise e
assert contribution_df.shape == value_df.shape

response = client.post("/api/v1/similar_entities/", json={"eids": eids, "model_id": model_id})
assert response.status_code == 200
assert "similar_entities" in response.json
print_flag = True
for eid in response.json["similar_entities"]:
    assert "X" in response.json["similar_entities"][eid]
    try:
        X = pd.DataFrame.from_dict(
            response.json["similar_entities"][eid]["X"], orient="index"
        ).head()
        if print_flag:
            print("Sample similar entity:\n", X)
    except Exception as e:
        print("Error with similar entities - wrong X format")
        raise e
    assert "y" in response.json["similar_entities"][eid]
    try:
        y = pd.Series(response.json["similar_entities"][eid]["y"]).head()
        if print_flag:
            print("y:\n", y)
    except Exception as e:
        print("Error with similar entities - wrong y format")
        raise e
    try:
        input = pd.Series(response.json["similar_entities"][eid]["Input"]).head()
        if print_flag:
            print("Input row:\n", input)
            print_flag = False
    except Exception as e:
        print("Error with similar entities - wrong input format")
        raise e

Sample contribution object:
 0    1719.226755
Name: MSSubClass, dtype: float64
Sample similar entity:
       MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \
0             20       RL         75.0     9937   Pave  None      Reg   
1100          20       RL         61.0     9758   Pave  None      IR1   
1257          20       RL         65.0     9750   Pave  None      Reg   

     LandContour Utilities LotConfig  ... ScreenPorch PoolArea PoolQC Fence  \
0            Lvl    AllPub    Inside  ...           0        0   None  None   
1100         Lvl    AllPub    Inside  ...           0        0   None  None   
1257         Lvl    AllPub       FR2  ...          80        0   None  None   

     MiscFeature MiscVal  MoSold  YrSold  SaleType  SaleCondition  
0           None       0       6    2008        WD         Normal  
1100        None       0       7    2007        WD         Normal  
1257        None       0       7    2008        WD         Normal  

[3 rows x 79 c