Skip to content

Commit

Permalink
Merge pull request #133 from moj-analytical-services/vif2
Browse files Browse the repository at this point in the history
tighten tests to check exact equivalence
  • Loading branch information
mamonu committed Nov 9, 2020
2 parents 51a84f1 + 8084c58 commit 862558a
Showing 1 changed file with 12 additions and 9 deletions.
21 changes: 12 additions & 9 deletions tests/test_diagnostics.py
Expand Up @@ -2,29 +2,32 @@
import pandas as pd
from splink.gammas import add_gammas
from splink.diagnostics import vif_gammas
import pytest


# For further info about this test see https://github.com/moj-analytical-services/splink/issues/132
def test_vif_gammas(spark, gamma_settings_4, params_4, sqlite_con_4):
    """Check vif_gammas on fixture data whose columns are independent.

    Use fixture data that is independent to ensure vif_gammas works with
    typical data (in this case with low association / correlation between
    columns).

    The ``df`` table is read from ``sqlite_con_4``, loaded into Spark and
    restricted to rows where ``true_match = 0``.  Every value in the ``vif``
    column of the result is then expected to be approximately 1.0.

    ``gamma_settings_4`` and ``params_4`` are requested as fixtures but are
    not referenced in the body.
    """
    dfpd = pd.read_sql("select * from df", sqlite_con_4)
    df_gammas = spark.createDataFrame(dfpd)
    df_gammas = df_gammas.filter("true_match = 0")

    # A single call with sampleratio=1.0; an earlier sampleratio=0.05 call
    # was removed because its result was overwritten before being checked.
    # NOTE(review): presumably 1.0 means "use all rows, no sampling" --
    # confirm against vif_gammas.
    res = vif_gammas(df_gammas, spark=spark, sampleratio=1.0).toPandas()

    # Exact (approximate-float) check replaces the looser 1 < vif < 10 range
    # test, which would reject a value marginally at or below 1.0.
    for val in res.vif.values:
        assert pytest.approx(val) == 1.0


def test_vif_no_gammas(spark, gamma_settings_4, params_4, sqlite_con_4):

"""
test that when the input doesn't have gamma columns the function exits gracefully
"""

dfpd = pd.read_sql("select * from df", sqlite_con_4)
Expand All @@ -38,7 +41,7 @@ def test_vif_no_gammas(spark, gamma_settings_4, params_4, sqlite_con_4):


def test_vif_fully_correlated(spark, gamma_settings_4, params_4, sqlite_con_4):

"""
test that when the input data has some columns that are fully correlated / associated the function deals with it gracefully
"""
Expand Down

0 comments on commit 862558a

Please sign in to comment.