In [0]:
# Install the packages listed in requirements.txt (path is relative to the
# current working directory) using the %pip magic.
%pip install -r requirements.txt

In [0]:
# Restart the Python process. Placed right after the %pip cell, presumably so
# the freshly installed packages are the ones picked up by the imports below.
# NOTE(review): a restart is expected to drop Python state defined earlier in
# the notebook -- confirm nothing above this cell needs to survive.
dbutils.library.restartPython()


In [0]:
from pathlib import Path
import os  # not used in this cell; kept because later cells may rely on it
import shutil

import pytest

# Run the repo's pytest suite from a scratch copy under /tmp and fail this
# cell when the run is not clean.

# The repo root is taken to be the current working directory, which holds
# when the notebook runs from /Workspace/.../user-engagement-analysis.
repo_root = Path.cwd()
tests_dir = repo_root / "tests"
if not tests_dir.is_dir():
    # Check before touching /tmp so a wrong working directory gives a clear
    # error instead of a failure halfway through the copy.
    raise FileNotFoundError(f"tests directory not found: {tests_dir}")

# Copy to tmp (avoids __pycache__ issues on /Workspace); any copy left over
# from a previous run is removed first so deleted tests do not linger.
dst_dir = Path("/tmp/tests")
if dst_dir.exists():
    shutil.rmtree(dst_dir)
shutil.copytree(tests_dir, dst_dir)

exit_code = pytest.main([
    str(dst_dir),
    "-v", "-rP",
    # Keep pytest's cache out of the copied tree as well.
    "-o", "cache_dir=/tmp/pytest_cache",
    # Silence known third-party deprecation noise.
    "-W", "ignore:distutils Version classes are deprecated:DeprecationWarning",
    "-W", "ignore::DeprecationWarning:pyspark.sql.pandas.utils",
])
print("pytest exit code:", exit_code)

# pytest.main() reports failures only through its return value; without this
# check the cell (and any job running the notebook) would succeed even when
# tests fail or nothing is collected.
if exit_code != 0:
    raise RuntimeError(f"pytest run failed with exit code {int(exit_code)}")


In [None]:
from tests.test_contract_dq import ContractDQ, _valid_df
from pyspark.sql import SparkSession

# Reuse the active Spark session, or start one if none exists yet.
spark = SparkSession.builder.getOrCreate()

# Build the sample DataFrame with the test module's own helper.
sample_df = _valid_df(spark)

# Wrap the sample in ContractDQ and run each contract check in turn; every
# check is looked up and invoked in the listed order.
dq = ContractDQ(sample_df)
for check_name in (
    "test_schema_exact",
    "test_no_extra_columns",
    "test_required_not_null_and_ranges_and_domain",
    "test_timestamp_parseable",
):
    getattr(dq, check_name)()

# Write the invalid rows out to a DBFS location and report where they went.
safe_path = 'dbfs:/FileStore/gore_logs'
dq.export_invalid_to_csv(safe_path)
print(f"Exported invalid rows to: {safe_path}")
