<a href="https://colab.research.google.com/github/tnc-br/ddf_common/blob/bqddf/harness_api_integration_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

By default, Colab ships with an outdated version of the BigQuery client library, so upgrade it to the latest version first.

In [None]:
# Upgrade the BigQuery client library to its latest release. `%pip` installs
# into the environment of the running kernel.
%pip install --upgrade google-cloud-bigquery

## Imports

In [1]:
# Make the `ddfimport` stub importable; the stub is what allows the DDF EE API
# to be imported into this notebook.
import sys
# Clone the `test` branch of the stub repository unless it is already present.
# NOTE(review): the clone lands in the current working directory, while the
# guard and the sys.path entry use /content/ddf_common_stub, so this presumably
# assumes the working directory is /content -- confirm.
!if [ ! -d "/content/ddf_common_stub" ] ; then git clone -b test https://github.com/tnc-br/ddf_common_stub.git; fi
sys.path.append("/content/ddf_common_stub/")
import ddfimport

# Use this line to import from a branch of the GitHub repository.
# It will git clone the repository under a Google Drive path.
# This allows you to modify the source files by opening the file view and
# changing files under /content/gdrive/MyDrive/<branch_name>
ddfimport.ddf_source_control_pane()

# Alternatively, you can use this line to import from Main.
# If you import from Main, you will not be able to change files, but will not
# need a Google Login for Google Drive.
#ddfimport.ddf_import_common()

Cloning into 'ddf_common_stub'...
remote: Enumerating objects: 18, done.[K
remote: Counting objects: 100% (18/18), done.[K
remote: Compressing objects: 100% (18/18), done.[K
remote: Total 18 (delta 7), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (18/18), 7.36 KiB | 1.84 MiB/s, done.
Resolving deltas: 100% (7/7), done.


interactive(children=(Text(value='', description='Email', placeholder='Enter email'), Text(value='', descripti…

Authenticate

In [2]:
import importlib
import eeddf
import bqddf

# Re-execute both modules so that edits made to their source files are picked
# up without restarting the kernel. importlib.reload returns the reloaded
# module object, so rebinding the names keeps them pointing at it.
eeddf = importlib.reload(eeddf)
bqddf = importlib.reload(bqddf)

# Initialize DDF against the test environment.
eeddf.initialize_ddf(test_environment=True)

## Create fake tables for testing.

In [4]:
from google.cloud import bigquery
import time

# BigQuery client reused by the cells below.
client = bqddf._get_big_query_client()

# Minimal metadata schema: every column is a scalar and only eval_id is
# required.
metadata_schema = [
    bigquery.SchemaField(column_name, column_type, mode=column_mode)
    for column_name, column_type, column_mode in (
        ("eval_id", "STRING", "REQUIRED"),
        ("baseline_id", "STRING", "NULLABLE"),
        ("experiment_id", "STRING", "NULLABLE"),
        ("completion_timestamp", "TIMESTAMP", "NULLABLE"),
    )
]

# Point the test config at a table whose name is suffixed with the current
# Unix time (whole seconds).
bqddf._TEST_CONFIG['METADATA_TABLE'] = f'test_{int(time.time())}'

# Fully qualified id: <project>.<dataset>.<metadata table>
table_id = f"{bqddf._TEST_CONFIG['PROJECT_NAME']}.{bqddf._TEST_CONFIG['DATASET']}.{bqddf._TEST_CONFIG['METADATA_TABLE']}"

# Drop any leftover table with the same id, then create the test table.
client.delete_table(table_id, not_found_ok=True)
table = client.create_table(bigquery.Table(table_id, schema=metadata_schema))

print(
    "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id)
)

Created table river-sky-386919.harness_test_db.test_1709915066


In [5]:
# Schema of the results table: each row carries a required eval_id, an optional
# max_fraud_radius and a repeated pr_curve record of (precision, recall) points.
results_schema = [
    bigquery.SchemaField("eval_id", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("max_fraud_radius", "FLOAT", mode="NULLABLE"),
    bigquery.SchemaField("pr_curve", "RECORD", mode="REPEATED", fields=[
        bigquery.SchemaField("precision", "FLOAT", mode="NULLABLE"),
        bigquery.SchemaField("recall", "FLOAT", mode="NULLABLE"),
    ])
]

# Modify config values for test: the results table name is derived from the
# metadata table name.
bqddf._TEST_CONFIG['RESULTS_TABLE'] = bqddf._TEST_CONFIG['METADATA_TABLE'] + '_results'

# results_table_id evaluates to <project>.<dataset>.<metadata table>_results
results_table_id = f"{bqddf._TEST_CONFIG['PROJECT_NAME']}.{bqddf._TEST_CONFIG['DATASET']}.{bqddf._TEST_CONFIG['RESULTS_TABLE']}"
# Use results_schema here (not metadata_schema) so the table actually has the
# max_fraud_radius and pr_curve columns.
results_table = bigquery.Table(results_table_id, schema=results_schema)

# If it exists, delete it first.
client.delete_table(results_table_id, not_found_ok=True)

# Create the test table.
results_table = client.create_table(results_table)
print(
    "Created table {}.{}.{}".format(
        results_table.project, results_table.dataset_id, results_table.table_id)
)

Created table river-sky-386919.harness_test_db.test_1709915066_results


In [6]:
import json

def test_read_and_write():
  """Round-trip check: insert one eval through bqddf and read it back.

  Inserts a fake eval (metadata plus two PR-curve payloads that differ only in
  max_fraud_radius), checks that `bqddf.insert_eval` returns the id produced by
  `bqddf._generate_eval_id` for the same metadata, and then checks that
  `bqddf.get_eval_result` reports exactly one row for that id.

  Raises:
    AssertionError: if the returned id or the row count is not as expected.
  """
  def _make_pr_curve(max_fraud_radius):
    # Build a fresh dict and list on every call so the two payloads share no
    # mutable state.
    return {
        "max_fraud_radius" : max_fraud_radius,
        "pr_curve" : [
            {"precision" : precision, "recall" : recall}
            for precision, recall in ((0.9, 0.0), (0.8, 0.1), (0.7, 0.2))
        ],
    }

  metadata = {
      "baseline_id" : "baseline_2",
      "experiment_id" : "fake_experiment_1",
  }
  curves = [_make_pr_curve(10), _make_pr_curve(20)]

  # The id is computed up front so the insert's return value can be checked
  # against it and the same id reused for the read.
  eval_id = bqddf._generate_eval_id(metadata)
  assert bqddf.insert_eval(metadata, curves) == eval_id

  fetched = bqddf.get_eval_result(eval_id)
  assert fetched.total_rows == 1

## Run the test.

In [7]:
# The test reports failure by raising AssertionError; it prints nothing itself,
# so a run with no output and no exception means it passed.
test_read_and_write()

## Delete the test tables

In [8]:
# Remove both tables created for this test run. not_found_ok=True matches the
# pre-creation deletes earlier in the notebook: the cell can be re-run safely,
# and a missing metadata table does not stop the results table being deleted.
client.delete_table(table_id, not_found_ok=True)
client.delete_table(results_table_id, not_found_ok=True)