# Load Senzing truth-sets

These instructions load the [Senzing truth-sets] into the Senzing engine.

In this exercise:

1. [Senzing truth-sets] are downloaded from the internet.
1. Data source names are extracted from the downloaded data.
1. The data source names are added to the Senzing configuration.
1. Records from the downloaded data are added to Senzing.
1. Example queries are demonstrated.

**Warning**:
Remember that the notebooks are not permanent when using the Playground Docker container.
You can save a notebook to your workstation by selecting <b>File</b> > <b>Download</b> in Jupyter Lab.

[Senzing truth-sets]: https://github.com/Senzing/truth-sets

## Prepare Go environment

Define global imports, types, variables, and functions.

In [None]:
import (
	"fmt"

	"github.com/senzing-garage/sz-sdk-go-grpc/szabstractfactory"
	"github.com/senzing-garage/sz-sdk-go/senzing"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

// DataSourceKey captures only the "DATA_SOURCE" value of a JSON record; every
// other key in the record is ignored when unmarshalling.
type DataSourceKey struct {
	// Data_Source is bound explicitly to the "DATA_SOURCE" key instead of
	// relying on encoding/json's case-insensitive field-name matching.
	Data_Source string `json:"DATA_SOURCE"`
}

// Record captures the two keys of a JSON record that identify it: its data
// source and its record ID. Every other key in the record is ignored when
// unmarshalling.
//
// The fields are bound explicitly to the upper-case JSON keys instead of
// relying on encoding/json's case-insensitive field-name matching.
type Record struct {
	Data_Source string `json:"DATA_SOURCE"`
	Record_ID   string `json:"RECORD_ID"`
}

// Notebook-wide variables shared by the cells that follow.
var (
	// Context handed to every call that takes one.
	ctx = context.TODO()
	err error

	// Address used when creating the gRPC client.
	grpcAddress = "localhost:8261"

	// Location of the truth-set files: remote URL prefix, local path prefix,
	// and the file names appended to both.
	truthSetURLPrefix = "https://raw.githubusercontent.com/Senzing/truth-sets/refs/heads/main/truthsets/demo/"
	homePath          = "./"
	truthSetFileNames = []string{"customers.json", "reference.json", "watchlist.json"}

	// Scratch values for decoding one JSON line at a time.
	jsonDataSource DataSourceKey
	jsonRecord     Record

	szConfig senzing.SzConfig
)

Create a function for testing error conditions.

In [None]:
// testErr panics with err when err is non-nil; a nil err is a no-op.
func testErr(err error) {
	if err == nil {
		return
	}
	panic(err)
}

Create a function for getting an SzAbstractFactory that talks over gRPC.

In [None]:
// getSzAbstractFactory creates a gRPC client for grpcAddress using insecure
// transport credentials and returns an SzAbstractFactory that uses that
// connection. It panics via testErr if the client cannot be created.
func getSzAbstractFactory() senzing.SzAbstractFactory {
	credentials := grpc.WithTransportCredentials(insecure.NewCredentials())
	connection, err := grpc.NewClient(grpcAddress, credentials)
	testErr(err)

	factory := &szabstractfactory.Szabstractfactory{GrpcConnection: connection}
	return factory
}

Create functions for getting Senzing objects from the SzAbstractFactory.

In [None]:
// getSzConfig asks szAbstractFactory to create an SzConfig and returns it,
// panicking via testErr if creation fails.
func getSzConfig(ctx context.Context, szAbstractFactory senzing.SzAbstractFactory) senzing.SzConfig {
	config, createErr := szAbstractFactory.CreateConfig(ctx)
	testErr(createErr)
	return config
}

// getSzConfigManager asks szAbstractFactory to create an SzConfigManager and
// returns it, panicking via testErr if creation fails.
func getSzConfigManager(ctx context.Context, szAbstractFactory senzing.SzAbstractFactory) senzing.SzConfigManager {
	configManager, createErr := szAbstractFactory.CreateConfigManager(ctx)
	testErr(createErr)
	return configManager
}

// getSzDiagnostic asks szAbstractFactory to create an SzDiagnostic and returns
// it, panicking via testErr if creation fails.
func getSzDiagnostic(ctx context.Context, szAbstractFactory senzing.SzAbstractFactory) senzing.SzDiagnostic {
	diagnostic, createErr := szAbstractFactory.CreateDiagnostic(ctx)
	testErr(createErr)
	return diagnostic
}

// getSzEngine asks szAbstractFactory to create an SzEngine and returns it,
// panicking via testErr if creation fails.
func getSzEngine(ctx context.Context, szAbstractFactory senzing.SzAbstractFactory) senzing.SzEngine {
	engine, createErr := szAbstractFactory.CreateEngine(ctx)
	testErr(createErr)
	return engine
}

// getSzProduct asks szAbstractFactory to create an SzProduct and returns it,
// panicking via testErr if creation fails.
func getSzProduct(ctx context.Context, szAbstractFactory senzing.SzAbstractFactory) senzing.SzProduct {
	product, createErr := szAbstractFactory.CreateProduct(ctx)
	testErr(createErr)
	return product
}

Create a function for downloading a file from a URL.

In [None]:
// downloadFile fetches url with an HTTP GET and writes the response body to
// filepath, creating the file or truncating an existing one.
//
// It returns an error if the request fails, if the server answers with any
// status other than 200 OK, or if the file cannot be created, written, or
// closed.
func downloadFile(url string, filepath string) (err error) {
	response, err := http.Get(url)
	if err != nil {
		return fmt.Errorf("downloading %q: %w", url, err)
	}
	defer response.Body.Close()

	// Without this check an error page (e.g. a 404 body) would be saved as if
	// it were the requested data.
	if response.StatusCode != http.StatusOK {
		return fmt.Errorf("downloading %q: unexpected HTTP status %s", url, response.Status)
	}

	// The file is created only after a successful response so that a failed
	// download does not leave an empty file behind.
	outputFile, err := os.Create(filepath)
	if err != nil {
		return fmt.Errorf("creating %q: %w", filepath, err)
	}
	defer func() {
		// A failed Close on a written file can mean lost data; report it
		// unless an earlier error is already being returned.
		if closeErr := outputFile.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("closing %q: %w", filepath, closeErr)
		}
	}()

	if _, err = io.Copy(outputFile, response.Body); err != nil {
		return fmt.Errorf("writing %q: %w", filepath, err)
	}
	return nil
}

Create a function for extracting "DATA_SOURCE" values from JSON lines in files.

In [None]:
// getDataSources returns the distinct "DATA_SOURCE" values found in the
// truth-set files, in order of first appearance.
//
// Each name in truthSetFileNames is appended to homePath and the resulting
// file is read as JSON lines (one JSON object per line). Any failure to open,
// read, or parse a file panics via testErr.
func getDataSources() []string {
	result := []string{}
	for _, fileName := range truthSetFileNames {
		result = appendDataSources(result, fmt.Sprintf("%s%s", homePath, fileName))
	}
	return result
}

// appendDataSources scans the JSON-lines file at filepath and appends to
// dataSources every "DATA_SOURCE" value that is not already present. It
// returns the extended slice.
func appendDataSources(dataSources []string, filepath string) []string {
	file, err := os.Open(filepath)
	testErr(err)
	// Deferring in this helper, rather than inside the caller's loop, closes
	// each file as soon as it has been scanned.
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := scanner.Bytes()
		if len(line) == 0 {
			continue // tolerate empty lines between records
		}

		// Decode into a fresh value for every line so that a record without
		// "DATA_SOURCE" cannot inherit the value of the previous record.
		var key DataSourceKey
		testErr(json.Unmarshal(line, &key))

		if key.Data_Source == "" || slices.Contains(dataSources, key.Data_Source) {
			continue
		}
		dataSources = append(dataSources, key.Data_Source)
	}
	testErr(scanner.Err())

	return dataSources
}

Create a function to pretty print JSON.

In [None]:
// prettyJSON re-indents the JSON text in str using four spaces per nesting
// level. If str is not valid JSON it returns an empty string and the error
// reported by json.Indent.
func prettyJSON(str string) (string, error) {
	indented := &bytes.Buffer{}
	err := json.Indent(indented, []byte(str), "", "    ")
	if err != nil {
		return "", err
	}
	return indented.String(), nil
}

Download truth-set files.

In [None]:
%%
// Download every truth-set file to homePath. truthSetURLPrefix already ends
// with "/", so the file name is appended directly; inserting another "/"
// would put "//" into the URL path.
for _, fileName := range truthSetFileNames {
	url := fmt.Sprintf("%s%s", truthSetURLPrefix, fileName)
	filepath := fmt.Sprintf("%s%s", homePath, fileName)
	testErr(downloadFile(url, filepath))
}

## Identify data sources

Discover `DATA_SOURCE` values in records.

In [None]:
// dataSources holds the distinct DATA_SOURCE values returned by
// getDataSources. It is declared as a global so that later cells can use it.
var dataSources = getDataSources()

%%
fmt.Printf("Found the following DATA_SOURCE values in the data: %v\n", dataSources)

## Update Senzing configuration

Create an [abstract factory] for accessing Senzing via gRPC.

[abstract factory]: https://en.wikipedia.org/wiki/Abstract_factory_pattern

In [None]:
// szAbstractFactory is the global factory from which later cells create their
// Senzing objects.
var szAbstractFactory = getSzAbstractFactory()

Using the abstract factory, create Senzing objects.
**Note:** This implementation is a little awkward, but it is needed to make global variables using the [gonb] Jupyter Kernel.
Normally it would be something like:

```go
szConfig, err := szAbstractFactory.CreateConfig(ctx)
```

[gonb]: https://github.com/janpfeifer/gonb

In [None]:
// Global Senzing objects created from szAbstractFactory. Each getter panics
// via testErr if the object cannot be created.
var szConfig = getSzConfig(ctx, szAbstractFactory)
var szConfigManager = getSzConfigManager(ctx, szAbstractFactory)
var szDiagnostic = getSzDiagnostic(ctx, szAbstractFactory)

Get the current Senzing configuration, add the discovered data sources to it, and make the result the new default configuration.

In [None]:
%%
// Fetch the configuration that is currently the default.
oldConfigID, err := szConfigManager.GetDefaultConfigID(ctx)
testErr(err)

oldJSONConfig, err := szConfigManager.GetConfig(ctx, oldConfigID)
testErr(err)

// Import it to obtain a handle through which it can be modified.
configHandle, err := szConfig.ImportConfig(ctx, oldJSONConfig)
testErr(err)

// Best effort: a failure to add one data source is printed and the remaining
// data sources are still attempted (presumably this covers a data source that
// already exists; confirm against the Senzing documentation).
for _, dataSource := range dataSources {
	if _, err := szConfig.AddDataSource(ctx, configHandle, dataSource); err != nil {
		fmt.Println(err)
	}
}

newJSONConfig, err := szConfig.ExportConfig(ctx, configHandle)
testErr(err)

// The handle is not used after the export, so release it.
err = szConfig.CloseConfig(ctx, configHandle)
testErr(err)

// Store the modified configuration and make it the default.
newConfigID, err := szConfigManager.AddConfig(ctx, newJSONConfig, "Add TruthSet datasources")
testErr(err)

err = szConfigManager.ReplaceDefaultConfigID(ctx, oldConfigID, newConfigID)
testErr(err)

// A failed reinitialization must not go unnoticed: later cells assume the new
// configuration is in effect.
err = szAbstractFactory.Reinitialize(ctx, newConfigID)
testErr(err)

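TODO: Remove after debugging. The cell below prints the default configuration ID and the data sources it contains. It also creates the `szEngine` variable used by later cells, so keep that declaration when removing the debugging code.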

In [None]:
// szEngine is the global Senzing engine used by the cells that follow.
var szEngine = getSzEngine(ctx, szAbstractFactory)

%%
// Debugging aid: print the default configuration ID and the data sources
// registered in that configuration.
defaultConfigID, err := szConfigManager.GetDefaultConfigID(ctx)
testErr(err) // previously unchecked: the error was overwritten by the next call
fmt.Printf("default: %d\n", defaultConfigID)

configDefinition, err := szConfigManager.GetConfig(ctx, defaultConfigID)
testErr(err)

configHandle, err := szConfig.ImportConfig(ctx, configDefinition)
testErr(err)

dataSourcesJSON, err := szConfig.GetDataSources(ctx, configHandle)
testErr(err)

fmt.Println(dataSourcesJSON)

// The handle is not used after this point, so release it.
err = szConfig.CloseConfig(ctx, configHandle)
testErr(err)

## Add records

Call Senzing repeatedly to add records.

In [None]:
%%
for _, value := range truthSetFileNames {
    filepath := fmt.Sprintf("%s%s", homePath, value)
    file, err := os.Open(filepath)
    testErr(err)
    defer file.Close()      

    scanner := bufio.NewScanner(file)
    for scanner.Scan() {
        line := scanner.Bytes()
        err := json.Unmarshal(line, &jsonRecord)  
        testErr(err)
        result, err := szEngine.AddRecord(ctx, jsonRecord.Data_Source, jsonRecord.Record_ID, string(line), senzing.SzWithInfo)
        testErr(err)
        fmt.Println(result)
    }    
}

## View results

Retrieve an entity by identifying a record of the entity. Use the `SZ_ENTITY_INCLUDE_RECORD_SUMMARY` flag from among the [get_entity flags].

[get_entity flags]: https://senzing.com/docs/4_beta/flags/flags_get_entity/

In [None]:
%%
customer1070Entity, err := szEngine.GetEntityByRecordID(ctx, "CUSTOMERS", "1070", senzing.SzEntityIncludeRecordSummary)
testErr(err)

customer1070EntityPretty, err := prettyJSON(customer1070Entity)
testErr(err)

fmt.Println(customer1070EntityPretty)

Search for entities by attributes.

In [None]:
%%
// Attributes to search for, as a JSON document.
attributes := `{
    "name_full": "robert smith",
    "date_of_birth": "11/12/1978"
}`

// Empty search profile (presumably selects the default profile; confirm
// against the Senzing documentation).
profile := ""

rawResult, err := szEngine.SearchByAttributes(ctx, attributes, profile, senzing.SzSearchByAttributesDefaultFlags)
testErr(err)

formattedResult, err := prettyJSON(rawResult)
testErr(err)

fmt.Println(formattedResult)