# Load Senzing truth-sets

These instructions load the [Senzing truth-sets] into the Senzing engine.

In this exercise:

1. [Senzing truth-sets] are downloaded from the internet.
1. Data source names are extracted from the downloaded data.
1. The data source names are added to the Senzing configuration.
1. Records from the downloaded data are added to Senzing.
1. Example queries are demonstrated.

**Warning**:
Remember that the notebooks are not permanent when using the Playground docker container. 
You can save a notebook to your workstation by selecting <b>File</b> > <b>Download</b> in Jupyter Lab.

[Senzing truth-sets]: https://github.com/Senzing/truth-sets

## Prepare Go enviroment

Define global imports, types, variables, and functions.

In [1]:
import (
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
    "github.com/senzing-garage/sz-sdk-go-grpc/szabstractfactory"
    "github.com/senzing-garage/sz-sdk-go/senzing"
)

type DataSourceKey struct {
    Data_Source string
}

type Record struct {
    Data_Source string
    Record_ID   string
}

var (
    ctx = context.TODO()
    fileName = "senzing-example-data.json"    
    err error
	grpcAddress = "localhost:8261"
    jsonDataSource DataSourceKey
    jsonRecord Record
    homePath = "./"
)

Create a function for testing error conditions.

In [2]:
func testErr(err error) {
    if err != nil {
        panic(err)
    }
}

Create a function for getting an SzAbstractFactory that talks over gRPC.

In [3]:
func getSzAbstractFactory() senzing.SzAbstractFactory {
    grpcConnection, err := grpc.NewClient(grpcAddress, grpc.WithTransportCredentials(insecure.NewCredentials()))
    testErr(err)
    return &szabstractfactory.Szabstractfactory{
    	GrpcConnection: grpcConnection,
    }    
} 

Create functions for getting Senzing objects from the SzAbstractFactory.

In [4]:
func getSzConfig(ctx context.Context, szAbstractFactory senzing.SzAbstractFactory) senzing.SzConfig {
    result, err := szAbstractFactory.CreateConfig(ctx)
    testErr(err)
    return result
}

func getSzConfigManager(ctx context.Context, szAbstractFactory senzing.SzAbstractFactory) senzing.SzConfigManager {
    result, err := szAbstractFactory.CreateConfigManager(ctx)
    testErr(err)
    return result
}

func getSzEngine(ctx context.Context, szAbstractFactory senzing.SzAbstractFactory) senzing.SzEngine {
    result, err := szAbstractFactory.CreateEngine(ctx)
    testErr(err)
    return result
}

 Create a function for downloading file via URL.

In [5]:
func downloadFile(url string, filepath string) error {
	outputFile, err := os.Create(filepath)
	if err != nil {
		return err
	}
	defer outputFile.Close()

	response, err := http.Get(url)
	if err != nil {
		return err
	}
	defer response.Body.Close()

	_, err = io.Copy(outputFile, response.Body)
	if err != nil {
		return err
	}

	return nil
}

Create a function for extracting "DATA_SOURCE" values from JSON lines in files.

Create a function to pretty print JSON.

Download truth-set files.

In [6]:
func getDataSources(filePath string) []string {
	result := []string{}
	file, err := os.Open(filePath)
	if err != nil {
		panic(err)
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := scanner.Bytes()
		err := json.Unmarshal(line, &jsonDataSource)
		testErr(err)
		if !slices.Contains(result, jsonDataSource.Data_Source) {
			result = append(result, jsonDataSource.Data_Source)
		}
	}

	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
	return result
}

In [7]:
func updateSenzingConfig(szAbstractFactory senzing.SzAbstractFactory, dataSourceNames []string) error {

	szConfig, err := szAbstractFactory.CreateConfig(ctx)
	if err != nil {
		return err
	}

	szConfigManager, err := szAbstractFactory.CreateConfigManager(ctx)
	if err != nil {
		return err
	}

	oldConfigID, err := szConfigManager.GetDefaultConfigID(ctx)
	if err != nil {
		return err
	}

	oldJsonConfig, err := szConfigManager.GetConfig(ctx, oldConfigID)
	if err != nil {
		return err
	}

	configHandle, err := szConfig.ImportConfig(ctx, oldJsonConfig)
	if err != nil {
		return err
	}

	for _, value := range dataSourceNames {
		_, err := szConfig.AddDataSource(ctx, configHandle, value)
		if err != nil {
			fmt.Println(err)
		}
	}

	newJsonConfig, err := szConfig.ExportConfig(ctx, configHandle)
	if err != nil {
		return err
	}

	newConfigID, err := szConfigManager.AddConfig(ctx, newJsonConfig, "Add TruthSet datasources")
	if err != nil {
		return err
	}

	err = szConfigManager.ReplaceDefaultConfigID(ctx, oldConfigID, newConfigID)
	if err != nil {
		return err
	}

	err = szAbstractFactory.Reinitialize(ctx, newConfigID)
	if err != nil {
		return err
	}

	return nil
}

In [8]:
func addRecords(szAbstractFactory senzing.SzAbstractFactory, fileName string) error {
	file, err := os.Open(fileName)
	if err != nil {
		return err
	}
	defer file.Close()

	szEngine, err := szAbstractFactory.CreateEngine(ctx)
	if err != nil {
		return err
	}

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := scanner.Bytes()
		err := json.Unmarshal(line, &jsonRecord)
		testErr(err)
		result, err := szEngine.AddRecord(ctx, jsonRecord.Data_Source, jsonRecord.Record_ID, string(line), senzing.SzWithInfo)
		testErr(err)
		fmt.Println(result)
	}
	return nil
}

In [9]:
func asPrettyJSON(str string) string {
	var prettyJSON bytes.Buffer
	if err := json.Indent(&prettyJSON, []byte(str), "", "    "); err != nil {
		return str
	}
	return prettyJSON.String()
}

In [11]:
%%
// User input.

inputFile := fmt.Sprintf("%s%s", homePath, fileName)

// Create Senzing gRPC client.

grpcConnection, err := grpc.NewClient(grpcAddress, grpc.WithTransportCredentials(insecure.NewCredentials()))
testErr(err)
szAbstractFactory := &szabstractfactory.Szabstractfactory{
	GrpcConnection: grpcConnection,
}

// Identify datasources and update Senzing configuration.

dataSourceNames := getDataSources(inputFile)
fmt.Printf("Found the following DATA_SOURCE values in the data: %v\n", dataSourceNames)
err = updateSenzingConfig(szAbstractFactory, dataSourceNames)
testErr(err)

// Add records.

err = addRecords(szAbstractFactory, inputFile)
testErr(err)

Found the following DATA_SOURCE values in the data: [MY_DATASOURCE]
{"DATA_SOURCE":"MY_DATASOURCE","RECORD_ID":"157616467","AFFECTED_ENTITIES":[{"ENTITY_ID":100001}],"INTERESTING_ENTITIES":{"ENTITIES":[]}}
{"DATA_SOURCE":"MY_DATASOURCE","RECORD_ID":"115212881","AFFECTED_ENTITIES":[{"ENTITY_ID":100002}],"INTERESTING_ENTITIES":{"ENTITIES":[]}}
{"DATA_SOURCE":"MY_DATASOURCE","RECORD_ID":"645217904","AFFECTED_ENTITIES":[{"ENTITY_ID":100003}],"INTERESTING_ENTITIES":{"ENTITIES":[]}}
{"DATA_SOURCE":"MY_DATASOURCE","RECORD_ID":"212359276","AFFECTED_ENTITIES":[{"ENTITY_ID":100004}],"INTERESTING_ENTITIES":{"ENTITIES":[]}}
{"DATA_SOURCE":"MY_DATASOURCE","RECORD_ID":"526429955","AFFECTED_ENTITIES":[{"ENTITY_ID":100005}],"INTERESTING_ENTITIES":{"ENTITIES":[]}}
{"DATA_SOURCE":"MY_DATASOURCE","RECORD_ID":"604947448","AFFECTED_ENTITIES":[{"ENTITY_ID":100006}],"INTERESTING_ENTITIES":{"ENTITIES":[]}}
{"DATA_SOURCE":"MY_DATASOURCE","RECORD_ID":"513789670","AFFECTED_ENTITIES":[{"ENTITY_ID":100007}],"INTER

## Identify data sources

Discover `DATA_SOURCE` values in records.

In [9]:
var dataSources = getDataSources()

%%
fmt.Printf("Found the following DATA_SOURCE values in the data: %v\n", dataSources)

Found the following DATA_SOURCE values in the data: [CUSTOMERS REFERENCE WATCHLIST]


## Update Senzing configuration

Create an [abstract factory] for accessing Senzing via gRPC.

[abstract factory]: https://en.wikipedia.org/wiki/Abstract_factory_pattern

In [10]:
var szAbstractFactory = getSzAbstractFactory()

Using the abstract factory, create Senzing objects.
**Note:** This implementation is a little awkward, but it is needed to make global variables using the [gonb] Jupyter Kernel.
Normally it would be something like:

```go
szConfig, err := szAbstractFactory.CreateConfig(ctx)
```

[gonb]: https://github.com/janpfeifer/gonb

In [11]:
var szConfig = getSzConfig(ctx, szAbstractFactory)
var szConfigManager = getSzConfigManager(ctx, szAbstractFactory)

Get current Senzing configuration.

In [12]:
%%
oldConfigID, err := szConfigManager.GetDefaultConfigID(ctx)
testErr(err)

oldJsonConfig, err := szConfigManager.GetConfig(ctx, oldConfigID)
testErr(err)

configHandle, err := szConfig.ImportConfig(ctx, oldJsonConfig)
testErr(err)

for _, value := range dataSources {
    _, err := szConfig.AddDataSource(ctx, configHandle, value)
    if err != nil {
        fmt.Println(err)
    }
}  

newJsonConfig, err := szConfig.ExportConfig(ctx, configHandle)
testErr(err)

newConfigID, err := szConfigManager.AddConfig(ctx, newJsonConfig, "Add TruthSet datasources")
testErr(err)

err = szConfigManager.ReplaceDefaultConfigID(ctx, oldConfigID, newConfigID)
testErr(err)

err = szAbstractFactory.Reinitialize(ctx, newConfigID)
testErr(err)

## Add records

Call Senzing repeatedly to add records.

In [13]:
var szEngine = getSzEngine(ctx, szAbstractFactory)

%%
for _, value := range truthSetFileNames {
    filepath := fmt.Sprintf("%s%s", homePath, value)
    file, err := os.Open(filepath)
    testErr(err)
    defer file.Close()      

    scanner := bufio.NewScanner(file)
    for scanner.Scan() {
        line := scanner.Bytes()
        err := json.Unmarshal(line, &jsonRecord)  
        testErr(err)
        result, err := szEngine.AddRecord(ctx, jsonRecord.Data_Source, jsonRecord.Record_ID, string(line), senzing.SzWithInfo)
        testErr(err)
        fmt.Println(result)
    }    
}

{"DATA_SOURCE":"CUSTOMERS","RECORD_ID":"1001","AFFECTED_ENTITIES":[{"ENTITY_ID":1}],"INTERESTING_ENTITIES":{"ENTITIES":[]}}
{"DATA_SOURCE":"CUSTOMERS","RECORD_ID":"1002","AFFECTED_ENTITIES":[{"ENTITY_ID":1}],"INTERESTING_ENTITIES":{"ENTITIES":[]}}
{"DATA_SOURCE":"CUSTOMERS","RECORD_ID":"1003","AFFECTED_ENTITIES":[{"ENTITY_ID":1}],"INTERESTING_ENTITIES":{"ENTITIES":[]}}
{"DATA_SOURCE":"CUSTOMERS","RECORD_ID":"1004","AFFECTED_ENTITIES":[{"ENTITY_ID":1}],"INTERESTING_ENTITIES":{"ENTITIES":[]}}
{"DATA_SOURCE":"CUSTOMERS","RECORD_ID":"1005","AFFECTED_ENTITIES":[{"ENTITY_ID":1}],"INTERESTING_ENTITIES":{"ENTITIES":[]}}
{"DATA_SOURCE":"CUSTOMERS","RECORD_ID":"1009","AFFECTED_ENTITIES":[{"ENTITY_ID":6}],"INTERESTING_ENTITIES":{"ENTITIES":[]}}
{"DATA_SOURCE":"CUSTOMERS","RECORD_ID":"1010","AFFECTED_ENTITIES":[{"ENTITY_ID":6}],"INTERESTING_ENTITIES":{"ENTITIES":[]}}
{"DATA_SOURCE":"CUSTOMERS","RECORD_ID":"1011","AFFECTED_ENTITIES":[{"ENTITY_ID":8}],"INTERESTING_ENTITIES":{"ENTITIES":[]}}
{"DATA_S

## View results

Retrieve an entity by identifying a record of the entity. Use the `SZ_ENTITY_INCLUDE_RECORD_SUMMARY` flag from among the [get_entity flags].

[get_entity flags]: https://senzing.com/docs/4_beta/flags/flags_get_entity/

In [14]:
%%
customer1070Entity, err := szEngine.GetEntityByRecordID(ctx, "CUSTOMERS", "1070", senzing.SzEntityIncludeRecordSummary)
testErr(err)
fmt.Println(asPrettyJSON(customer1070Entity))

{
    "RESOLVED_ENTITY": {
        "ENTITY_ID": 55,
        "RECORD_SUMMARY": [
            {
                "DATA_SOURCE": "CUSTOMERS",
                "RECORD_COUNT": 2
            },
            {
                "DATA_SOURCE": "REFERENCE",
                "RECORD_COUNT": 1
            }
        ]
    }
}


Search for entities by attributes.

In [15]:
%%
searchProfile := ""
searchQuery := `{
    "name_full": "robert smith",
    "date_of_birth": "11/12/1978"
}`

searchResult, err := szEngine.SearchByAttributes(ctx, searchQuery, searchProfile, senzing.SzSearchByAttributesDefaultFlags)
testErr(err)
fmt.Println(asPrettyJSON(searchResult))

{
    "RESOLVED_ENTITIES": [
        {
            "MATCH_INFO": {
                "MATCH_LEVEL_CODE": "RESOLVED",
                "MATCH_KEY": "+NAME+DOB",
                "ERRULE_CODE": "SNAME_SSTAB",
                "CANDIDATE_KEYS": {
                    "DOB": [
                        {
                            "FEAT_ID": 21,
                            "FEAT_DESC": "11/12/1978"
                        }
                    ],
                    "NAMEDATE_KEY": [
                        {
                            "FEAT_ID": 14,
                            "FEAT_DESC": "RPRT|SM0|DOB.MMDD_HASH=1211"
                        },
                        {
                            "FEAT_ID": 15,
                            "FEAT_DESC": "RPRT|SM0|DOB=71211"
                        },
                        {
                            "FEAT_ID": 33,
                            "FEAT_DESC": "RPRT|SM0|DOB.MMYY_HASH=1178"
                        }
                    ],
        