# Load Senzing truth-sets

These instructions load the [Senzing truth-sets] into the Senzing engine.

In this exercise:

1. [Senzing truth-sets] are downloaded from the internet.
1. Data source names are extracted from the downloaded data.
1. The data source names are added to the Senzing configuration.
1. Records from the downloaded data are added to Senzing.
1. Example queries are demonstrated.

**Warning**:
Notebooks are not saved permanently when using the Playground Docker container.
You can save a notebook to your workstation by selecting <b>File</b> > <b>Download</b> in Jupyter Lab.

[Senzing truth-sets]: https://github.com/Senzing/truth-sets

## Prepare Go environment

Define global imports, types, and variables.

In [None]:
import (
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
    "github.com/senzing-garage/sz-sdk-go-grpc/szabstractfactory"
    "github.com/senzing-garage/sz-sdk-go/senzing"
)

// DataSourceKey holds only the "DATA_SOURCE" value of a JSON record.
type DataSourceKey struct {
    // Data_Source maps to the record's "DATA_SOURCE" key. The explicit tag
    // makes the mapping visible instead of relying on encoding/json's
    // case-insensitive field-name fallback.
    Data_Source string `json:"DATA_SOURCE"`
}

// Record holds the two identifying values of a JSON record: its data source
// and its record ID. All other keys in the record are ignored when decoding.
type Record struct {
    // Data_Source maps to the record's "DATA_SOURCE" key.
    Data_Source string `json:"DATA_SOURCE"`
    // Record_ID maps to the record's "RECORD_ID" key.
    Record_ID string `json:"RECORD_ID"`
}

// Settings shared by the cells below.
var (
    // Address of the Senzing gRPC server.
    grpcAddress = "localhost:8261"
    // Directory prefix for downloaded files; ends with a path separator.
    homePath = "./"
    // File names appended to truthSetURLPrefix to form download URLs.
    truthSetFileNames = []string{"customers.json", "reference.json", "watchlist.json"}
    // Base URL of the truth-set files; note that it ends with a trailing slash.
    truthSetURLPrefix = "https://raw.githubusercontent.com/Senzing/truth-sets/refs/heads/main/truthsets/demo/"
)

// Package-level state shared by the cells below.
var (
    ctx            = context.TODO()
    err            error
    jsonDataSource DataSourceKey
    jsonRecord     Record
    szConfig       senzing.SzConfig
)

## Define functions

Create a function for testing error conditions.

In [None]:
// testErr panics with err when err is non-nil; a nil err is a no-op.
func testErr(err error) {
    if err == nil {
        return
    }
    panic(err)
}

Create a function for getting an SzAbstractFactory that talks over gRPC.

In [None]:
// getSzAbstractFactory creates a gRPC client for grpcAddress using insecure
// (no TLS) transport credentials and returns an abstract factory that wraps
// that connection. It panics, via testErr, if the client cannot be created.
func getSzAbstractFactory() senzing.SzAbstractFactory {
    transportOption := grpc.WithTransportCredentials(insecure.NewCredentials())
    connection, clientErr := grpc.NewClient(grpcAddress, transportOption)
    testErr(clientErr)

    factory := &szabstractfactory.Szabstractfactory{GrpcConnection: connection}
    return factory
}

Create a function for downloading a file from a URL.

In [None]:
// downloadFile fetches url with an HTTP GET and writes the response body to
// the file at filepath, creating or truncating it.
//
// The request is made before the file is created, so a failed request or a
// non-200 response leaves any existing file untouched. A non-200 status is
// returned as an error rather than saving the server's error page as data.
// The error from closing the output file is returned because a failed close
// can mean the data was not fully written.
func downloadFile(url string, filepath string) error {
	response, err := http.Get(url)
	if err != nil {
		return err
	}
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		return fmt.Errorf("downloading %s: unexpected HTTP status %s", url, response.Status)
	}

	outputFile, err := os.Create(filepath)
	if err != nil {
		return err
	}

	if _, err := io.Copy(outputFile, response.Body); err != nil {
		// The copy error is the one worth reporting; the close is best-effort.
		outputFile.Close()
		return err
	}

	return outputFile.Close()
}

Create a function for extracting "DATA_SOURCE" values from a file of JSON lines.

In [None]:
// extractDataSources returns the distinct "DATA_SOURCE" values found in the
// JSON-lines file at filePath, in order of first appearance.
//
// Blank lines and records without a "DATA_SOURCE" value are skipped. The
// function has no error return, so failing to open, read, or parse the file
// panics.
func extractDataSources(filePath string) []string {
	result := []string{}
	file, err := os.Open(filePath)
	if err != nil {
		panic(err)
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	// Raise bufio.Scanner's default 64 KiB token limit so long records are
	// not rejected with bufio.ErrTooLong.
	scanner.Buffer(make([]byte, 0, 64*1024), 16*1024*1024)
	for scanner.Scan() {
		line := bytes.TrimSpace(scanner.Bytes())
		if len(line) == 0 {
			continue
		}

		// Decode into a fresh value on every line so that a record lacking
		// "DATA_SOURCE" cannot inherit the value of the previous record.
		var record struct {
			DataSource string `json:"DATA_SOURCE"`
		}
		if err := json.Unmarshal(line, &record); err != nil {
			panic(err)
		}
		if record.DataSource == "" || slices.Contains(result, record.DataSource) {
			continue
		}
		result = append(result, record.DataSource)
	}

	// Panic rather than log.Fatal so that every failure in this function is
	// reported the same way and deferred calls still run.
	if err := scanner.Err(); err != nil {
		panic(err)
	}
	return result
}

Create a function for adding data source values to the Senzing configuration.

In [None]:
// addDatasourcesToSenzingConfig registers dataSourceNames in the Senzing
// configuration and makes the result the active default.
//
// Steps: fetch the current default configuration, import it to obtain a
// handle, add each data source to that handle, export the result, store it
// as a new configuration, swap the default configuration ID from old to new,
// and reinitialize the factory with the new ID.
//
// A failure to add an individual data source is printed and skipped rather
// than returned; every other failure is returned with context describing the
// step that failed.
//
// NOTE(review): assumes the SzConfig in use provides
// CloseConfig(ctx, configHandle) for releasing the imported handle — confirm
// against the SDK version pinned for this notebook.
func addDatasourcesToSenzingConfig(szAbstractFactory senzing.SzAbstractFactory, dataSourceNames []string) error {
	// Local names differ from the package-level szConfig to avoid shadowing it.
	config, err := szAbstractFactory.CreateConfig(ctx)
	if err != nil {
		return fmt.Errorf("creating SzConfig: %w", err)
	}

	configManager, err := szAbstractFactory.CreateConfigManager(ctx)
	if err != nil {
		return fmt.Errorf("creating SzConfigManager: %w", err)
	}

	oldConfigID, err := configManager.GetDefaultConfigID(ctx)
	if err != nil {
		return fmt.Errorf("getting default config ID: %w", err)
	}

	oldJSONConfig, err := configManager.GetConfig(ctx, oldConfigID)
	if err != nil {
		return fmt.Errorf("getting config %d: %w", oldConfigID, err)
	}

	configHandle, err := config.ImportConfig(ctx, oldJSONConfig)
	if err != nil {
		return fmt.Errorf("importing config %d: %w", oldConfigID, err)
	}
	// Release the handle on every return path; it was previously never closed.
	defer func() {
		if closeErr := config.CloseConfig(ctx, configHandle); closeErr != nil {
			fmt.Println(closeErr)
		}
	}()

	attempted := make([]string, 0, len(dataSourceNames))
	for _, name := range dataSourceNames {
		// Skip names repeated in the input so each is attempted only once.
		if slices.Contains(attempted, name) {
			continue
		}
		attempted = append(attempted, name)

		// Best effort: report the failure and continue with the next name.
		if _, err := config.AddDataSource(ctx, configHandle, name); err != nil {
			fmt.Println(err)
		}
	}

	newJSONConfig, err := config.ExportConfig(ctx, configHandle)
	if err != nil {
		return fmt.Errorf("exporting updated config: %w", err)
	}

	newConfigID, err := configManager.AddConfig(ctx, newJSONConfig, "Add TruthSet datasources")
	if err != nil {
		return fmt.Errorf("storing updated config: %w", err)
	}

	if err := configManager.ReplaceDefaultConfigID(ctx, oldConfigID, newConfigID); err != nil {
		return fmt.Errorf("replacing default config ID %d with %d: %w", oldConfigID, newConfigID, err)
	}

	if err := szAbstractFactory.Reinitialize(ctx, newConfigID); err != nil {
		return fmt.Errorf("reinitializing with config %d: %w", newConfigID, err)
	}

	return nil
}

Create a function for adding records into Senzing.

In [None]:
// addRecords reads the JSON-lines file at filepath and adds each record to
// Senzing through an engine obtained from szAbstractFactory, printing the
// result returned for every record.
//
// Each line is decoded only to obtain its "DATA_SOURCE" and "RECORD_ID"; the
// line itself is passed on as the record definition. Blank lines are skipped.
// The first failure stops the load and is returned with the file name and
// line number. A read error from the scanner is also returned, so a load
// that stopped partway through the file is not mistaken for success.
func addRecords(szAbstractFactory senzing.SzAbstractFactory, filepath string) error {
	file, err := os.Open(filepath)
	if err != nil {
		return err
	}
	defer file.Close()

	szEngine, err := szAbstractFactory.CreateEngine(ctx)
	if err != nil {
		return err
	}

	scanner := bufio.NewScanner(file)
	// Raise bufio.Scanner's default 64 KiB token limit so long records are
	// not rejected with bufio.ErrTooLong.
	scanner.Buffer(make([]byte, 0, 64*1024), 16*1024*1024)

	lineNumber := 0
	for scanner.Scan() {
		lineNumber++
		line := bytes.TrimSpace(scanner.Bytes())
		if len(line) == 0 {
			continue
		}

		// Decode into a fresh value on every line so that a record missing a
		// key cannot inherit the value of the previous record.
		var record struct {
			DataSource string `json:"DATA_SOURCE"`
			RecordID   string `json:"RECORD_ID"`
		}
		if err := json.Unmarshal(line, &record); err != nil {
			return fmt.Errorf("%s line %d: %w", filepath, lineNumber, err)
		}

		result, err := szEngine.AddRecord(ctx, record.DataSource, record.RecordID, string(line), senzing.SzWithInfo)
		if err != nil {
			return fmt.Errorf("%s line %d: %w", filepath, lineNumber, err)
		}
		fmt.Println(result)
	}

	if err := scanner.Err(); err != nil {
		return fmt.Errorf("reading %s: %w", filepath, err)
	}
	return nil
}

Create a function to pretty print JSON.

In [None]:
// asPrettyJSON returns str re-indented with four spaces per nesting level.
// If str cannot be indented as JSON, it is returned unchanged.
func asPrettyJSON(str string) string {
	indented := new(bytes.Buffer)
	err := json.Indent(indented, []byte(str), "", "    ")
	if err == nil {
		return indented.String()
	}
	return str
}

## Main

Download truth-set files.

In [None]:
%%
// Download every truth-set file into homePath, stopping at the first failure.
for _, fileName := range truthSetFileNames {
    // truthSetURLPrefix already ends with "/", so join without adding another
    // separator; "%s/%s" produced ".../demo//customers.json".
    url := fmt.Sprintf("%s%s", truthSetURLPrefix, fileName)
    filepath := fmt.Sprintf("%s%s", homePath, fileName)
    err := downloadFile(url, filepath)
    testErr(err)
}

Create an [abstract factory] for accessing Senzing via gRPC.

[abstract factory]: https://en.wikipedia.org/wiki/Abstract_factory_pattern

In [None]:
var szAbstractFactory = getSzAbstractFactory()

Add datasources to the Senzing configuration.

In [None]:
%%
// Discover DATA_SOURCE values in records.

dataSources := []string{}
for _, value := range truthSetFileNames {
    partialDataSources := extractDataSources(fmt.Sprintf("%s%s", homePath, value))
    for _, dataSource := range partialDataSources {
        // The same DATA_SOURCE may occur in more than one file; list it once.
        if !slices.Contains(dataSources, dataSource) {
            dataSources = append(dataSources, dataSource)
        }
    }
}
fmt.Printf("Found the following DATA_SOURCE values in the data: %v\n", dataSources)

// Update Senzing configuration.

err = addDatasourcesToSenzingConfig(szAbstractFactory, dataSources)
// Stop here on failure; otherwise later cells would run against a
// configuration that lacks the data sources.
testErr(err)

Add records.

In [None]:
%%
// Load every truth-set file into Senzing.
for _, value := range truthSetFileNames {
    err = addRecords(szAbstractFactory, fmt.Sprintf("%s%s", homePath, value))
    // Check inside the loop: otherwise a failure on one file is overwritten
    // by the result of the next and never reported.
    testErr(err)
}

## View results

Retrieve an entity by identifying a record of the entity. Use the `SZ_ENTITY_INCLUDE_RECORD_SUMMARY` flag from among the [get_entity flags].

[get_entity flags]: https://senzing.com/docs/4_beta/flags/flags_get_entity/

In [None]:
%%
szEngine, err := szAbstractFactory.CreateEngine(ctx)
testErr(err)

customer1070Entity, err := szEngine.GetEntityByRecordID(ctx, "CUSTOMERS", "1070", senzing.SzEntityIncludeRecordSummary)
testErr(err)
fmt.Println(asPrettyJSON(customer1070Entity))

Search for entities by attributes.

In [None]:
%%
szEngine, err := szAbstractFactory.CreateEngine(ctx)
testErr(err)

searchProfile := ""
searchQuery := `{
    "name_full": "robert smith",
    "date_of_birth": "11/12/1978"
}`

searchResult, err := szEngine.SearchByAttributes(ctx, searchQuery, searchProfile, senzing.SzSearchByAttributesDefaultFlags)
testErr(err)
fmt.Println(asPrettyJSON(searchResult))