From fd52cd8ea8a69a55b36724d7e40180dedb9835ea Mon Sep 17 00:00:00 2001
From: Kevin Schaper
Date: Tue, 15 Aug 2023 17:13:28 -0700
Subject: [PATCH] Add additional field definitions to association schema, make text tokenized versions of text fields on associations

---
Note (ignored by git am): the line structure of this patch was restored from a
copy whose newlines had been collapsed into spaces. Hunk line counts were
re-verified against the @@ headers, but leading indentation is a best-effort
reconstruction (2-space JSON nesting). If a context line fails to match, apply
with `git apply --ignore-whitespace`.

 scripts/add_association_copyfields.sh         | 47 +++++++++++++++++++
 scripts/add_fieldtypes.sh                     |  7 +++
 scripts/autocomplete-fieldtype.json           | 26 ++++++++++
 scripts/load_solr.sh                          |  3 +-
 ...tity_fieldtypes.sh => text-fieldtype.json} | 32 +------------
 5 files changed, 84 insertions(+), 31 deletions(-)
 create mode 100755 scripts/add_association_copyfields.sh
 create mode 100755 scripts/add_fieldtypes.sh
 create mode 100644 scripts/autocomplete-fieldtype.json
 rename scripts/{add_entity_fieldtypes.sh => text-fieldtype.json} (65%)
 mode change 100755 => 100644

diff --git a/scripts/add_association_copyfields.sh b/scripts/add_association_copyfields.sh
new file mode 100755
index 00000000..7b9cf729
--- /dev/null
+++ b/scripts/add_association_copyfields.sh
@@ -0,0 +1,47 @@
+#!/bin/sh
+
+# Add two dynamicfields declarations to the schema
+
+# One for text fields, have to replace because _t comes built in
+
+# delete the _t dynamic field
+curl -X POST -H 'Content-type:application/json' --data-binary '{
+  "delete-dynamic-field": {
+    "name": "*_t"
+  }
+}' http://localhost:8983/solr/association/schema
+
+curl -X POST -H 'Content-type:application/json' --data-binary '{
+  "add-dynamic-field": {
+    "name": "*_t",
+    "type": "text",
+    "indexed": true,
+    "stored": false,
+    "multiValued": true
+  }
+}' http://localhost:8983/solr/association/schema
+
+# One for autocomplete fields
+
+curl -X POST -H 'Content-type:application/json' --data-binary '{
+  "add-dynamic-field": {
+    "name": "*_ac",
+    "type": "autocomplete",
+    "indexed": true,
+    "stored": false,
+    "multiValued": true
+  }
+}' http://localhost:8983/solr/association/schema
+
+
+# now add copyfields declarations for subject_label, subject_closure_label, object_label, object_closure_label
+
+for field in subject_label subject_closure_label subject_taxon subject_taxon_label predicate object_label object_closure_label object_taxon object_taxon_label primary_knowledge_source in_taxon_label qualifier_label onset_qualifier_label frequency_qualifier_label sex_qualifier_label
+do
+  curl -X POST -H 'Content-type:application/json' --data-binary "{
+    \"add-copy-field\": {
+      \"source\":\"$field\",
+      \"dest\": \"${field}_t\"
+    }
+}" http://localhost:8983/solr/association/schema
+done
diff --git a/scripts/add_fieldtypes.sh b/scripts/add_fieldtypes.sh
new file mode 100755
index 00000000..678d1147
--- /dev/null
+++ b/scripts/add_fieldtypes.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+for core in entity association
+do
+  curl -X POST -H 'Content-type:application/json' -d @scripts/text-fieldtype.json http://localhost:8983/solr/$core/schema
+  curl -X POST -H 'Content-type:application/json' -d @scripts/autocomplete-fieldtype.json http://localhost:8983/solr/$core/schema
+done
diff --git a/scripts/autocomplete-fieldtype.json b/scripts/autocomplete-fieldtype.json
new file mode 100644
index 00000000..84450158
--- /dev/null
+++ b/scripts/autocomplete-fieldtype.json
@@ -0,0 +1,26 @@
+{
+  "add-field-type": {
+    "name": "autocomplete",
+    "class": "solr.TextField",
+    "positionIncrementGap": "100",
+    "indexAnalyzer": {
+      "tokenizer": { "class": "solr.StandardTokenizerFactory" },
+      "filters": [
+        { "class": "solr.ASCIIFoldingFilterFactory" },
+        { "class": "solr.LowerCaseFilterFactory" },
+        {
+          "class": "solr.EdgeNGramFilterFactory",
+          "minGramSize": "1",
+          "maxGramSize": "25"
+        }
+      ]
+    },
+    "queryAnalyzer": {
+      "tokenizer": { "class": "solr.StandardTokenizerFactory" },
+      "filters": [
+        { "class": "solr.ASCIIFoldingFilterFactory" },
+        { "class": "solr.LowerCaseFilterFactory" }
+      ]
+    }
+  }
+}
diff --git a/scripts/load_solr.sh b/scripts/load_solr.sh
index d1d48eb3..bc01baab 100755
--- a/scripts/load_solr.sh
+++ b/scripts/load_solr.sh
@@ -24,7 +24,7 @@ sleep 10
 
 # todo: ideally, this will live in linkml-solr
 echo "Adding additional fieldtypes"
-scripts/add_entity_fieldtypes.sh
+scripts/add_fieldtypes.sh
 sleep 5
 
 echo "Adding entity schema"
@@ -38,6 +38,7 @@ sleep 5
 # todo: this also should live in linkml-solr, and copy-fields should be based on the schema
 echo "Add dynamic fields and copy fields declarations"
 scripts/add_entity_copyfields.sh
+scripts/add_association_copyfields.sh
 sleep 5
 
 echo "Loading entities"
diff --git a/scripts/add_entity_fieldtypes.sh b/scripts/text-fieldtype.json
old mode 100755
new mode 100644
similarity index 65%
rename from scripts/add_entity_fieldtypes.sh
rename to scripts/text-fieldtype.json
index f9b0003e..24f3c6c1
--- a/scripts/add_entity_fieldtypes.sh
+++ b/scripts/text-fieldtype.json
@@ -1,5 +1,4 @@
-#!/bin/sh
-curl -X POST -H 'Content-type:application/json' --data-binary '{
+{
   "add-field-type": {
     "name": "text",
     "class": "solr.TextField",
@@ -52,31 +51,4 @@ curl -X POST -H 'Content-type:application/json' --data-binary '{
       ]
     }
   }
-}' http://localhost:8983/solr/entity/schema
-
-curl -X POST -H 'Content-type:application/json' --data-binary '{
-  "add-field-type": {
-    "name": "autocomplete",
-    "class": "solr.TextField",
-    "positionIncrementGap": "100",
-    "indexAnalyzer": {
-      "tokenizer": { "class": "solr.StandardTokenizerFactory" },
-      "filters": [
-        { "class": "solr.ASCIIFoldingFilterFactory" },
-        { "class": "solr.LowerCaseFilterFactory" },
-        {
-          "class": "solr.EdgeNGramFilterFactory",
-          "minGramSize": "1",
-          "maxGramSize": "25"
-        }
-      ]
-    },
-    "queryAnalyzer": {
-      "tokenizer": { "class": "solr.StandardTokenizerFactory" },
-      "filters": [
-        { "class": "solr.ASCIIFoldingFilterFactory" },
-        { "class": "solr.LowerCaseFilterFactory" }
-      ]
-    }
-  }
-}' http://localhost:8983/solr/entity/schema
+}