# Solr notes

See https://hackmd.io/HnDFextyRU2gLSaBCT7vjw for general notes.

## Ubuntu setup

Solr config, set in `/etc/default/solr.in.sh`:
```
/etc/default/solr.in.sh

ZK_HOST="localhost:2181/solr"
ZK_CLIENT_TIMEOUT="30000"
SOLR_HOST="127.0.0.1"
SOLR_WAIT_FOR_ZK="30"
SOLR_PID_DIR="/var/solr"
SOLR_HOME="/var/solr/data"
LOG4J_PROPS="/var/solr/log4j2.xml"
SOLR_LOGS_DIR="/var/solr/logs"
SOLR_PORT="8983"
```

Set up ZooKeeper:
```
bin/solr zk mkroot /solr -z localhost:2181
server/scripts/cloud-scripts/zkcli.sh \
  -z localhost:2181 \
  -cmd bootstrap \
  -solrhome /var/solr/data
```

Create a core:
```
sudo su - solr
/opt/solr/bin/solr create -c isb_core_records
```

Then turn off automatic field creation with:
```
solr config -c isb_core_records -p 8983 \
  -action set-user-property \
  -property update.autoCreateFields \
  -value false
```


In [1]:
import json
import requests

SOLR_API = "http://localhost:8983/api/collections/isb_core_records/"
MEDIA_JSON = "application/json"

def pj(o):
    """Pretty-print ``o`` to stdout as JSON using a two-space indent."""
    rendered = json.dumps(o, indent=2)
    print(rendered)

def listFields():
    """Return the field definitions from the collection's Solr schema.

    Sends a GET to ``{SOLR_API}schema`` asking for a JSON response, decodes
    it, and returns the value stored under ``schema`` -> ``fields``. The
    result is ``None`` when the response carries no such entry.
    """
    response = requests.get(f"{SOLR_API}schema", headers={"Accept": MEDIA_JSON})
    payload = response.json()
    schema_section = payload.get("schema", {})
    return schema_section.get("fields")

def listFieldTypes():
    """Return the field-type definitions from the collection's Solr schema.

    Sends a GET to ``{SOLR_API}schema`` asking for a JSON response, decodes
    it, and returns the value stored under ``schema`` -> ``fieldTypes``. The
    result is ``None`` when the response carries no such entry.
    """
    response = requests.get(f"{SOLR_API}schema", headers={"Accept": MEDIA_JSON})
    payload = response.json()
    schema_section = payload.get("schema", {})
    return schema_section.get("fieldTypes")

def createField(fname, ftype="string", stored=True, indexed=True, default=None, multivalued=False):
    """Add a field to the collection schema and print Solr's reply.

    Posts an ``add-field`` command as JSON to ``{SOLR_API}schema`` and
    pretty-prints the decoded JSON response with ``pj``.

    Args:
        fname: Name of the field to add.
        ftype: Name of the field type to assign to the field.
        stored: Value sent as the field's ``stored`` attribute.
        indexed: Value sent as the field's ``indexed`` attribute.
        default: Value sent as the field's ``default`` attribute; the
            attribute is left out of the request when this is ``None``.
        multivalued: Sent as ``multiValued``, but only when truthy.
    """
    field_spec = {
        "name": fname,
        "type": ftype,
        "stored": stored,
        "indexed": indexed,
    }
    # Optional attributes are added only when requested, so an unset option
    # never appears in the command sent to Solr.
    if multivalued:
        field_spec["multiValued"] = multivalued
    if default is not None:
        field_spec["default"] = default
    body = json.dumps({"add-field": field_spec}).encode("utf-8")
    res = requests.post(
        f"{SOLR_API}schema",
        headers={"Content-Type": MEDIA_JSON},
        data=body,
    )
    pj(res.json())
    
def createCopyField(source, dest):
    """Add a copy-field rule to the collection schema and print Solr's reply.

    Posts an ``add-copy-field`` command as JSON to ``{SOLR_API}schema`` and
    pretty-prints the decoded JSON response with ``pj``.

    Args:
        source: Name of the field whose values are copied.
        dest: Name of the field that receives the copies; it is sent to
            Solr wrapped in a one-element list.
    """
    command = {"add-copy-field": {"source": source, "dest": [dest]}}
    body = json.dumps(command).encode("utf-8")
    res = requests.post(
        f"{SOLR_API}schema",
        headers={"Content-Type": MEDIA_JSON},
        data=body,
    )
    pj(res.json())

# Show the field definitions Solr currently reports for the collection schema.
pj(listFields())

[
  {
    "name": "_nest_path_",
    "type": "_nest_path_"
  },
  {
    "name": "_root_",
    "type": "string",
    "docValues": false,
    "indexed": true,
    "stored": false
  },
  {
    "name": "_text_",
    "type": "text_general",
    "multiValued": true,
    "indexed": true,
    "stored": false
  },
  {
    "name": "_version_",
    "type": "plong",
    "indexed": false,
    "stored": false
  },
  {
    "name": "description",
    "type": "string",
    "default": "",
    "indexed": true,
    "stored": true
  },
  {
    "name": "hasContextCategory",
    "type": "string",
    "default": "",
    "multiValued": true,
    "indexed": true,
    "stored": true
  },
  {
    "name": "hasMaterialCategory",
    "type": "string",
    "default": "",
    "multiValued": true,
    "indexed": true,
    "stored": true
  },
  {
    "name": "hasSpecimenCategory",
    "type": "string",
    "default": "",
    "multiValued": true,
    "indexed": true,
    "stored": true
  },
  {
    "name": "id",
    "typ

```
datetime : pdate
URI : string
```

In [None]:
# Build the record schema. Every field is created stored and indexed with no
# default value (createField's own defaults), so only the field type and the
# multi-valued flag are spelled out below.

# Top-level record fields.
createField("isb_core_id", ftype="string")
createField("source", ftype="string")
createField("label", ftype="string")
createField("description", ftype="string")
# Each free-text description gets a text_general twin fed by a copy-field rule.
createField("description_text", ftype="text_general")
createCopyField(source="description", dest="description_text")
createField("hasContextCategory", ftype="string", multivalued=True)
createField("hasMaterialCategory", ftype="string", multivalued=True)
createField("keywords", ftype="string", multivalued=True)

# producedBy_* fields.
createField("producedBy_label", ftype="string")
createField("producedBy_description", ftype="string")
createField("producedBy_description_text", ftype="text_general")
createCopyField(source="producedBy_description", dest="producedBy_description_text")
createField("producedBy_hasFeatureOfInterest", ftype="string")
createField("producedBy_responsibility", ftype="string", multivalued=True)
createField("producedBy_resultTime", ftype="pdate")

# producedBy_samplingSite_* fields.
createField("producedBy_samplingSite_description", ftype="string")
# NOTE(review): unlike the two *_text fields above, no copy-field rule is
# added for this one in this cell.
createField("producedBy_samplingSite_description_text", ftype="text_general")
createField("producedBy_samplingSite_label", ftype="string")
createField("producedBy_samplingSite_location_elevationInMeters", ftype="pfloat")
# NOTE(review): latitude and longitude are each given the "location" type;
# in Solr's stock schema that type presumably expects a combined "lat,lon"
# value -- confirm this is what the indexer sends.
createField("producedBy_samplingSite_location_latitude", ftype="location")
createField("producedBy_samplingSite_location_longitude", ftype="location")
createField("producedBy_samplingSite_placeName", ftype="string", multivalued=True)

# Remaining multi-valued record fields.
createField("registrant", ftype="string", multivalued=True)
createField("samplingPurpose", ftype="string", multivalued=True)

In [5]:
# Create the text_general twin of the sampling-site description (stored and
# indexed, no default) and register the copy-field rule that feeds it.
createField("producedBy_samplingSite_description_text", ftype="text_general")
createCopyField(
    source="producedBy_samplingSite_description",
    dest="producedBy_samplingSite_description_text",
)

{
  "responseHeader": {
    "status": 0,
    "QTime": 453
  }
}
{
  "responseHeader": {
    "status": 0,
    "QTime": 372
  }
}


In [4]:
def deleteRecords(q="*:*", collection="isb_rel"):
    """Delete the documents matching a query from a Solr collection.

    Posts a JSON ``delete`` command to the collection's ``update`` endpoint
    on ``localhost:8983`` with ``commit=true``, then prints the HTTP status
    code and the raw response text.

    Args:
        q: Query selecting the documents to delete. The default ``*:*``
            is sent unchanged, so calling with no arguments issues a
            delete for that match-all query.
        collection: Name of the collection to delete from. Defaults to
            ``isb_rel``, the collection this function was previously
            hard-wired to.
    """
    # NOTE(review): the default collection differs from the one named in
    # SOLR_API (isb_core_records); pass ``collection`` explicitly to target it.
    url = f"http://localhost:8983/solr/{collection}/update"
    body = json.dumps({"delete": {"query": q}}).encode("utf-8")
    res = requests.post(
        url,
        headers={"Content-Type": MEDIA_JSON},
        data=body,
        params={"commit": "true"},
    )
    print(f"status: {res.status_code}")
    print(res.text)

# WARNING: no query is passed, so the default "*:*" is used and the delete
# applies to every document in the collection deleteRecords targets.
deleteRecords()

status: 200
{
  "responseHeader":{
    "rf":1,
    "status":0,
    "QTime":31}}

