In [1]:
from elasticsearch import Elasticsearch
import tomllib
import hashlib
from pathlib import Path

# Read the cluster address and credentials from the production config file.
with open(Path("../../config/elastic.production.toml"), mode="rb") as f:
    cfg = tomllib.load(f)

es = Elasticsearch(
    cfg['instance'],
    basic_auth=(cfg['username'], cfg['password']),
)

index = "people-ude"

# WARNING: destructive. Every run of this cell drops the index and rebuilds it
# from the hard-coded list below, on the cluster named in the production config.
# `ignore_unavailable=True` turns the delete into a no-op when the index does
# not exist, so no separate exists() check is needed (one round trip fewer and
# no window between the check and the delete).
es.indices.delete(index=index, ignore_unavailable=True)


es.indices.create(index=index, mappings={
    # Don't allow new fields https://www.elastic.co/guide/en/elasticsearch/reference/8.8/dynamic.html
    # https://www.elastic.co/guide/en/elasticsearch/reference/8.8/mapping-types.html
    # https://www.elastic.co/guide/en/elasticsearch/reference/8.8/keyword.html#wildcard-field-type
    "dynamic": 'strict',
    "properties": {
        "name":          {"type": "text"},
        "title":         {"type": "text"},
        "email":         {"type": "wildcard"},
        "in.url":        {"type": "wildcard"},
        # NOTE(review): the *.hash fields hold SHA-256 hex digests; `keyword`
        # may fit exact-match lookups better than `text` -- confirm against the
        # queries that read this index before changing the mapping.
        "in.hash":       {"type": "text"},
        "homepage.url":  {"type": "wildcard"},
        "homepage.hash": {"type": "text"},
    }
}, settings={
    # Intentionally empty since there are no settings yet
})

# Seed records. "in.url" is the listing page the person appears on and
# "homepage.url" is the person's own page (empty string when there is none).
people = [
    {
        "name":         "Michael Goedicke",
        "title":        "Prof. Dr.",
        "email":        "michael.goedicke@s3.uni-due.de",
        "in.url":       "https://s3.paluno.uni-due.de/team",
        "homepage.url": "https://s3.paluno.uni-due.de/team/michael-goedicke"
    }, {
        "name":         "Lukas Glaser",
        "title":        "",
        "email":        "lukas.glaser@paluno.uni-due.de",
        "in.url":       "https://s3.paluno.uni-due.de/team",
        "homepage.url": "https://s3.paluno.uni-due.de/team/wissenschaftliche-mitarbeiterinnnen-und-mitarbeiter/lukas-glaser"
    }, {
        "name":         "Michael Striewe",
        "title":        "Dr.",
        "email":        "michael.striewe@paluno.uni-due.de",
        "in.url":       "https://s3.paluno.uni-due.de/team",
        "homepage.url": "https://s3.paluno.uni-due.de/team/wissenschaftliche-mitarbeiterinnnen-und-mitarbeiter/michael-striewe"
    }, {
        "name":         "Pedro José Marrón",
        "title":        "Prof. Dr.",
        "email":        "pjmarron@locoslab.com",
        "in.url":       "https://www.nes.uni-due.de/staff/",
        "homepage.url": "https://www.nes.uni-due.de/staff/pjmarron/"
    }, {
        "name":         "Arman Arzani",
        "title":        "",
        "email":        "arman.arzani.due@gmail.com",
        "in.url":       "https://www.nes.uni-due.de/staff/",
        "homepage.url": "https://www.nes.uni-due.de/staff/arzani/"
    }, {
        "name":         "Marcus Handte",
        "title":        "Dr.",
        "email":        "marcus.handte@uni-due.de",
        "in.url":       "https://www.nes.uni-due.de/staff/",
        "homepage.url": "https://www.nes.uni-due.de/staff/handte/"
    }
]

# Give every non-empty "<prefix>.url" a companion "<prefix>.hash" holding the
# SHA-256 hex digest of the URL, then index the record. Records are updated in
# place; an empty or missing URL simply gets no hash field.
for p in people:
    for prefix in ("in", "homepage"):
        url = p.get(f"{prefix}.url")
        if url:
            p[f"{prefix}.hash"] = hashlib.sha256(url.encode("utf-8")).hexdigest()
    es.index(index=index, document=p)


In [2]:
from elasticsearch import Elasticsearch
import tomllib
import hashlib
from pathlib import Path

# Read the cluster address and credentials from the production config file.
with open(Path("../../config/elastic.production.toml"), mode="rb") as f:
    cfg = tomllib.load(f)

es = Elasticsearch(
    cfg['instance'],
    basic_auth=(cfg['username'], cfg['password']),
)

index = "people-rub"

# WARNING: destructive. Every run of this cell drops the index and rebuilds it
# from the hard-coded list below, on the cluster named in the production config.
# `ignore_unavailable=True` turns the delete into a no-op when the index does
# not exist, so no separate exists() check is needed (one round trip fewer and
# no window between the check and the delete).
es.indices.delete(index=index, ignore_unavailable=True)


es.indices.create(index=index, mappings={
    # Don't allow new fields https://www.elastic.co/guide/en/elasticsearch/reference/8.8/dynamic.html
    # https://www.elastic.co/guide/en/elasticsearch/reference/8.8/mapping-types.html
    # https://www.elastic.co/guide/en/elasticsearch/reference/8.8/keyword.html#wildcard-field-type
    "dynamic": 'strict',
    "properties": {
        "name":          {"type": "text"},
        "title":         {"type": "text"},
        "email":         {"type": "wildcard"},
        "in.url":        {"type": "wildcard"},
        # NOTE(review): the *.hash fields hold SHA-256 hex digests; `keyword`
        # may fit exact-match lookups better than `text` -- confirm against the
        # queries that read this index before changing the mapping.
        "in.hash":       {"type": "text"},
        "homepage.url":  {"type": "wildcard"},
        "homepage.hash": {"type": "text"},
    }
}, settings={
    # Intentionally empty since there are no settings yet
})

# Seed records. "in.url" is the listing page the person appears on and
# "homepage.url" is the person's own page (empty string when there is none).
people = [
    {
        "name":         "Eike Kiltz",
        "title":        "Prof. Dr.",
        "email":        "eike.kiltz@rub.de",
        "in.url":       "https://informatik.rub.de/crypto/personen/",
        "homepage.url": "https://informatik.rub.de/kiltz/"
    }, {
        "name":         "Anja Krause",
        "title":        "",
        "email":        "anja.krause@ruhr-uni-bochum.de",
        "in.url":       "https://informatik.rub.de/crypto/personen/",
        "homepage.url": ""
    }, {
        "name":         "Benedikt Auerbach",
        "title":        "Dr.",
        "email":        "Benedikt.Auerbach@ruhr-uni-bochum.de",
        "in.url":       "https://informatik.rub.de/crypto/personen/",
        "homepage.url": ""
    }, {
        "name":         "Annette Kluge",
        "title":        "Prof. Dr. Dipl.-Psych.",
        "email":        "annette.kluge@rub.de",
        "in.url":       "https://www.aow.ruhr-uni-bochum.de/aow/ueberuns/mitarbeiter/index.html.de",
        "homepage.url": "https://www.aow.ruhr-uni-bochum.de/aow/ueberuns/mitarbeiter/kluge.html.de"
    }, {
        "name":         "Sophie Berretta",
        "title":        "M.Sc.",
        "email":        "Sophie.Berretta@Ruhr-Uni-Bochum.de",
        "in.url":       "https://www.aow.ruhr-uni-bochum.de/aow/ueberuns/mitarbeiter/index.html.de",
        "homepage.url": "https://www.aow.ruhr-uni-bochum.de/aow/ueberuns/mitarbeiter/berretta.html.de"
    }, {
        "name":         "Mirko Kaufmann",
        "title":        "M.Sc.",
        "email":        "Mirko.Kaufmann@ruhr-uni-bochum.de",
        "in.url":       "https://www.aow.ruhr-uni-bochum.de/aow/ueberuns/mitarbeiter/index.html.de",
        "homepage.url": "https://www.aow.ruhr-uni-bochum.de/aow/ueberuns/mitarbeiter/kaufmann.html.de"
    }
]

# Give every non-empty "<prefix>.url" a companion "<prefix>.hash" holding the
# SHA-256 hex digest of the URL, then index the record. Records are updated in
# place; an empty or missing URL (several entries above have no homepage)
# simply gets no hash field.
for p in people:
    for prefix in ("in", "homepage"):
        url = p.get(f"{prefix}.url")
        if url:
            p[f"{prefix}.hash"] = hashlib.sha256(url.encode("utf-8")).hexdigest()
    es.index(index=index, document=p)
