# From text to eKG

This notebook has the minimum sample code to convert a transcribed interaction into is graph representation.

## Description

IN THIS SCENARIO, TWO PEOPLE DISCUSS SIMPLE PREFERENCE OVER BAKING.

## Prerequisites
1. Download [GraphDB](http://graphdb.ontotext.com/)
2. Launch it
3. Create a repository, you can use [this configuration](https://github.com/leolani/cltl-knowledgerepresentation/blob/main/src/cltl/brain/ontologies/BASIC-REPOSITORY-CONFIG-GRAPHDB.ttl)

In [24]:
# Imports
import json
from datetime import date, datetime
from pathlib import Path
from random import getrandbits

import requests
from cltl.brain.long_term_memory import LongTermMemory
from cltl.brain.utils.helper_functions import brain_response_to_json
from cltl.entity_linking.label_linker import LabelBasedLinker
from cltl.triple_extraction.api import Chat
from cltl.triple_extraction.cfg_analyzer import CFGAnalyzer
# from cltl.triple_extraction.spacy_analyzer import spacyAnalyzer
from cltl.triple_extraction.utils.helper_functions import utterance_to_capsules
from tqdm import tqdm

In [25]:
# Data
baking_scenario = [
    "Selene: I like baking cakes",
    "Lea: I don't like baking cakes",
    "Selene: I also like baking cookies",
    "Lea: I like cookies",
    "Selene: do you like chocolate cookies?",
    "Lea: I love chocolate cookies"
    "Selene: I will bake chocolate cookies tonight"
]
baking_scenario_speakers = ["Lea", "Selene"]  # Person who speaks first has to be the second in the array

In [26]:
def create_context_capsule():
    # Define contextual features
    context_id = getrandbits(8)
    place_id = getrandbits(8)
    location = requests.get("https://ipinfo.io").json()
    start_date = date(2021, 3, 12)

    return {"context_id": context_id,
            "date": start_date,
            "place": "Unknown",
            "place_id": place_id,
            "country": location['country'],
            "region": location['region'],
            "city": location['city']}

In [27]:
def main(scenario, speakers):
    # Set logging levels
    #     chat_logger.setLevel(logging.ERROR)
    #     brain_logger.setLevel(logging.ERROR)

    # Create analyzers
    analyzer = CFGAnalyzer()
    linker = LabelBasedLinker()

    # Create folders
    scenario_filepath = Path('./data/baking/')
    graph_filepath = scenario_filepath / Path('graph/')
    graph_filepath.mkdir(parents=True, exist_ok=True)

    # Initialize brain, Chat, 
    brain = LongTermMemory(address="http://localhost:7200/repositories/sandbox",  # Location to save accumulated graph
                           log_dir=graph_filepath,  # Location to save step-wise graphs
                           clear_all=True)  # To start from an empty brain
    chat = Chat(speakers[0], speakers[1])

    # Create context
    context_capsule = create_context_capsule()
    brain.capsule_context(context_capsule)

    # Loop through utterances
    all_responses = []
    all_capsules = []
    capsules_skipped = 0
    for turn, utterance in tqdm(enumerate(scenario)):
        # split speaker from utterance
        speaker, clean_utterance = utterance.split(': ')

        # add utterance to chat and use CFG analyzer to analyze
        chat.add_utterance(clean_utterance, speaker)
        analyzer.analyze(chat.last_utterance)
        capsules = utterance_to_capsules(chat.last_utterance)

        # add statement capsules to brain
        for capsule in capsules:
            # Ugly fix of capsule
            capsule['author'] = {"label": speaker, "type": ["person"]}
            capsule['timestamp'] = datetime.now()

            # Link to specific instances
            linker.link(capsule)

            # Add capsule to brain
            print("\tAdding capsule to brain")
            response = brain.capsule_statement(capsule, reason_types=True)

            # Keep track of responses
            capsule['rdf_file'] = str(response['rdf_log_path'].stem) + '.trig'
            capsule_json = brain_response_to_json(capsule)
            all_capsules.append(capsule_json)
            response_json = brain_response_to_json(response)
            all_responses.append(response_json)

            try:
                # # Ugly fix of capsule
                # capsule['author'] = {"label": speaker, "type": ["person"]}
                # capsule['timestamp'] = datetime.now()
                #
                # # Link to specific instances
                # linker.link(capsule)
                #
                # # Add capsule to brain
                # print("\tAdding capsule to brain")
                # response = brain.capsule_statement(capsule)
                #
                # # Keep track of responses
                # capsule['rdf_file'] = str(response['rdf_log_path'].stem) + '.trig'
                # capsule_json = brain_response_to_json(capsule)
                # all_capsules.append(capsule_json)
                # response_json = brain_response_to_json(response)
                # all_responses.append(response_json)
                #
                print(f"Utterance: {utterance}, Capsule: {json.dumps(capsule_json, indent=2)}")
            except:
                capsules_skipped += 1
                print(f"\tCapsule skipped. Total skipped: {capsules_skipped}")

    # Save responses 
    f = open(scenario_filepath / "capsules.json", "w")
    json.dump(all_capsules, f)
    f = open(scenario_filepath / "responses.json", "w")
    json.dump(all_responses, f)




In [28]:
main(baking_scenario, baking_scenario_speakers)

2023-10-03 23:38:23 -     INFO -                                    cltl.brain.LongTermMemory - Booted
2023-10-03 23:38:23 -     INFO -                                    cltl.brain.LongTermMemory - Clearing brain
2023-10-03 23:38:23 -     INFO -                                    cltl.brain.LongTermMemory - Uploading ontology to brain
2023-10-03 23:38:23 -     INFO -                                  cltl.brain.ThoughtGenerator - Booted
2023-10-03 23:38:23 -     INFO -                                  cltl.brain.LocationReasoner - Booted
2023-10-03 23:38:23 -     INFO -                                      cltl.brain.TypeReasoner - Booted
2023-10-03 23:38:23 -     INFO -                                   cltl.brain.TrustCalculator - Booted
2023-10-03 23:38:23 -     INFO -                                  cltl.triple_extraction.Chat - << Start of Chat with Selene >>
2023-10-03 23:38:24 -     INFO -                                    cltl.brain.LongTermMemory - Context: context147
0it [0

	Adding capsule to brain


2023-10-03 23:38:25 -     INFO -                                  cltl.brain.ThoughtGenerator - Gaps: 77 gaps as subject: e.g. selene watch movie - 25 gaps as object: e.g. selene be-ancestor-of person
2023-10-03 23:38:25 -     INFO -                                  cltl.brain.ThoughtGenerator - Gaps: 1 gaps as subject: e.g. baking-cakes be-inside container - 2 gaps as object: e.g. baking-cakes own agent
1it [00:01,  1.27s/it]2023-10-03 23:38:25 -     INFO -                                  cltl.triple_extraction.Chat - Lea        001: "I don't like baking cakes"


Utterance: Selene: I like baking cakes, Capsule: {
  "chat": 55,
  "turn": 0,
  "author": {
    "label": "Selene",
    "type": [
      "person"
    ],
    "uri": "http://cltl.nl/leolani/world/selene"
  },
  "utterance": "I like baking cakes",
  "utterance_type": "STATEMENT",
  "position": "0-19",
  "subject": {
    "label": "Selene",
    "type": [
      "person"
    ],
    "uri": "http://cltl.nl/leolani/world/selene"
  },
  "predicate": {
    "label": "like",
    "type": [
      "emotion"
    ],
    "uri": "http://cltl.nl/leolani/n2mu/like"
  },
  "object": {
    "label": "baking-cakes",
    "type": [
      "change",
      "artifact",
      "object"
    ],
    "uri": "http://cltl.nl/leolani/world/baking-cakes"
  },
  "perspective": {
    "_certainty": "CERTAIN",
    "_polarity": "POSITIVE",
    "_sentiment": "POSITIVE",
    "_time": null,
    "_emotion": "UNDERSPECIFIED"
  },
  "context_id": null,
  "date": "2023-10-03T23:38:24.396847",
  "place": "",
  "place_id": null,
  "country": "

2023-10-03 23:38:26 -     INFO -                          cltl.triple_extraction.cfg_analyzer - Found 2 triples
2023-10-03 23:38:26 -     INFO -                          cltl.triple_extraction.cfg_analyzer - final triple: {'subject': {'label': 'Selene', 'type': ['person']}, 'predicate': {'label': 'like', 'type': ['emotion']}, 'object': {'label': 'baking-cakes', 'type': ['change', 'artifact', 'object']}, 'perspective': {'sentiment': 0.75, 'certainty': 1.0, 'polarity': -1.0, 'emotion': 0.0}} {'neg': True, 'aux': {'tense': 'present', 'person': ['first', 'second', 'third'], 'number': ['singular', 'plural']}}
2023-10-03 23:38:26 -     INFO -                              cltl.triple_extraction.analyzer - GeneralStatementAnalyzer: Utterance type: "STATEMENT"
2023-10-03 23:38:26 -     INFO -                              cltl.triple_extraction.analyzer - GeneralStatementAnalyzer: RDF triplet    subject: {"label": "Selene", "type": ["person"]}
2023-10-03 23:38:26 -     INFO -                    

	Adding capsule to brain


2023-10-03 23:38:27 -     INFO -                                  cltl.brain.ThoughtGenerator - Gaps: 77 gaps as subject: e.g. selene be-grandparent-of person - 25 gaps as object: e.g. selene be-grandmother-of person
2023-10-03 23:38:27 -     INFO -                                  cltl.brain.ThoughtGenerator - Gaps: 1 gaps as subject: e.g. baking-cakes be-inside container - 2 gaps as object: e.g. baking-cakes own agent
2023-10-03 23:38:27 -     INFO -                                    cltl.brain.LongTermMemory - Triple in statement: selene_like_baking-cakes [person_->_change or artifact or object or object])


Utterance: Lea: I don't like baking cakes, Capsule: {
  "chat": 55,
  "turn": 1,
  "author": {
    "label": "Lea",
    "type": [
      "person"
    ],
    "uri": "http://cltl.nl/leolani/world/lea"
  },
  "utterance": "I don't like baking cakes",
  "utterance_type": "STATEMENT",
  "position": "0-25",
  "subject": {
    "label": "Selene",
    "type": [
      "person"
    ],
    "uri": "http://cltl.nl/leolani/world/selene"
  },
  "predicate": {
    "label": "like",
    "type": [
      "emotion"
    ],
    "uri": "http://cltl.nl/leolani/n2mu/like"
  },
  "object": {
    "label": "baking-cakes",
    "type": [
      "change",
      "artifact",
      "object"
    ],
    "uri": "http://cltl.nl/leolani/world/baking-cakes"
  },
  "perspective": {
    "_certainty": "CERTAIN",
    "_polarity": "NEGATIVE",
    "_sentiment": "POSITIVE",
    "_time": null,
    "_emotion": "UNDERSPECIFIED"
  },
  "context_id": null,
  "date": "2023-10-03T23:38:25.668690",
  "place": "",
  "place_id": null,
  "country"

2023-10-03 23:38:27 -     INFO -                                  cltl.brain.ThoughtGenerator - Gaps: 77 gaps as subject: e.g. selene music concert - 25 gaps as object: e.g. selene dislike agent
2023-10-03 23:38:27 -     INFO -                                  cltl.brain.ThoughtGenerator - Gaps: 1 gaps as subject: e.g. baking-cakes be-inside container - 2 gaps as object: e.g. baking-cakes create person
2it [00:03,  1.61s/it]2023-10-03 23:38:27 -     INFO -                                  cltl.triple_extraction.Chat - Selene     002: "I also like baking cookies"


Utterance: Lea: I don't like baking cakes, Capsule: {
  "chat": 55,
  "turn": 1,
  "author": {
    "label": "Lea",
    "type": [
      "person"
    ],
    "uri": "http://cltl.nl/leolani/world/lea"
  },
  "utterance": "I don't like baking cakes",
  "utterance_type": "STATEMENT",
  "position": "0-25",
  "subject": {
    "label": "Selene",
    "type": [
      "person"
    ],
    "uri": "http://cltl.nl/leolani/world/selene"
  },
  "predicate": {
    "label": "like",
    "type": [
      "emotion"
    ],
    "uri": "http://cltl.nl/leolani/n2mu/like"
  },
  "object": {
    "label": "baking-cakes",
    "type": [
      "change",
      "artifact",
      "object"
    ],
    "uri": "http://cltl.nl/leolani/world/baking-cakes"
  },
  "perspective": {
    "_certainty": "CERTAIN",
    "_polarity": "NEGATIVE",
    "_sentiment": "POSITIVE",
    "_time": null,
    "_emotion": "UNDERSPECIFIED"
  },
  "context_id": null,
  "date": "2023-10-03T23:38:25.668690",
  "place": "",
  "place_id": null,
  "country"

2023-10-03 23:38:28 -     INFO -                          cltl.triple_extraction.cfg_analyzer - Found 1 triples
2023-10-03 23:38:28 -     INFO -                          cltl.triple_extraction.cfg_analyzer - final triple: {'subject': {'label': 'Selene', 'type': ['person']}, 'predicate': {'label': 'like', 'type': ['emotion']}, 'object': {'label': 'baking-cookies', 'type': ['change', 'food']}, 'perspective': {'sentiment': 0.75, 'certainty': 1.0, 'polarity': 1.0, 'emotion': 0.0}} {'neg': False}
2023-10-03 23:38:28 -     INFO -                              cltl.triple_extraction.analyzer - GeneralStatementAnalyzer: Utterance type: "STATEMENT"
2023-10-03 23:38:28 -     INFO -                              cltl.triple_extraction.analyzer - GeneralStatementAnalyzer: RDF triplet    subject: {"label": "Selene", "type": ["person"]}
2023-10-03 23:38:28 -     INFO -                              cltl.triple_extraction.analyzer - GeneralStatementAnalyzer: RDF triplet  predicate: {"label": "like", "ty

	Adding capsule to brain





KeyError: 'label'