# From text to eKG

This notebook has the minimum sample code to convert a transcribed interaction into is graph representation.

## Description

IN THIS SCENARIO, TWO PEOPLE DISCUSS SIMPLE PREFERENCE OVER BAKING.

## Prerequisites
1. Download [GraphDB](http://graphdb.ontotext.com/)
2. Launch it
3. Create a repository, you can use [this configuration](https://github.com/leolani/cltl-knowledgerepresentation/blob/main/src/cltl/brain/ontologies/BASIC-REPOSITORY-CONFIG-GRAPHDB.ttl)

In [1]:
# Imports
import json
from datetime import date, datetime
from pathlib import Path
from random import getrandbits

import requests
from cltl.brain.long_term_memory import LongTermMemory
from cltl.brain.utils.helper_functions import brain_response_to_json
from cltl.entity_linking.label_linker import LabelBasedLinker
from cltl.triple_extraction.api import Chat
from cltl.triple_extraction.cfg_analyzer import CFGAnalyzer
# from cltl.triple_extraction.spacy_analyzer import spacyAnalyzer
from cltl.triple_extraction.utils.helper_functions import utterance_to_capsules
from tqdm import tqdm

import logging

from cltl.brain import logger as brain_logger

from cltl.triple_extraction import logger as chat_logger

In [10]:
# Data
baking_scenario = [
    "Selene: I like baking cakes",
    "Lea: I don't like baking cakes",
    "Selene: I also like baking cookies",
    "Lea: I like cookies",
    "Selene: do you like chocolate cookies?",
    "Lea: I love chocolate cookies",
    "Selene: I will bake chocolate cookies tonight"
]
baking_scenario_speakers = ["Lea", "Selene"]  # Person who speaks first has to be the second in the array

In [11]:
def create_context_capsule():
    # Define contextual features
    context_id = getrandbits(8)
    place_id = getrandbits(8)
    location = requests.get("https://ipinfo.io").json()
    start_date = date(2021, 3, 12)

    return {"context_id": context_id,
            "date": start_date,
            "place": "Unknown",
            "place_id": place_id,
            "country": location['country'],
            "region": location['region'],
            "city": location['city']}

In [12]:
def main(scenario, speakers):
    # Set logging levels
    chat_logger.setLevel(logging.ERROR)
    brain_logger.setLevel(logging.ERROR)

    # Create analyzers
    analyzer = CFGAnalyzer()
    linker = LabelBasedLinker()

    # Create folders
    scenario_filepath = Path('./data/baking/')
    graph_filepath = scenario_filepath / Path('graph/')
    graph_filepath.mkdir(parents=True, exist_ok=True)

    # Initialize brain, Chat, 
    brain = LongTermMemory(address="http://localhost:7200/repositories/sandbox",  # Location to save accumulated graph
                           log_dir=graph_filepath,  # Location to save step-wise graphs
                           clear_all=True)  # To start from an empty brain
    chat = Chat(speakers[0], speakers[1])

    # Create context
    context_capsule = create_context_capsule()
    brain.capsule_context(context_capsule)

    # Loop through utterances
    all_responses = []
    all_capsules = []
    capsules_skipped = 0
    for turn, utterance in tqdm(enumerate(scenario)):
        # split speaker from utterance
        speaker, clean_utterance = utterance.split(': ')

        # add utterance to chat and use CFG analyzer to analyze
        chat.add_utterance(clean_utterance, speaker)
        analyzer.analyze(chat.last_utterance) # TODO: fix that last utterance speaker is used over chat speaker
        capsules = utterance_to_capsules(chat.last_utterance)

        # add statement capsules to brain
        for capsule in capsules:
            try:
                # Ugly fix of capsule
                capsule['author'] = {"label": speaker, "type": ["person"]}
                capsule['timestamp'] = datetime.now()
                
                # Link to specific instances
                linker.link(capsule)
                
                # Add capsule to brain
                print("\tAdding capsule to brain")
                response = brain.capsule_statement(capsule, reason_types=True, create_label=True) # Fix problem with overlaps?
                
                # Keep track of responses
                capsule['rdf_file'] = str(response['rdf_log_path'].stem) + '.trig'
                capsule_json = brain_response_to_json(capsule)
                all_capsules.append(capsule_json)
                response_json = brain_response_to_json(response)
                all_responses.append(response_json)
                
                # print(f"Utterance: {utterance}, Capsule: {json.dumps(capsule_json, indent=2)}")
            except:
                capsules_skipped += 1
                print(f"\tCapsule skipped. Total skipped: {capsules_skipped}\n{json.dumps(brain_response_to_json(capsule), indent=2)}")

    # Save responses 
    f = open(scenario_filepath / "capsules.json", "w")
    json.dump(all_capsules, f)
    f = open(scenario_filepath / "responses.json", "w")
    json.dump(all_responses, f)




In [13]:
main(baking_scenario, baking_scenario_speakers)

0it [00:00, ?it/s]

	Adding capsule to brain


1it [00:01,  1.05s/it]

	Adding capsule to brain
	Adding capsule to brain


3it [00:03,  1.13s/it]

	Adding capsule to brain
	Capsule skipped. Total skipped: 1
{
  "chat": 183,
  "turn": 2,
  "author": {
    "label": "Selene",
    "type": [
      "person"
    ],
    "uri": "http://cltl.nl/leolani/world/selene"
  },
  "utterance": "I also like baking cookies",
  "utterance_type": "STATEMENT",
  "position": "0-26",
  "subject": {
    "label": "Selene",
    "type": [
      "person"
    ],
    "uri": "http://cltl.nl/leolani/world/selene"
  },
  "predicate": {
    "label": "like",
    "type": [
      "emotion"
    ],
    "uri": "http://cltl.nl/leolani/n2mu/like"
  },
  "object": {
    "label": "baking-cookies",
    "type": [
      "change",
      "food"
    ],
    "uri": "http://cltl.nl/leolani/world/baking-cookies"
  },
  "perspective": {
    "_certainty": "CERTAIN",
    "_polarity": "POSITIVE",
    "_sentiment": "POSITIVE",
    "_time": null,
    "_emotion": "UNDERSPECIFIED"
  },
  "context_id": null,
  "date": "2023-10-04T00:37:16.839037",
  "place": "",
  "place_id": null,
  "country"

4it [00:04,  1.12it/s]

	Adding capsule to brain
	Capsule skipped. Total skipped: 2
{
  "chat": 183,
  "turn": 3,
  "author": {
    "label": "Lea",
    "type": [
      "person"
    ],
    "uri": "http://cltl.nl/leolani/world/lea"
  },
  "utterance": "I like cookies",
  "utterance_type": "STATEMENT",
  "position": "0-14",
  "subject": {
    "label": "Selene",
    "type": [
      "person"
    ],
    "uri": "http://cltl.nl/leolani/world/selene"
  },
  "predicate": {
    "label": "like",
    "type": [
      "emotion"
    ],
    "uri": "http://cltl.nl/leolani/n2mu/like"
  },
  "object": {
    "label": "cookies",
    "type": [
      "food"
    ],
    "uri": "http://cltl.nl/leolani/world/cookies"
  },
  "perspective": {
    "_certainty": "CERTAIN",
    "_polarity": "POSITIVE",
    "_sentiment": "POSITIVE",
    "_time": null,
    "_emotion": "UNDERSPECIFIED"
  },
  "context_id": null,
  "date": "2023-10-04T00:37:17.477160",
  "place": "",
  "place_id": null,
  "country": "",
  "region": "",
  "city": "",
  "objects":

5it [00:04,  1.33it/s]

	Adding capsule to brain
	Capsule skipped. Total skipped: 3
{
  "chat": 183,
  "turn": 4,
  "author": {
    "label": "Selene",
    "type": [
      "person"
    ],
    "uri": "http://cltl.nl/leolani/world/selene"
  },
  "utterance": "do you like chocolate cookies?",
  "utterance_type": "QUESTION",
  "position": "0-30",
  "subject": {
    "label": "Lea",
    "type": [
      "quantity"
    ],
    "uri": "http://cltl.nl/leolani/world/lea"
  },
  "predicate": {
    "label": "like",
    "type": [
      "emotion"
    ],
    "uri": "http://cltl.nl/leolani/n2mu/like"
  },
  "object": {
    "label": "chocolate-cookies",
    "type": [
      "food",
      "food"
    ],
    "uri": "http://cltl.nl/leolani/world/chocolate-cookies"
  },
  "perspective": {
    "_certainty": "UNDERSPECIFIED",
    "_polarity": "UNDERSPECIFIED",
    "_sentiment": "UNDERSPECIFIED",
    "_time": null,
    "_emotion": "UNDERSPECIFIED"
  },
  "context_id": null,
  "date": "2023-10-04T00:37:18.015143",
  "place": "",
  "place_

6it [00:06,  1.04s/it]

	Adding capsule to brain
	Adding capsule to brain
	Adding capsule to brain
	Adding capsule to brain
	Adding capsule to brain
	Adding capsule to brain


7it [00:10,  1.49s/it]
