# A Second Example

In [None]:
import pandas as pd
import numpy as np
import torch
from ollama import Client

from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel
from tqdm.auto import tqdm
import re

In [None]:
# Load models
# Sentence-Transformers model, loaded by name; later cells call its .encode()
# to produce the embeddings used by the "minilm" retrieval mode.
model_general = SentenceTransformer('all-MiniLM-L6-v2')
# SciBERT tokenizer and encoder, loaded by identifier; both are read as
# module-level globals inside embed_with_scibert().
tokenizer_sci = AutoTokenizer.from_pretrained('allenai/scibert_scivocab_uncased')
model_sci = AutoModel.from_pretrained('allenai/scibert_scivocab_uncased')

In [None]:
# Load the dataset from a CSV path relative to the working directory.
# Later cells read its 'terms', 'titles' and 'summaries' columns.
data = pd.read_csv('data/arxiv_data.csv')

In [None]:
# Number of rows in the full dataset.
len(data)

In [None]:
# Preview the first rows of the full dataset.
data.head()

In [None]:
# Ten most frequent values found in the 'terms' column after parsing each cell
# and flattening the parsed values (presumably lists of category tags — verify
# against the CSV).
# NOTE(review): eval() executes every 'terms' cell as Python code. That is only
# safe when the CSV is trusted; ast.literal_eval would parse list literals
# without executing arbitrary expressions.
data.terms.apply(eval).explode().value_counts().head(10)

In [None]:
# Keep only rows whose 'terms' string contains 'cs.CL', and retain just the
# title and abstract columns.
# regex=False makes 'cs.CL' a literal substring test; with pandas' default
# regex=True the '.' is a wildcard and would also match e.g. 'cs-CL' or 'csXCL'.
# reset_index(drop=True) gives a 0..n-1 index so that row i of data2 lines up
# with row i of the embedding matrices computed from data2['summaries'].
data2 = data[data['terms'].str.contains('cs.CL', regex=False)][['titles', 'summaries']].reset_index(drop=True)

In [None]:
# Number of rows kept after the 'cs.CL' filter.
len(data2)

In [None]:
def embed_with_scibert(texts, batch_size=16):
    """Embed texts with SciBERT using attention-mask-aware mean pooling.

    Each text is represented by the average of its token vectors from the
    model's last hidden layer. Padding positions are excluded from the
    average, so a text's embedding does not depend on how long the other
    texts in its batch are (a plain ``mean(dim=1)`` would mix in padding
    vectors and make a single-text query inconsistent with batched documents).

    Uses the module-level ``tokenizer_sci`` and ``model_sci``.

    Args:
        texts: List of strings to embed.
        batch_size: Number of texts tokenized and encoded per forward pass.

    Returns:
        A NumPy array with one row per input text. For empty input, an
        empty array of shape ``(0, hidden_size)``.
    """
    if len(texts) == 0:
        # torch.cat([]) raises, so return an empty matrix explicitly.
        return np.empty((0, model_sci.config.hidden_size), dtype=np.float32)

    embeddings = []
    model_sci.eval()  # inference mode: disables dropout
    with torch.no_grad():
        for start in tqdm(range(0, len(texts), batch_size)):
            batch = texts[start:start + batch_size]
            encoded = tokenizer_sci(batch, padding=True, truncation=True, return_tensors='pt')
            output = model_sci(**encoded)
            hidden = output.last_hidden_state
            # 1 for real tokens, 0 for padding; broadcast over the hidden dim.
            mask = encoded['attention_mask'].unsqueeze(-1).to(hidden.dtype)
            token_sum = (hidden * mask).sum(dim=1)
            # clamp guards against division by zero for an all-padding row.
            token_count = mask.sum(dim=1).clamp(min=1)
            embeddings.append(token_sum / token_count)
    return torch.cat(embeddings).cpu().numpy()

In [None]:
# Embed abstracts
# Both matrices are built from the same list, so row i of each corresponds to
# row i of data2; get_top_k_chunks() reads them as module-level globals.
summaries = data2['summaries'].tolist()
embeddings_general = model_general.encode(summaries, convert_to_numpy=True)
embeddings_sci = embed_with_scibert(summaries)

In [None]:
def count_keyword_matches(text, query):
    """Count the distinct query words that also occur in the text.

    Both strings are lower-cased and split into runs of word characters;
    repeated words count once.

    Args:
        text: The document text to search in.
        query: The query whose words are looked up.

    Returns:
        Number of distinct words shared by ``query`` and ``text``.
    """
    def _vocabulary(raw):
        # Unique lower-cased word tokens of a string.
        return set(re.findall(r'\w+', raw.lower()))

    query_vocab = _vocabulary(query)
    shared = query_vocab & _vocabulary(text)
    return len(shared)

In [None]:
def get_top_k_chunks(data, query, mode="keywords", top_k=5):
    """Rank the rows of ``data`` against ``query`` and return the best ``top_k``.

    Every row is scored three ways: keyword overlap between the query and the
    summary, cosine similarity of SciBERT embeddings, and cosine similarity of
    the general sentence-embedding model. ``mode`` selects which score is used
    for ranking; all three scores are included in each result.

    Relies on module-level state: ``embeddings_sci`` and ``embeddings_general``
    must hold one row per row of ``data``, in the same order, and
    ``model_general`` / ``embed_with_scibert`` are used to embed the query.

    Args:
        data: DataFrame with 'titles' and 'summaries' columns.
        query: Query string.
        mode: One of 'keywords', 'scibert', 'minilm'.
        top_k: Maximum number of results to return.

    Returns:
        A list of at most ``top_k`` dicts with keys 'title', 'summary',
        'keyword_matches', 'sim_sciBERT' and 'sim_miniLM', best first.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
        IndexError: If ``data`` has more rows than the precomputed embeddings.
    """
    sort_field_by_mode = {
        "keywords": "keyword_matches",
        "scibert": "sim_sciBERT",
        "minilm": "sim_miniLM",
    }
    # Validate up front so a typo fails before any embedding work is done.
    if mode not in sort_field_by_mode:
        raise ValueError("Mode must be one of: 'keywords', 'scibert', 'minilm'")

    n_rows = len(data)
    if n_rows == 0:
        return []
    if len(embeddings_sci) < n_rows or len(embeddings_general) < n_rows:
        raise IndexError("data has more rows than the precomputed embedding matrices")

    query_emb_sci = embed_with_scibert([query])[0].reshape(1, -1)
    query_emb_general = model_general.encode([query])[0].reshape(1, -1)

    # One vectorised similarity call per model instead of one call per row.
    sims_sci = cosine_similarity(query_emb_sci, embeddings_sci[:n_rows])[0]
    sims_gen = cosine_similarity(query_emb_general, embeddings_general[:n_rows])[0]

    # Rows are paired with embeddings by position, so the result does not
    # depend on the DataFrame's index labels.
    titles = data['titles'].tolist()
    summaries = data['summaries'].tolist()
    results = [
        {
            "title": title,
            "summary": summary,
            "keyword_matches": count_keyword_matches(summary, query),
            "sim_sciBERT": sim_sci,
            "sim_miniLM": sim_gen,
        }
        for title, summary, sim_sci, sim_gen in zip(titles, summaries, sims_sci, sims_gen)
    ]

    sort_field = sort_field_by_mode[mode]
    # sorted() is stable, so ties keep their original row order.
    ranked = sorted(results, key=lambda row: row[sort_field], reverse=True)
    return ranked[:top_k]

In [None]:
# Query string passed to get_top_k_chunks() and query_ollama() in later cells.
query = "Approaches combining knowledge graphs and text"

In [None]:
# Address (host:port) handed to the Ollama client.
host='127.0.0.1:65383'
# Model name passed to client.chat(); query_ollama() reads this module-level
# name directly, so it must be defined before that function is called.
model="gpt-oss:20b"

client = Client(
    host=host,
)

In [None]:
# Baseline without retrieval: send the bare question to the model.
# The prompt reuses `query` instead of repeating its text in a placeholder-free
# f-string, so this run always asks the same question as the retrieval runs.
answer = client.chat(model, messages=[
        {
            'role': 'user',
            'content': query
        },
    ])

In [None]:
# Print the text content of the chat response.
print(answer.message.content)

## 1.  Why combine Knowledge Graphs (KGs) and Text?

| Benefit | What it gives you |
|---------|-------------------|
| **Structured + Unstructured knowledge** | KGs capture relations and entities explicitly, text carries rich descriptive and contextual information that is hard to encode purely symbolically. |
| **Disambiguation & grounding** | Textual context can be used to resolve ambiguous entity mentions and to link text to KG nodes. |
| **Richer embeddings** | Jointly learning from both modalities yields embeddings that encode both relational and semantic nuances. |
| **Back‑end for downstream tasks** | KG‑augmented models improve performance on QA, NLG, dialogue, entity disambiguation, etc. |
| **Knowledge extraction** | Text → KG pipelines automatically expand and update knowledge bases. |

The challenge is that the two data formats differ in granularity, noise, and structure. A good approach must learn to reconcile them while preserving the strengths of each.

---

## 2.  Taxonomy of Joint KG–Text Approaches

Below is a pragmatic classification that covers most influential research up to 2025.

| # | Category | Key Idea | Representative Works | Typical Tasks |
|---|----------|----------|----------------------|---------------|
| **A** | *Text‑to‑KG: Construction & Expansion* | NLP pipelines extract entities & relations from raw text, then align them to an existing KG. | **OpenIE 4.0**, **RE2**, **TACRED‑to‑KG**, **KG‑BERT‑Extraction** | KB population, relation extraction, event extraction |
| **B** | *KG‑to‑Text: Retrieval & Generation* | Use KG as a structured source to retrieve facts or generate fluent text (e.g., summaries). | **KG2Seq**, **GRAFTNet** (for generation), **KGMN** (fact generation) | Text generation, fact verification |
| **C** | *Joint Representation Learning* | Learn a shared latent space for KG nodes/triples **and** textual sentences. | **JointBERT**, **KG-BERT**, **KBLiSE**, **TransE+BERT**, **KGAT** (with textual features) | Link prediction, entity classification, NLI |
| **D** | *KG‑Enhanced Language Models* | Pre‑train/finetune transformer models that can attend over KG facts (graph‑aware attention). | **K-BERT**, **KnowBERT**, **ERNIE‑3.0**, **RoKG**, **GPT‑4 with KG** | QA, dialogue, open‑domain NLG |
| **E** | *Multimodal (Text + KG) Fine‑Tuning* | Adapt pretrained LMs to KG‑aware downstream tasks by feeding KG embeddings as additional inputs. | **KG‑Guided LLMs** (e.g., **ChatGPT‑KG**, **Llama‑2‑Chat‑KG**) | Fact‑based dialogue, knowledge‑grounded generation |
| **F** | *Dynamic KG Updating* | Use LLM outputs to propose new triples or edit the KG. | **KG‑Update‑LLM**, **KG‑Rex** | Continuous learning, real‑time KG maintenance |

(Above works are illustrative; many others exist in each sub‑domain.)

---

## 3.  Representative Techniques by Category

### A. Text‑to‑KG Construction

| Step | Core Idea | Typical Models |
|------|-----------|----------------|
| 1. Entity Mention Detection | NER or span detection (e.g., BiLSTM‑CRF, BERT‑NER) | SpaCy, Flair, BERT‑NER |
| 2. Entity Linking | Match mention to KG node via similarity (embedding‑based or dictionary) | BiDAF‑EL, ERNIE‑Link, COMET‑EL |
| 3. Relation Extraction | Predict relation type from dependency path or context | BERT‑RE, Graph‑CNN RE, RE2 |
| 4. Triple Validation | Filter spurious triples, score confidence | KB‑aware classifiers, OpenIE confidence |

**Notable Pipelines**

* **TACRED‑to‑KG**: Use a multi‑label RE model trained on TACRED; then map to Wikidata via entity linking.  
* **OpenIE 4.0 + Wikidata Alignment**: Generates *subject–relation–object* triples and aligns them to Wikidata entities.  
* **KG‑BERT‑Extraction**: Uses BERT embeddings of entity pairs to rank candidate triples against KG schema.

### B. KG‑to‑Text Retrieval & Generation

| Task | Model | Key Idea |
|------|-------|----------|
| Fact Retrieval | **Graph‑Enhanced Retrieval** | Index KG triples; use embedding similarity to retrieve relevant facts. |
| Text Generation | **KG‑2Seq** (Seq2Seq with KG encoder) | Encode KG subgraph with GNN; decode sentence. |
| Fact Generation | **GRAFTNet** | Use a graph‑aware encoder that attends to both local KG neighbors and global graph context. |

**Examples**

* **KG‑2Seq** (2020): Uses a GNN to encode a subgraph around an entity, then a Transformer decoder generates a short description.  
* **GRAFTNet** (2021): Extends KG‑2Seq with global attention, producing longer, more coherent narratives.  
* **KGMN** (2022): Generates facts in the form *"X is Y"* by sampling from a joint KG‑text distribution.

### C. Joint Representation Learning

| Objective | Losses | Architectures |
|-----------|--------|---------------|
| KG + Text embedding alignment | Contrastive loss, negative sampling | BERT + TransE hybrid, KG-BERT |
| Triple classification | BCE + KL | KGAT with textual node features |
| Knowledge graph completion | Triplet ranking | TransE + RoBERTa embedding of entity description |

**Key Models**

* **KG-BERT** (2020): Adds *knowledge attention* to BERT; triples are encoded as *[entity] [relation] [entity]* sequences.  
* **TransE‑BERT** (2021): Uses BERT embeddings of entity descriptions as initial vectors for TransE training.  
* **KBLiSE** (2022): Jointly trains a graph neural network and a language model via a shared latent space; used for entity classification.

### D. KG‑Enhanced Language Models

| Model | KG Integration | Applications |
|-------|----------------|--------------|
| **K‑BERT** | Pre‑trained on *entity‑linked sentences* | QA, entity disambiguation |
| **KnowBERT** | KG facts inserted as additional tokens; uses multi‑head attention over KG embeddings | Knowledge‑grounded NLG, dialogue |
| **ERNIE‑3.0** | Adds *knowledge graph entity* and *relation* embeddings to BERT | NER, RE, fact retrieval |
| **RoKG** | Uses a relational graph encoder before the transformer | KG‑aware text generation |
| **ChatGPT‑KG** (2024) | Fine‑tuned GPT‑4 with KG knowledge prompting | Fact‑based dialogue, open‑domain QA |

**Common Strategy**  
*Insert KG facts as “knowledge tokens”* (KnowBERT) or *use graph attention layers* (K‑BERT) that can query the KG during forward passes. This keeps the transformer architecture while injecting symbolic knowledge.

### E. Multimodal Fine‑Tuning

* **Knowledge‑guided Prompting**: Add a KG subgraph (in JSON or triple list) as a prompt to an LLM.  
* **Hybrid Input**: Concatenate KG embeddings (e.g., node vectors) to sentence embeddings before feeding into the transformer.  
* **Knowledge‑aware Loss**: Add a KL divergence between the LLM’s output distribution and a distribution derived from KG probabilities.

**State‑of‑the‑art LLMs**  
* **Llama‑2‑Chat‑KG** (2025): 13B parameter model fine‑tuned with KG‑augmented prompts, achieving 30% higher factual accuracy on MMLU.  
* **ChatGPT‑KG** (2024): Uses a separate KG encoder that provides attention weights to the LLM during generation.

### F. Dynamic KG Updating

* **KG‑Update‑LLM** (2025): Uses GPT‑4 to generate candidate triples from conversation logs, then uses a scoring network to decide insertion.  
* **KG‑Rex** (2024): Reactive extraction system that continually ingests news streams and updates the KG in near‑real time.

---

## 4.  Common Challenges & Open Problems

| Challenge | Why it matters | Potential Directions |
|-----------|----------------|----------------------|
| **Noise & hallucination** | Textual models can generate facts not present in the KG (or vice versa). | Confidence scoring, adversarial training, KG‑based constraints. |
| **Scalability** | KGs can be billions of triples; naive retrieval is infeasible. | Approximate nearest neighbor search, hierarchical indexing, subgraph sampling. |
| **Alignment & grounding** | Linking mentions to the correct KG node is error‑prone. | Cross‑lingual embeddings, knowledge‑aware NER, co‑training with KG embeddings. |
| **Dynamic knowledge** | Static KGs become stale quickly. | Incremental graph embeddings, streaming KG update frameworks. |
| **Evaluation** | Measuring “knowledge fidelity” in LLMs is hard. | Benchmark suites that combine KG facts with NLU tasks (e.g., KBQA, FactR, KGRex). |
| **Privacy & bias** | KGs may encode biased relations; LLMs may amplify them. | Bias‑aware KG pruning, adversarial debiasing, transparent provenance. |

---

## 5.  Emerging Trends (2023–2025)

1. **Large‑Scale KG Embedding Models** – 10B‑parameter KG encoders (e.g., **GraphFormer‑Large**) pre‑trained on billions of triples, enabling zero‑shot reasoning.  
2. **Graph‑to‑Graph Neural Language Models** – Models that can attend over multiple graph modalities simultaneously (social, biological, knowledge).  
3. **Self‑Supervised KG–Text Pre‑training** – Tasks like *masked triple prediction* combined with *masked token prediction*, e.g., **KG‑Masked Language Modeling (KGMLM)**.  
4. **Multilingual KG‑Text Integration** – Cross‑lingual KG embeddings paired with multilingual language models to handle non‑English corpora.  
5. **Explainable KG‑Enhanced LLMs** – Attention visualizers that trace LLM outputs back to KG triples, improving interpretability.  

---

## 6.  Quick Reference Checklist

| Goal | Which category? | Recommended Papers |
|------|----------------|--------------------|
| Build a KG from news articles | A | *OpenIE 4.0 + Wikidata*, *RE2* |
| Generate a product description from a KG subgraph | B | *KG‑2Seq*, *GRAFTNet* |
| Train embeddings that respect both relational structure and textual description | C | *TransE‑BERT*, *KG-BERT* |
| Build a QA system that uses KG facts for factuality | D | *KnowBERT*, *ChatGPT‑KG* |
| Fine‑tune an existing LLM to be knowledge‑grounded | E | *Llama‑2‑Chat‑KG*, *RoKG* |
| Continuously update a KG with new events | F | *KG‑Update‑LLM*, *KG‑Rex* |

---

## 7.  Practical Tips for Researchers / Practitioners

1. **Start with a good entity linker** – Even the best KG–text model can fail if entity mentions are incorrectly linked.  
2. **Use joint loss functions** – A contrastive loss between text and KG embeddings often stabilizes training.  
3. **Leverage pre‑trained LMs as feature extractors** – Fine‑tune on a small KG‑aware dataset rather than training from scratch.  
4. **Cache KG embeddings** – Compute node embeddings offline and cache them; recompute only when the KG changes significantly.  
5. **Validate with human‑in‑the‑loop** – Especially for knowledge‑grounded generation, a small set of human evaluations can catch hallucinations early.

---

### Selected Bibliography (2023‑2025)

| Year | Title | Venue | Link |
|------|-------|-------|------|
| 2023 | *GraphFormer: A Large‑Scale Pre‑trained Knowledge Graph Model* | ICML | https://arxiv.org/abs/2305.12345 |
| 2024 | *ChatGPT‑KG: Fact‑Based Dialogue with Knowledge Graphs* | ACL | https://arxiv.org/abs/2402.67890 |
| 2024 | *KG‑Update‑LLM: Real‑Time Knowledge Base Population with Large Language Models* | EMNLP | https://arxiv.org/abs/2405.00123 |
| 2025 | *Llama‑2‑Chat‑KG: Multi‑Modal Fine‑Tuning of LLMs with Structured Knowledge* | NeurIPS | https://arxiv.org/abs/2510.11111 |
| 2025 | *KG‑Masked Language Modeling for Joint KG–Text Representation* | ICLR | https://arxiv.org/abs/2511.22222 |

(These are illustrative; actual URLs may differ.)

---

## 8.  Final Take‑away

Combining KGs and text unlocks a synergy: the symbolic rigor of KGs complements the nuance of natural language. Depending on your objective—whether it is KG construction, reasoning, or knowledge‑grounded language generation—you will choose one or more of the approaches above. The field is rapidly evolving, with large‑scale KG embeddings and graph‑aware LLMs becoming mainstream. A careful design that keeps both modalities in mind will yield the most robust, factual, and explainable AI systems.


In [None]:
def build_context_for_rag(chunks):
    """Concatenate retrieved chunks into a single context string.

    Each chunk contributes a "Title: ..." line, an "Abstract: ..." line and a
    trailing blank line, in the order given.

    Args:
        chunks: Iterable of dicts that each provide 'title' and 'summary'.

    Returns:
        The concatenated context string; empty if ``chunks`` is empty.
    """
    # join avoids repeated string reallocation from += in a loop.
    return "".join(
        f"Title: {chunk['title']}\nAbstract: {chunk['summary']}\n\n"
        for chunk in chunks
    )

In [None]:
def query_ollama(context, question, client):
    """Ask the chat model to answer ``question`` from ``context``, streaming the reply.

    The reply is printed piece by piece as it arrives and is also returned,
    so callers that assign the result receive the answer text rather than
    ``None``.

    The model name is read from the module-level ``model`` variable.

    Args:
        context: Text placed under "Context:" in the prompt.
        question: The question placed under "Question:" in the prompt.
        client: Object whose ``chat(model, messages=..., stream=True)`` yields
            parts exposing ``part.message.content``.

    Returns:
        The complete answer text (all streamed pieces concatenated).
    """
    messages = [
        {
            'role': 'user',
            'content': f"""You are a helpful research assistant. Answer the question below using the paper abstracts provided.

Context:
{context}

Question: {question}

Answer:"""
        },
    ]

    pieces = []
    for part in client.chat(model, messages=messages, stream=True):
        # A part may carry no text; treat that as an empty piece.
        piece = part.message.content or ''
        print(piece, end='', flush=True)
        pieces.append(piece)
    return ''.join(pieces)

In [None]:
# Top 10 rows ranked by keyword overlap with the query.
chunks_kw = get_top_k_chunks(data2, query, mode="keywords", top_k=10)

In [None]:
# Top 5 rows ranked by the "minilm" similarity score.
chunks_mini = get_top_k_chunks(data2, query, mode="minilm", top_k=5)

In [None]:
# Top 5 rows ranked by the "scibert" similarity score.
chunks_sci = get_top_k_chunks(data2, query, mode="scibert", top_k=5)

In [None]:
# Generate an answer using the keyword-ranked chunks as context.
answer_kw = query_ollama(build_context_for_rag(chunks_kw), query, client)

**Approaches that explicitly fuse knowledge‑graph (KG) representations with textual data**

| # | Paper | How it fuses KG & text | Key idea / technique | Typical downstream task |
|---|-------|------------------------|----------------------|------------------------|
| 1 | **LG4AV: Combining Language Models and Graph Neural Networks for Author Verification** | Uses a pre‑trained transformer to ingest the *text* of a document (title/abstract) and a GNN to embed the *author‑coauthorship graph*. The two streams are jointly trained so that the textual representation can be evaluated against the graph‑derived author embeddings. | • No hand‑crafted stylometric features; the transformer learns stylistic cues directly. <br>• The GNN propagates author‑level context (co‑authors, venues, research topics) to give the model a richer semantic backdrop. | Authorship verification on short, stylistically uniform scientific abstracts. |
| 2 | **Learning Knowledge‑Graph‑Based World Models of Textual Environments** | The agent’s state is represented as a *knowledge graph* (nodes = objects, locations, characters; edges = relations). The model is trained to predict the *next state graph* after an action, and simultaneously to generate the *natural‑language action* that caused that transition. | • A transformer‑based multi‑task architecture that takes a graph encoding and a textual action sequence and learns to map between them. <br>• Losses that jointly supervise graph‑to‑graph updates and text generation. | Text‑based adventure games / interactive narratives – predicting state changes and generating appropriate textual commands. |
| 3 | **Fork or Fail: Cycle‑Consistent Training with Many‑to‑One Mappings** | The pipeline learns a *surjective* mapping from knowledge graphs to natural language (graph‑to‑text) and the inverse *many‑to‑one* mapping (text‑to‑graph) via a conditional VAE. During cycle training, the VAE converts the many‑to‑one direction into an implicit bijection, allowing reconstruction in both directions. | • CVAE encoder produces a latent code that captures the “essential” part of the graph, enabling realistic text generation. <br>• Cycle consistency is enforced despite the inherent many‑to‑one nature of graph‑to‑text. | Graph‑to‑text generation and the complementary reverse task (text interpretation as a graph). |

---

### What these papers share

1. **Dual‑modal representation learning** – both graph structure and raw text are embedded in a shared or jointly‑trained space.  
2. **Cross‑modal supervision** – a prediction or reconstruction loss on one modality is used to guide learning on the other (e.g., LG4AV trains the transformer on a classification objective that depends on GNN node embeddings).  
3. **Application‑specific design** – the fusion strategy is tailored to the downstream problem: author verification, interactive narrative planning, or natural‑language generation from structured data.

### Take‑away

If you’re looking to combine KGs with textual data, the three papers above provide concrete, tested frameworks that:

- **LG4AV** shows how to merge text‑only signals with relational context for verification tasks.  
- **Learning KG‑based World Models** demonstrates joint learning of state dynamics and language generation in game worlds.  
- **Fork or Fail** tackles the challenging graph‑to‑text direction while preserving cycle consistency, an approach useful for any many‑to‑one mapping scenario.  

These works can serve as blueprints or starting points for any new application that requires reasoning over both structured knowledge and free‑form text.

In [None]:
# Inspect the context string built from the SciBERT-ranked chunks.
print(build_context_for_rag(chunks_sci))

In [None]:
# Generate an answer using the "minilm"-ranked chunks as context.
answer_mini = query_ollama(build_context_for_rag(chunks_mini), query, client)

**Key Approaches that Integrate Knowledge Graphs (KGs) with Textual Information**

| # | Paper | Core Idea | How it fuses KG + text | Key Take‑aways |
|---|-------|-----------|-----------------------|----------------|
| 1 | **Entity Context Graph: Learning Entity Representations from Semi‑Structured Textual Sources on the Web**<br>(duplicate abstract) | Instead of building a full KG, the authors **extract “triples” directly from semi‑structured web pages** (e.g., infoboxes, tables, HTML lists) and learn entity embeddings from those triples. | • The “triples” are derived from text, not from an existing KG.<br>• No pre‑defined relation labels are needed; the extraction produces a **new representation format** that captures entity co‑occurrence and structural cues from the source text.<br>• The learned embeddings can be fed into downstream KG tasks (e.g., link prediction) and are **comparable or superior to traditional KG‑based embeddings** and to contextual language‑model embeddings. | • Demonstrates that a **minimal “KG” can be built on‑the‑fly from textual data**.<br>• Provides a fast, domain‑specific way to bootstrap entity representations when a curated KG is unavailable. |
| 2 | **Cross‑lingual Knowledge Graph Alignment via Graph Matching Neural Network** | Aligns entities across KGs in different languages by treating each entity as a **topic entity graph** (a local sub‑graph of the KG). | • The approach itself does not directly ingest raw text; however, the *topic entity graph* can be enriched with **textual descriptions or context** (e.g., Wikipedia abstracts, language‑specific labels) before the graph‑matching stage.<br>• The graph‑attention model can jointly consider structural neighbors **and** any attached textual features (if present). | • Shows that **local graph structure plus attention** yields strong alignment even without relying on cross‑lingual embeddings.<br>• Extends naturally to include text‑derived features (e.g., node embeddings from language models) to further improve matching. |
| 3 | **Type‑Augmented Relation Prediction in Knowledge Graphs** | Enhances relation prediction (KG completion) by combining **ontological (type) information** with instance‑level triple evidence. | • While the method itself focuses on KG structure, the **type information** often originates from textual descriptions or schema annotations (e.g., "Person", "Place") that can be extracted from text.<br>• By treating type priors as Bayesian priors, the model can incorporate **text‑derived type hints** to constrain relation predictions. | • Achieves significant gains over baseline KG completion models.<br>• Demonstrates that **semantic type signals (often extracted from text)** can be effectively merged with structural signals for better inference. |

### Summary of How These Approaches Bridge KGs and Text

1. **Direct Extraction from Text** – The Entity Context Graph method *creates* KG‑style triples from semi‑structured text, then learns embeddings from that “pseudo‑KG.”  
2. **Graph‑Based Alignment with Optional Text Enrichment** – The Cross‑lingual KG alignment model builds local graphs around entities. These graphs can be *augmented* with text (e.g., titles, descriptions) before the attention‑based matching stage.  
3. **Ontology‑Driven Prediction Leveraging Text‑Derived Types** – The Type‑Augmented Relation Prediction framework explicitly models type information, which can be sourced from textual annotations or entity descriptions, thereby blending ontology with instance data.

Collectively, these works illustrate a spectrum of strategies for merging knowledge graphs with textual sources: from **extract‑then‑embed** (Entity Context Graph), to **structural matching with optional text features** (Cross‑lingual KG alignment), to **semantic priors derived from text** (Type‑Augmented Relation Prediction). They provide practical pathways for leveraging the complementary strengths of structured knowledge and unstructured text.

In [None]:
# Generate an answer using the SciBERT-ranked chunks as context.
answer_sci = query_ollama(build_context_for_rag(chunks_sci), query, client)

**Key approaches that fuse knowledge‑graph (KG) structure with natural‑language text**

| Paper | How the KG is used | How it is combined with text |
|-------|--------------------|------------------------------|
| **Attributes as Semantic Units between Natural Language and Visual Recognition** | Attributes are treated as *semantic units* extracted from large language resources (e.g., word embeddings, knowledge bases). These attributes are linked to visual concepts via a knowledge graph that represents relations such as “is‑a”, “part‑of”, and “uses‑tool”. | The KG provides a *semantic bridge* between language and vision: attribute vectors derived from the graph are inserted into visual recognition pipelines, enabling zero‑shot learning and grounding of language into images. |
| **From Images to Sentences through Scene Description Graphs (SDG)** | An automatically constructed “commonsense” knowledge base (derived from NLP on image captions) and lexical ontologies such as WordNet form a KG that captures entity–relation triples (e.g., *person‑wears‑clothing*, *dog‑chases‑ball*). | Visual detections are fed into the KG, where commonsense reasoning (rule‑based or probabilistic inference) expands the set of plausible relationships. The enriched graph is then linearised into natural‑language sentences, producing richer captions than pure‑vision captioners. |
| **Structured Neural Summarization** | Graph neural networks (GNNs) are applied to *weakly‑structured* representations of text (e.g., dependency or semantic graphs). These graphs can be viewed as lightweight KGs where nodes are tokens/phrases and edges are syntactic or semantic relations. | A hybrid architecture concatenates a sequence encoder (e.g., Transformer or LSTM) with a GNN that processes the graph component. The model learns to attend to both local sequential cues and long‑range relational patterns, yielding more coherent summaries than either pure sequence or pure graph models. |
| **TCNN: Triple Convolutional Neural Network Models for Retrieval‑based QA** | While not explicitly a KG, the retrieval step relies on a QA knowledge base—essentially a collection of text entries that can be organized as a graph of queries ↔ answers. | The model applies convolutional and attention mechanisms to embed query and candidate answer texts, then reranks them using semantic similarity scores derived from the underlying knowledge base. |

**Common themes**

1. **Knowledge extraction from text** – NLP pipelines parse captions or corpora to generate entity–relation triples (WordNet, OpenIE, etc.).
2. **Graph construction** – Entities become nodes, relations become edges; commonsense or ontological knowledge is layered on top of purely visual detections.
3. **Joint reasoning** – Graph neural networks or rule‑based inference combine visual evidence with KG knowledge to enrich representations.
4. **Text generation** – Linearised or template‑based rendering of the enriched graph produces natural‑language outputs (captions, summaries, QA responses).

These works illustrate how the structural expressiveness of knowledge graphs can be married to the flexibility of textual representations to improve perception, grounding, and generation tasks.


# Evaluation

In [None]:
def format_eval_prompt(query, context, answer):
    """Assemble the LLM-as-judge prompt for one RAG response.

    The prompt shows the query, the retrieved context and the generated
    answer under markdown headings, asks for 1–5 scores on context
    relevance, answer relevance and groundedness, and spells out the JSON
    layout the judge must reply with.

    Args:
        query: The question that was asked.
        context: The retrieved text the answer is judged against.
        answer: The generated answer being evaluated.

    Returns:
        The full prompt as one string; it begins and ends with a newline.
    """
    role_line = (
        "You are an expert evaluator judging a RAG (retrieval-augmented "
        "generation) system for scientific literature review."
    )
    rubric = (
        "1. **Context Relevance (1–5):** How well does the context match the query?",
        "2. **Answer Relevance (1–5):** How directly does the answer address the query?",
        "3. **Groundedness (1–5):** How well is the answer supported by the context?",
    )
    # Literal JSON skeleton the judge is told to fill in; X/Y/Z and "..."
    # are placeholders for the judge, not values substituted here.
    json_skeleton = (
        '{',
        '  "context_relevance": X,',
        '  "answer_relevance": Y,',
        '  "groundedness": Z,',
        '  "justification": {',
        '    "context": "...",',
        '    "answer": "...",',
        '    "grounding": "..."',
        '  }',
        '}',
    )
    # The empty first and last entries produce the leading and trailing
    # newline once everything is joined.
    pieces = [
        "",
        role_line,
        "",
        "## Query:",
        f"{query}",
        "",
        "## Retrieved Context:",
        f"{context}",
        "",
        "## Generated Answer:",
        f"{answer}",
        "",
        "Evaluate the following:",
        "",
        *rubric,
        "",
        "Provide a short explanation for each score.",
        "",
        "Respond strictly in the following JSON format:",
        "",
        *json_skeleton,
        "",
    ]
    return "\n".join(pieces)


## Keyword-based

In [None]:
# Inputs for judging the keyword-based run: the query text and the context
# string built from `chunks_kw`.
query = "Approaches combining knowledge graphs and text"
# NOTE(review): `build_context_for_rag` and `chunks_kw` are defined outside this
# cell; presumably `chunks_kw` holds the keyword-mode retrieval results — confirm.
context = build_context_for_rag(chunks_kw)
answer = """**Approaches that explicitly fuse knowledge‑graph (KG) representations with natural‑language text**

| Paper | Core idea of the KG–text fusion | Typical downstream tasks | Key observations |
|-------|--------------------------------|---------------------------|------------------|
| **LG4AV: Combining Language Models and Graph Neural Networks for Author Verification** | 1. Pass the *text* (title/abstract) through a pre‑trained transformer (e.g., BERT). <br>2. Project the transformer outputs into a graph‑aware embedding space using a Graph Neural Network (GNN) that operates on the co‑authorship graph. <br>3. Jointly train the transformer and GNN so that the textual representation is enriched with relational signals from the graph. | Authorship verification, especially for short scientific abstracts. | The GNN supplies *contextual* signals (e.g., co‑authorship, topical similarity) that compensate for the limited stylistic cues in short texts. |
| **LinkNBed: Multi‑Graph Representation Learning with Entity Linkage** | 1. Learn embeddings jointly across *multiple* KGs (different corpora or domains). <br>2. Use an entity‑linkage module that aligns entities that appear in different graphs, thus allowing cross‑graph knowledge transfer. <br>3. Integrate side‑information such as textual descriptions or Wikipedia passages (if available) as additional modalities during joint training. | Link prediction, entity linkage, unified KG construction. | By aligning entities across graphs, LinkNBed can incorporate textual descriptions from one graph to disambiguate entities in another, achieving state‑of‑the‑art on link prediction benchmarks. |
| **Learning Knowledge Graph‑based World Models of Textual Environments** | 1. Represent the *state* of a text‑based game as a KG (nodes = entities, edges = relations). <br>2. Use a transformer‑based encoder–decoder that takes both the KG and the *current textual observation* as input. <br>3. The model learns two tasks simultaneously: (i) predict the KG transition induced by an action; (ii) generate a set of *natural‑language actions* that are relevant in the current context. | World‑model learning for text‑based RL agents; zero‑shot transfer to unseen game worlds. | The fusion of KG dynamics and language generation gives the agent a *structured* understanding of the world that improves sample efficiency and generalization. |
| **Fork or Fail: Cycle‑Consistent Training with Many‑to‑One Mappings** | 1. Treat *graph → text* and *text → graph* as two inverse problems. <br>2. Use a conditional VAE (CVAE) to model the *surjective* mapping from graphs to text, allowing multiple valid texts per graph. <br>3. Train a cycle‑consistency loss that reconstructs both the graph and the text while encouraging diversity in the one‑to‑many direction. | Graph‑to‑text generation (e.g., generating natural language from KG triples) and text‑to‑graph reconstruction. | The CVAE allows the model to learn the *distribution* of plausible textual realizations of a KG, mitigating the rigid bijection assumption that would otherwise hurt reconstruction quality. |
| **HypE: Self‑Supervised Hyperboloid Representations from Logical Queries over Knowledge Graphs** | Although HypE is primarily a KG embedding method, it is *self‑supervised* on *logical queries* that can be formulated from textual descriptions of relations (e.g., “students of the same professor”). The learned hyperboloid embeddings can be mapped to textual semantic vectors via a learned linear transformation. | Query answering, anomaly detection in hierarchical taxonomies. | By representing KG entities in hyperbolic space, HypE captures hierarchical relationships that are also expressed in language, enabling richer cross‑modal inference. |

### How these methods combine KG and text

1. **Joint embedding spaces** – LG4AV and LinkNBed learn a common vector space where textual embeddings and graph embeddings coexist. This allows the model to borrow evidence from one modality to disambiguate or enrich the other.

2. **Conditional generation** – Fork or Fail explicitly conditions the generation of text on a KG and vice versa, training a probabilistic mapping that can handle one‑to‑many relationships.

3. **Structured world modeling** – The world‑model paper shows how a KG can be the backbone of a reinforcement‑learning environment, while the text interface (user prompts, descriptions) is the observable channel. The transformer acts as a bridge that translates between the two.

4. **Self‑supervision via logical queries** – HypE exploits logical queries that often originate from textual descriptions (e.g., “who are the parents of X?”) to supervise KG embedding learning, making the embedding space useful for both symbolic reasoning and textual inference.

### Take‑away

- **Multi‑modal fusion** can be achieved either by *embedding alignment* (LG4AV, LinkNBed), *generative conditioning* (Fork or Fail), or *structured world modeling* (Learning KG‑based World Models).  
- **Hyperbolic embeddings** (HypE) provide a way to encode hierarchical language semantics in the KG space, enabling transfer of reasoning capability between the two modalities.  
- These approaches collectively demonstrate that knowledge graphs and text are complementary: the graph supplies relational structure, while text supplies rich contextual semantics. Their integration yields more robust, interpretable, and generalizable models for tasks ranging from authorship verification to AI‑driven text‑based games.
"""

In [None]:
# Render the evaluation prompt once and send that same string as the single
# user message, so `prompt` always matches what the judge model received.
prompt = format_eval_prompt(query, context, answer)

messages = [
    {
        'role': 'user',
        'content': prompt,
    },
]

# `client` and `model` are defined outside this cell. With stream=False the
# reply comes back as one response object rather than a stream of chunks.
raw = client.chat(model, messages=messages, stream=False)
print(raw.message.content)

## all-MiniLM-L6-v2 embeddings

In [None]:
# Inputs for judging the all-MiniLM-L6-v2 run: the query text and the context
# string built from `chunks_mini`.
query = "Approaches combining knowledge graphs and text"
# NOTE(review): `build_context_for_rag` and `chunks_mini` are defined outside this
# cell; presumably `chunks_mini` holds the MiniLM-embedding retrieval results — confirm.
context = build_context_for_rag(chunks_mini)
answer = """**Combining Knowledge Graphs (KGs) with Text: Representative Approaches**

| # | Approach | Core Idea | How Text is Used | Key Results |
|---|----------|-----------|------------------|-------------|
| **1** | **Entity‑Context Graph (ECG)** | Extracts *entity‑centric* triples directly from semi‑structured web pages (e.g., Wikipedia infoboxes, product pages) and learns entity embeddings without building a full KG. | - Parses the text to identify “entity → value” pairs. <br> - Treats each pair as a lightweight triple (entity, *implicit* relation, value). <br> - Uses these triples to train a standard KG embedding model (e.g., TransE). | Embeddings obtained are **comparable to** traditional KG‑based embeddings, **better than** contextual LM‑based entity vectors, and **much cheaper** to generate. (R. Khan et al., *2023*) |
| **2** | **Cross‑lingual KG Alignment via Graph‑Matching Neural Network** | Formulates alignment as a graph‑matching problem on *topic entity graphs*—local sub‑graphs that capture contextual information around each entity. | - For each entity, extracts a sub‑graph comprising neighboring entities and their relations (often derived from text‑extracted triples or descriptive sentences). <br> - Uses graph‑attention layers to compute an alignment score between two entities across languages. | Outperforms previous state‑of‑the‑art methods on several cross‑lingual benchmarks by leveraging *text‑derived context*. (Z. Liu et al., *2022*) |
| **3** | **Type‑Augmented Relation Prediction (TaRP)** | Improves KG completion by incorporating *ontological type information* (entity & relation types) as a prior over the likelihood of a relation. | - Types are often extracted from the same textual sources that provide the instance‑level triples (e.g., infobox schemas, section headers). <br> - These types are encoded as prior probabilities and combined with likelihoods via Bayes’ rule. | Achieves significant gains on FB15K, YAGO, DBpedia benchmarks and demonstrates strong *data efficiency* and *cross‑dataset generalization*. (S. Zhang et al., *2023*) |

---

### How These Methods Blend Text and KG Knowledge

1. **Text → Lightweight Triples → KG Embeddings**  
   *ECG* bypasses full KG construction by turning natural‑language (semi‑structured) mentions into triples. The resulting “graph” is then fed into standard embedding algorithms.

2. **Text → Contextual Sub‑Graphs → Graph Matching**  
   *Cross‑lingual alignment* builds a *topic entity graph* from text‑derived relations. The graph‑attention network learns to align entities by comparing the *context* they appear in, which often comes from linguistic cues in the source text.

3. **Text → Type Information → Probabilistic Prediction**  
   *TaRP* uses types (e.g., “Person”, “Location”) that are normally inferred from the same documents that supply the raw triples. By treating type consistency as a prior, the model leverages textual ontology to guide missing‑relation inference.

---

### Take‑away

All three approaches demonstrate that **text can be directly leveraged to enrich, build, or align KG representations**—without the heavy engineering cost of traditional triple extraction or large‑scale graph construction. They also illustrate a spectrum of strategies:

- **Direct embedding from text** (ECG)  
- **Contextual graph construction for cross‑lingual tasks** (Cross‑lingual Graph Matching)  
- **Ontological priors derived from text** (TaRP)

These paradigms can be mixed or extended—e.g., using language‑model embeddings as node features, or augmenting topic entity graphs with learned attention weights from transformer‑based encoders—providing a rich toolkit for anyone working at the intersection of KGs and natural language.
"""

In [None]:
# Render the evaluation prompt once and send that same string as the single
# user message, so `prompt` always matches what the judge model received.
prompt = format_eval_prompt(query, context, answer)

messages = [
    {
        'role': 'user',
        'content': prompt,
    },
]

# `client` and `model` are defined outside this cell. With stream=False the
# reply comes back as one response object rather than a stream of chunks.
raw = client.chat(model, messages=messages, stream=False)
print(raw.message.content)

## SciBERT embeddings

In [None]:
# Inputs for judging the SciBERT run: the query text and the context string
# built from `chunks_sci`.
query = "Approaches combining knowledge graphs and text"
# NOTE(review): `build_context_for_rag` and `chunks_sci` are defined outside this
# cell; presumably `chunks_sci` holds the SciBERT-embedding retrieval results — confirm.
context = build_context_for_rag(chunks_sci)
answer = """**Approaches that fuse knowledge‑graph (KG) structure with textual information**

| # | Approach / Model | Core idea | How KG and text are combined | Typical use‑case |
|---|------------------|-----------|------------------------------|-----------------|
| 1 | **Attribute‑driven semantic units** (Attributes as Semantic Units chapter) | Visual concepts (attributes) are mined from language resources (e.g., WordNet, caption corpora) and used as semantic pivots between image features and natural language. | Attributes form a lightweight KG (attribute ↔ concept graph). Textual features (e.g., word embeddings) are aligned to the same attributes, enabling cross‑modal retrieval and captioning. | Image‑to‑text generation, visual grounding, VQA. |
| 2 | **Scene Description Graphs (SDGs)** | Each image is parsed into a graph where nodes are objects, attributes, and relations; edges encode spatial/semantic relations. | The SDG is enriched by two external knowledge sources:<br>• A *commonsense* KB derived from image captions (NLP‑extracted predicates).<br>• WordNet lexical ontology (synset relations). The graph thus merges visual detections, commonsense inference, and lexical semantics. | Automatic image captioning, image‑sentence alignment, visual storytelling. |
| 3 | **Graph‑enhanced neural summarization** (Structured Neural Summarization) | A conventional sequence encoder (e.g., Transformer) is augmented with a graph encoder that operates on a *dependency / discourse graph* extracted from the input text. | The graph captures long‑range dependencies that are hard to learn from the sequence alone. Textual tokens are first embedded, then passed through the graph neural network (GNN), and finally merged back into the sequence decoder. | Summarizing long documents, abstractive summarization. |
| 4 | **Retrieval‑based QA with triple‑convolutional networks** (TCNN & ATCNN) | The system retrieves a set of candidate knowledge entries from a QA KB, then reranks them by semantic matching. | Knowledge entries are first embedded as *triples* (subject–predicate–object). TCNN convolutions capture local patterns over these triples, while attention layers align the query text to the triple embeddings. | E‑commerce QA, customer support chatbots. |
| 5 | **Graph‑aware language models (not in the given titles but widely studied)** | Pre‑train language models on KG triples (e.g., ERNIE, K-BERT) or use KG embeddings as auxiliary signals. | KG embeddings are injected into the transformer layers or used to bias attention. | Question answering, entity linking, relation extraction. |
| 6 | **Joint KG–text representation learning** | Learn a shared vector space for KG entities and textual contexts (e.g., via co‑training). | Text passages containing entity mentions are aligned with KG nodes; a shared encoder (e.g., BERT) is fine‑tuned on both types of data. | Entity disambiguation, KG completion. |

---

### Common patterns in these methods

1. **Graph construction**  
   * From visual detections (SDG) → node = object/attribute, edge = spatial/semantic relation.  
   * From language corpora → nodes = entities/words, edges = dependency or co‑occurrence.  
   * From structured KBs (e.g., Freebase) → raw RDF triples.

2. **Feature fusion**  
   * **Early fusion** – concatenate raw text embeddings with KG embeddings before the encoder.  
   * **Late fusion** – process text and KG separately and combine scores (e.g., reranking).  
   * **Mid‑level fusion** – use GNNs to refine textual representations guided by graph structure.

3. **Inference or reasoning**  
   * Use graph propagation (GNN, message passing) to capture multi‑hop relationships.  
   * Apply rule‑based or probabilistic inference on the KG to enrich textual outputs (e.g., commonsense reasoning in SDG).

4. **Evaluation**  
   * End‑to‑end tasks: image captioning, VQA, summarization, QA.  
   * Alignment metrics: BLEU, METEOR for captions; ROUGE for summaries; precision/recall for retrieval.

---

### Take‑away

Approaches that merge knowledge graphs and text typically build a graph that captures semantic or relational structure (from vision, language, or curated KBs) and then apply neural or graph‑based methods to fuse this structured knowledge with unstructured textual signals. The fusion can be performed at multiple stages of the pipeline, enabling richer representations that improve performance on downstream tasks such as image captioning, question answering, and summarization."""

In [None]:
# Render the evaluation prompt once and send that same string as the single
# user message, so `prompt` always matches what the judge model received.
prompt = format_eval_prompt(query, context, answer)

messages = [
    {
        'role': 'user',
        'content': prompt,
    },
]

# `client` and `model` are defined outside this cell. With stream=False the
# reply comes back as one response object rather than a stream of chunks.
raw = client.chat(model, messages=messages, stream=False)
print(raw.message.content)