In [1]:
import sys
sys.path.append('../../')

from text2kg.core import Pipeline, PipelineEngine, Task
from text2kg.task import (
    LoadFolder,
    ExtractTranscripts,
    SummarizeTranscripts,
    SplitTranscripts,
)
import logging
from typing import List

In [2]:
class SimplePipelineEngine(PipelineEngine):
    """Simple pipeline implementation using Vanilla Python"""

    def execute(self, tasks: List[Task]):
        data = None
        for task in tasks:
            data = task.process(data)
            print(f"Completed task {task}")
        
        return data

In [3]:
class SelectFew(Task):
    """
    Select few outputs for multi-output task.
    Useful for testing :D
    """

    def __init__(self, start_index=0, count=1):
        """
        Args:
        - start_index: index to start selection from multi-output
        - count: number of outputs to select
        """
        self.start_index = start_index
        self.count = count

    def process(self, data):
        logging.info(f"Selecting one data from={data}")
        result = data[self.start_index:self.start_index + self.count + 1]
        logging.info("\n\n\n\nDEBUG !!!!!!!!!")
        logging.info(f"Selected result={result}")
        return result

In [4]:
pipeline = Pipeline(
    tasks=[
        LoadFolder(folder_path="/home/student/course-materials"),
        # SelectFew(),
        SelectFew(start_index=11),
        ExtractTranscripts(),
        SelectFew(),
        SplitTranscripts(max_tokens=4000),
        SelectFew(count=4),
        SummarizeTranscripts(host="localhost:11434"),
    ],
    pipeline_engine=SimplePipelineEngine(),
)
output = pipeline.run()


Completed task <text2kg.task.LoadFolder object at 0x7f1f8010f0a0>
Completed task <__main__.SelectFew object at 0x7f1f80157910>
Completed task <text2kg.task.ExtractTranscripts object at 0x7f1f4a2f91b0>
Completed task <__main__.SelectFew object at 0x7f1f4a2f87c0>
Completed task <text2kg.task.SplitTranscripts object at 0x7f1f4a2f9270>
Completed task <__main__.SelectFew object at 0x7f1f4a2f9360>
Completed task <text2kg.task.SummarizeTranscripts object at 0x7f1f4a2f9210>


In [12]:
print(f"Number of summaries: {len(output)}")
joint_summaries = "\n".join([
    single_output['summary']
    for single_output in output
])
print(joint_summaries)

Number of summaries: 5
**Key Concepts:**

* **Graduate school:** Can be challenging but rewarding, with potential for failure and low salary.
* **Midterm exam:** Scheduled for this week, covering chapters 3 and 5.
* **Loops:** Important concepts in Python programming, including while loops, range commands, and for loops.
* **Computational thinking:** Principles such as identifying similarities and recognizing patterns are essential for solving complex problems.
* **Practice problems:** Completion of practice problems is essential for understanding and applying loops.
* **Range command:** A command used to generate a set of numbers.
* **For loop:** A loop that iterates over a range of numbers.
**Key Concepts:**

* **For Loop:** A counter-control loop that iterates over a range of numbers.
* **Iteration Variable:** The variable assigned to each value in the range of values in a for loop.
* **Range Command:** A command that creates a range of numbers.
* **Range Operator:** The operator us

In [15]:
system_instructions = """
Given the following key concepts extracted from a university lecture transcript, list the nodes and edges that can form a knowledge graph based on the key topics.
Only create nodes based on large concepts. Only create an edge if the source concept is a prequisite to understand the target concept.
"""

format_instructions = """
# Output Format Instructions
Produce only JSON output and nothing else like so: ```json\n{nodes: [{id: <name of concept>}...], edges: [{source: <id of source concept node>, target: <id of target concept node>}]}\n```
"""

summary_instructions = """# Key Concepts{summaries}\n""".format(summaries=joint_summaries)

prompt = system_instructions + format_instructions + summary_instructions

print(prompt)


Given the following key concepts extracted from a university lecture transcript, list the nodes and edges that can form a knowledge graph based on the key topics.
Only create nodes based on large concepts. Only create an edge if the source concept is a prequisite to understand the target concept.

# Output Format Instructions
Produce only JSON output and nothing else like so: ```json
{nodes: [{id: <name of concept>}...], edges: [{source: <id of source concept node>, target: <id of target concept node>}]}
```
# Key Concepts**Key Concepts:**

* **Graduate school:** Can be challenging but rewarding, with potential for failure and low salary.
* **Midterm exam:** Scheduled for this week, covering chapters 3 and 5.
* **Loops:** Important concepts in Python programming, including while loops, range commands, and for loops.
* **Computational thinking:** Principles such as identifying similarities and recognizing patterns are essential for solving complex problems.
* **Practice problems:** Com

In [18]:
import ollama

llm = ollama.Client(host="localhost:11434")

response = llm.generate(model="gemma:7b", prompt=prompt)
print(response["response"])

```json
{
  "nodes": [
    {
      "id": "Graduate school"
    },
    {
      "id": "Loops"
    },
    {
      "id": "Computational thinking"
    },
    {
      "id": "Practice problems"
    },
    {
      "id": "Range command"
    },
    {
      "id": "For loop"
    },
    {
      "id": "Iteration variable"
    },
    {
      "id": "Range operator"
    },
    {
      "id": "Iteration range"
    },
    {
      "id": "Body of the loop"
    },
    {
      "id": "Index number"
    },
    {
      "id": "List length"
    }
  ],
  "edges": [
    {
      "source": "Graduate school",
      "target": "Practice problems"
    },
    {
      "source": "Loops",
      "target": "Practice problems"
    },
    {
      "source": "Computational thinking",
      "target": "Practice problems"
    },
    {
      "source": "Range command",
      "target": "For loop"
    },
    {
      "source": "For loop",
      "target": "Iteration variable"
    },
    {
      "source": "For loop",
      "target": "Range o