#### Install dependencies

In [None]:
%pip install autoflow-ai==0.0.1.dev10
%pip install dotenv sqlalchemy ipywidgets pymysql

#### Configure environment variables

In [None]:
# Create a .env file from the template, then edit it with your own settings, e.g.:
# $ cat .env
# DATABASE_URL='mysql+pymysql://root@localhost:4000/test'
# OPENAI_API_KEY='your_openai_api_key'
import shutil
from pathlib import Path

env_file = Path(".env")
env_template = Path(".env.example")

# Copy the template only when .env is missing, so re-running this cell
# (e.g. Restart & Run All) never overwrites values you have already edited.
if env_file.exists():
    print(f"{env_file} already exists; leaving it unchanged.")
elif env_template.exists():
    shutil.copy(env_template, env_file)
    print(f"Created {env_file} from {env_template}; edit it before continuing.")
else:
    print(f"{env_template} not found; create {env_file} manually.")

In [1]:
import logging
import os
import dotenv

# Logger named after the current module (__name__).
logger = logging.getLogger(__name__)

# Load settings from the .env file. Presumably this supplies the DATABASE_URL and
# OPENAI_API_KEY values read via os.getenv in later cells — verify your .env
# defines both. Kept as the last expression so its return value is displayed.
dotenv.load_dotenv()

True

#### Init Autoflow

In [2]:
from sqlalchemy import create_engine
from autoflow import Autoflow
# On Google Colab, the URL can be read from Colab secrets instead:
# from google.colab import userdata
# db_engine = create_engine(userdata.get('DATABASE_URL'))

# Fail early with an actionable message instead of handing None to
# create_engine when the variable is missing or empty.
database_url = os.getenv("DATABASE_URL")
if not database_url:
    raise RuntimeError(
        "DATABASE_URL is not set; define it in your .env file and re-run "
        "the cell that loads the environment."
    )

# Build the SQLAlchemy engine and hand it to Autoflow.
db_engine = create_engine(database_url)
af = Autoflow(db_engine=db_engine)

#### Create knowledge base

In [11]:
from uuid import UUID

from autoflow.schema import IndexMethod
from autoflow.llms.chat_models import ChatModel
from autoflow.llms.embeddings import EmbeddingModel

# Both models use the same OpenAI credential, so read it from the environment once.
openai_api_key = os.getenv("OPENAI_API_KEY")

chat_model = ChatModel("gpt-4o-mini", api_key=openai_api_key)
embed_model = EmbeddingModel(
    model_name="text-embedding-3-small",
    dimensions=1536,
    api_key=openai_api_key,
)

# Fixed identifier for the demo knowledge base.
kb_id = UUID("655b6cf3-8b30-4839-ba8b-5ed3c502f30e")

# Create the knowledge base with both vector-search and knowledge-graph indexes.
kb = af.create_knowledge_base(
    id=kb_id,
    name="New KB",
    description="This is a knowledge base for testing",
    index_methods=[IndexMethod.VECTOR_SEARCH, IndexMethod.KNOWLEDGE_GRAPH],
    chat_model=chat_model,
    embedding_model=embed_model,
)
# Display the created knowledge base as the cell output.
kb

KnowledgeBase(id=UUID('655b6cf3-8b30-4839-ba8b-5ed3c502f30e'), name='New KB', index_methods=[<IndexMethod.VECTOR_SEARCH: 'VECTOR_SEARCH'>, <IndexMethod.KNOWLEDGE_GRAPH: 'KNOWLEDGE_GRAPH'>], description='This is a knowledge base for testing', chunking_config=GeneralChunkingConfig(mode=<ChunkingMode.GENERAL: 'general'>, chunk_size=1200, chunk_overlap=200, paragraph_separator='\n\n\n'), data_sources=[])

#### Import documents from files

In [None]:
import os

# Notebooks have no __file__, so use the kernel's working directory as the
# base for locating the fixture files.
current_dir = os.getcwd()
current_dir

In [13]:
from pathlib import Path

# Markdown fixture stored in the "fixtures" folder under current_dir.
fixture_path = Path(current_dir, "fixtures", "tidb-overview.md")

# Import the file into the knowledge base; the call's return value is the cell output.
kb.import_documents_from_files(files=[fixture_path])

[]

In [15]:
# Ask the knowledge base for the two chunks most similar to the question.
result = kb.search_documents(query="What is TiDB?", similarity_top_k=2)

# Show each returned chunk as a (score, text) pair.
[(hit.score, hit.chunk.text) for hit in result.chunks]

[(0.7382136171419582,
  'What is TiDB Self-Managed Key features\n<!-- Localization note for TiDB:\n- English: use distributed SQL, and start to emphasize HTAP\n- Chinese: can keep "NewSQL" and emphasize one-stop real-time HTAP ("一栈式实时 HTAP")\n- Japanese: use NewSQL because it is well-recognized\n-->\nTiDB (/\'taɪdiːbi:/, "Ti" stands for Titanium) is an open-source distributed SQL database that supports Hybrid Transactional and Analytical Processing (HTAP) workloads. It is MySQL compatible and features horizontal scalability, strong consistency, and high availability. The goal of TiDB is to provide users with a one-stop database solution that covers OLTP (Online Transactional Processing), OLAP (Online Analytical Processing), and HTAP services. TiDB is suitable for various use cases that require high availability and strong consistency with large-scale data.\nTiDB Self-Managed is a product option of TiDB, where users or organizations can deploy and manage TiDB on their own infrastructure

In [16]:
# Run the same question against the knowledge-graph index.
kg = kb.search_knowledge_graph(query="What is TiDB?")

# List the description of every relationship returned.
[relationship.rag_description for relationship in kg.relationships]

  results = super().execute(


['TiDB -> TiDB Self-Managed is a product option of TiDB that allows for deployment and management on user infrastructure. -> TiDB Self-Managed',
 'TiDB -> TiDB uses TiKV as its row-based storage engine to ensure consistent data storage. -> TiKV',
 'TiDB -> TiDB uses TiFlash as its columnar storage engine to replicate data from TiKV in real time. -> TiFlash',
 'TiDB -> TiDB employs the Multi-Raft protocol to manage transaction logs and ensure strong consistency across replicas. -> Multi-Raft Protocol',
 'TiDB -> TiDB Operator facilitates the management of TiDB on Kubernetes, automating cluster operations. -> TiDB Operator',
 'TiDB -> TiDB Cloud is a fully-managed service that simplifies the deployment and operation of TiDB clusters in the cloud. -> TiDB Cloud',
 'TiDB -> TiDB is built upon a specific architecture that defines its structure and functionality. -> TiDB Architecture',
 'TiDB -> TiDB utilizes a storage component to manage data persistence and retrieval. -> TiDB Storage',
 'T