# Context
Example: The Table and TableSchema classes

In [None]:
from modaic.context.base import ContextSchema, Molecular
import pandas as pd
from io import BytesIO
from typing import ClassVar, Type, Optional
import duckdb


class TableSchema(ContextSchema):
    """
    Schema describing the fields of a `Table` context object.

    `Table.schema` points at this class, and its fields can be used to build
    filter expressions (e.g. `TableSchema.num_rows > 100`).
    """

    # Name of the context class this schema belongs to.
    context_class: ClassVar[str] = "Table"
    # Name of the table.
    name: str
    # Mirrors `Table.num_rows`.
    # NOTE(review): presumably the table's row count — confirm.
    num_rows: int


class Table(Molecular):
    """
    A molecular context object that represents a table. Can be queried with SQL.
    """

    schema: ClassVar[Type[ContextSchema]] = TableSchema

    def __init__(
        self, df: pd.DataFrame, name: str, num_rows: int = 100, **kwargs
    ):
        """
        Initializes a Table context object.

        Args:
            df: The dataframe to represent as a table.
            name: The name of the table.
            num_rows: Value stored on the instance as `num_rows` (defaults to 100).
                NOTE(review): this is not derived from `df`; confirm its intended meaning.
            **kwargs: Additional keyword arguments to pass to the Molecular context object.
        """
        super().__init__(**kwargs)
        self._df = df
        self.name = name
        self.num_rows = num_rows

    def query(self, query: str) -> pd.DataFrame:
        """
        Queries the table. All queries run should refer to the table as `this` or `This`

        Args:
            query: The SQL query to run against the underlying dataframe.

        Returns:
            The query result converted to a pandas DataFrame.

        Example:
            ```python
            >>> df = pd.DataFrame({"Column1": [1, 2, 3], "Column2": [4, 5, 6]})
            >>> table = Table(df, name="table")
            >>> table.query("SELECT Column2 FROM this WHERE Column1 > 1")
               Column2
            0        5
            1        6
            ```
        """
        # The dataframe is exposed to DuckDB under the virtual table name "this".
        return duckdb.query_df(self._df, "this", query).to_df()

    def markdown(self) -> str:
        """
        Converts the table to markdown format.
        Returns a markdown representation of the table with the table name as header.

        Missing values (as detected by `pd.isna`) are rendered as empty cells.
        Every line of the result, including the last, ends with a newline.
        """
        columns = [str(col) for col in self._df.columns]

        lines = [
            f"Table name: {self.name}",
            # Header row
            "| " + " | ".join(columns) + " |",
            # Header separator
            "| " + " | ".join(["---"] * len(columns)) + " |",
        ]

        # Data rows. `pd.isna` already covers `None`, so no separate check is needed.
        for _, row in self._df.iterrows():
            cells = ["" if pd.isna(value) else str(value) for value in row]
            lines.append("| " + " | ".join(cells) + " |")

        # Join once instead of repeatedly concatenating strings.
        return "\n".join(lines) + "\n"

    def readme(self) -> str:
        """
        readme method for table. Returns a markdown representation of the table.

        Example:
            ```python
            >>> df = pd.DataFrame({"Column1": [1, 2, 3], "Column2": [4, 5, 6], "Column3": [7, 8, 9]})
            >>> table = Table(df, name="table")
            >>> print(table.readme())
            Table name: table
            | Column1 | Column2 | Column3 |
            | --- | --- | --- |
            | 1 | 4 | 7 |
            | 2 | 5 | 8 |
            | 3 | 6 | 9 |
            ```
        """
        return self.markdown()

    def embedme(self) -> str:
        """
        embedme method for table. Returns a markdown representation of the table.
        """
        return self.markdown()

    @classmethod
    def from_csv(
        cls,
        file: str | BytesIO,
        name: Optional[str] = None,
        metadata: Optional[dict] = None,
        **kwargs,
    ) -> "Table":
        """
        Builds a Table from a CSV file.

        Args:
            file: Path to a CSV file, or an in-memory buffer containing CSV data.
            name: The name of the table.
                NOTE(review): `None` is forwarded as-is although `__init__` annotates `name` as `str`.
            metadata: Metadata forwarded to the constructor as the `metadata` keyword.
                Defaults to a fresh empty dict for each call.
            **kwargs: Additional keyword arguments forwarded to the constructor (e.g. `num_rows`).

        Returns:
            A new Table wrapping the parsed dataframe.
        """
        df = pd.read_csv(file)
        # Avoid a shared mutable default: create a new dict per call.
        if metadata is None:
            metadata = {}
        # Pass metadata by keyword so it does not land in the positional `num_rows` slot
        # (which also collided with an explicit `num_rows=` in kwargs).
        # NOTE(review): assumes the Molecular base accepts a `metadata` keyword — confirm.
        return cls(df, name, metadata=metadata, **kwargs)


# Indexing
You can do a lot with ContextSchema subclasses and the different database integrations that modaic provides.

## Vector Database Example

In [None]:
from typing import List, Literal
import os
from modaic import Indexer
from langchain_text_splitters import RecursiveCharacterTextSplitter
from modaic.databases import VectorDatabase, MilvusVDBConfig, SearchResult, SQLDatabase, SQLiteConfig
from modaic.indexing import PineconeReranker, Embedder
from modaic.context import Text, TextSchema, Filter

# Connection settings for the Milvus instance used in this example.
vector_db_config = MilvusVDBConfig(
    host="localhost",
    port=19530,
    collection_name="table_rag",
)

embedder = Embedder(model="openai/text-embedding-3-small")
# NOTE(review): the payload schema is Text.schema, but the records added below are
# Table objects and the search results are described as TableSchema — confirm.
vector_database = VectorDatabase(
    config=vector_db_config,
    embedder=embedder,
    payload_schema=Text.schema,
)

t1 = Table.from_csv("data/t1.csv", name="budget1", num_rows=100)
t2 = Table.from_csv("data/t2.csv", name="budget2", num_rows=200)
t3 = Table.from_csv("data/t3.csv", name="budget3", num_rows=900)

# Only t1 and t2 are added to the vector database here; t3 is not indexed.
records = [t1, t2]

vector_database.add_records("table_rag", records, batch_size=10000)

embedding = embedder("2025 budget")

# Named `row_filter` rather than `filter` so the builtin `filter` is not shadowed.
row_filter = TableSchema.num_rows > 100
results = vector_database.search("table_rag", embedding, 1, row_filter)
# results will be a list of SearchResult(score: float, result: TableSchema)


## Graph Database Example

In [None]:
from modaic.databases import GraphDatabase, Neo4jConfig
from modaic.context import Relationship

# Connection settings for the Neo4j instance used in this example.
config = Neo4jConfig(
    host="localhost",
    port=7687,
    username="neo4j",
    password="password",
    database="neo4j",
    driver="neo4j",
    driver_args={},
    driver_kwargs={},
)

graph_db = GraphDatabase(config)

# Link t1 to t2 with a DERIVED_FROM relationship.
# NOTE(review): presumably `>>` on both sides means a directed edge t1 -> t2 — confirm.
derived_from = Relationship(label="DERIVED_FROM")
e1 = t1 >> derived_from >> t2

# Link t1 and t3 with a SIMILAR relationship.
# NOTE(review): presumably `<< ... >>` means an undirected/bidirectional edge — confirm.
similar = Relationship(label="SIMILAR")
e2 = t1 << similar >> t3

# Persist both relationships, in the order they were created.
for edge in (e1, e2):
    edge.save(graph_db)


## Config
Stores static configuration parameters for an agent, i.e. the parameters that change the behavior or accuracy of an experiment.

In [None]:
from dataclasses import dataclass
from modaic import PrecompiledConfig


@dataclass
class TableAgentConfig(PrecompiledConfig):
    """
    Static configuration for the table agent and its indexer.
    """

    # Row-count threshold; TableIndexer.query compares TableSchema.num_rows against it.
    max_num_rows: int = 100
    # Model identifier handed to the Embedder.
    embedding_model: str = "openai/text-embedding-3-small"


## Indexer
Ingests data and runs queries over it to feed results into the agent.

In [None]:
import modaic
from modaic import Embedder
from modaic.context import Table, TableSchema
from modaic.databases import VectorDatabase, VectorDBConfig

class TableIndexer(modaic.Indexer):
    """
    Indexer that stores tables in a vector database and retrieves them by
    embedding similarity.
    """

    def __init__(self, config: TableAgentConfig, vdb_config: VectorDBConfig):
        """
        Initializes the indexer.

        Args:
            config: Agent configuration; supplies `embedding_model` and `max_num_rows`.
            vdb_config: Connection settings for the vector database.
        """
        super().__init__(config)
        self.vdb_config = vdb_config
        # query() reads this from the instance, so it must be set explicitly.
        self.max_num_rows = config.max_num_rows
        self.embedder = Embedder(config.embedding_model)
        self.vdb = VectorDatabase(vdb_config, self.embedder)

    def ingest(self, table: Table):
        """
        Adds a single table to the vector database, using the table's name as
        the first argument to `add_records`.
        """
        self.vdb.add_records(table.name, [table], batch_size=10000)

    def query(self, query: str):
        """
        Embeds the query text and searches the vector database for the single
        best match among tables whose `num_rows` exceeds `max_num_rows`.

        Args:
            query: Natural-language query text.

        Returns:
            Whatever `VectorDatabase.search` returns for the top-1 search.
        """
        embedding = self.embedder(query)
        # NOTE(review): elsewhere `search` is called with a collection name as its
        # first argument; none is passed here — confirm the expected signature.
        # NOTE(review): the filter keeps rows with num_rows > max_num_rows; given
        # the name "max", `<=` may have been intended — confirm.
        results = self.vdb.search(embedding, 1, Filter(TableSchema.num_rows > self.max_num_rows))
        return results

In [None]:
from modaic import PrecompiledAgent
import dspy

class TableAgent(PrecompiledAgent):
    """
    Agent that retrieves a table relevant to a question and summarizes it.
    """

    def __init__(self, config: TableAgentConfig, indexer: TableIndexer):
        """
        Initializes the agent.

        Args:
            config: Agent configuration.
            indexer: Indexer used to retrieve tables for a question.
        """
        super().__init__(config, indexer=indexer)
        # Predictor with one input field (`table`) and one output field (`summary`).
        self.summarizer = dspy.Predict("table -> summary")

    def forward(self, question: str):
        """
        Retrieves results for the question and summarizes them.

        Args:
            question: The question to answer.

        Returns:
            The prediction produced by the summarizer.
        """
        results = self.indexer.query(question)
        # dspy predictors take keyword arguments named after the signature's
        # input fields, so the results are passed as `table=`.
        return self.summarizer(table=results)

# Pushing Frameworks to Hub

In [None]:
import json
from dataset import ExampleDataset
dataset = ExampleDataset()

# Use the agent-specific config class: it declares `max_num_rows` and
# `embedding_model`, both of which TableIndexer reads.
config = TableAgentConfig(max_num_rows=100)
vdb_config = VectorDBConfig(
    host="localhost",
    port=19530,
)
# NOTE(review): `embedder` and `vector_database` are not consumed below —
# TableIndexer builds its own from `config` and `vdb_config`.
embedder = Embedder(model="openai/text-embedding-3-small")
vector_database = VectorDatabase(vdb_config, embedder)
# TableIndexer.__init__ takes (config, vdb_config).
indexer = TableIndexer(config, vdb_config)
agent = TableAgent(config, indexer)

agent.forward("What is the total budget for the year?")

tp = dspy.MIPROv2(
    metric=dspy.evaluate.SemanticF1(decompositional=True), auto="medium", num_threads=24
)

# compile() returns the optimized program; keep it so the optimized agent
# (not the original one) is what gets pushed.
agent = tp.compile(agent, trainset=dataset, max_bootstrapped_demos=2, max_labeled_demos=2)

agent.push_to_hub("tytodd/table-agent")

# Load Framework from Hub

In [None]:
from modaic import AutoAgent, AutoIndexer

# Connection settings are supplied locally when loading the indexer.
local_vdb_config = VectorDBConfig(host="localhost", port=19530)

# Load the precompiled indexer from the hub.
indexer = AutoIndexer.from_precompiled(
    "tytodd/table-indexer", vdb_config=local_vdb_config
)

# Load the precompiled agent and attach the indexer loaded above.
agent = AutoAgent.from_precompiled("tytodd/table-agent", indexer=indexer)
