!pip3 install dspy-ai

!pip3 install accelerate

!pip3 install llama-index-embeddings-huggingface

In [1]:
import pandas as pd

import dspy
from dspy.evaluate import Evaluate
from dspy.datasets.hotpotqa import HotPotQA
from dspy.teleprompt import BootstrapFewShotWithRandomSearch

# from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
# from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
# from llama_index.readers.file import PandasExcelReader
# from llama_index.core.embeddings import resolve_embed_model
# from transformers import AutoTokenizer


In [2]:
# DSPy signature for the answer-generation step. DSPy uses the class docstring
# as the task instructions and each field's `desc` as prompt text, so those
# strings are runtime text rather than plain documentation.
# NOTE: a later cell in this file defines `GenerateAnswer` again with a different
# `answer` description; whichever definition ran last is the one in effect.
class GenerateAnswer(dspy.Signature):
    """Answer questions with short factoid answers."""

    # Input: supporting facts for the question.
    context_str = dspy.InputField(desc="contains relevant facts")
    # Input: the question being asked.
    query_str = dspy.InputField()
    # Output: the answer produced by the model.
    answer = dspy.OutputField(desc="value from the spreadsheet in the retriever.")


class RAG(dspy.Module):
    """Retrieve-then-answer program built on the `GenerateAnswer` signature.

    Args:
        num_passages: Value passed as `k` to `dspy.Retrieve`, i.e. how many
            passages are requested per question.
    """

    def __init__(self, num_passages=3):
        super().__init__()

        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        """Answer `question` from retrieved passages.

        Args:
            question: The question text; used both as the retrieval query and
                as the `query_str` input of the predictor.

        Returns:
            A `dspy.Prediction` with `context` (the retrieved passages) and
            `answer` (the predictor's answer).
        """
        context = self.retrieve(question).passages
        # GenerateAnswer declares its inputs as `context_str` and `query_str`,
        # so the predictor has to be called with exactly those keyword names.
        prediction = self.generate_answer(context_str=context, query_str=question)
        return dspy.Prediction(context=context, answer=prediction.answer)

In [None]:
import os

# The Hugging Face access token is read from the environment instead of being
# hard-coded, so the secret is never stored with the notebook.
# NOTE(review): a token was previously committed in this cell; it should be
# revoked on the Hugging Face account. With HF_TOKEN unset this is None, which
# is presumably fine for public checkpoints -- confirm for gated models.
access_token = os.environ.get("HF_TOKEN")
# model_name = "EleutherAI/gpt-neo-125m"
# model_name = "clibrain/mamba-2.8b-instruct-openhermes"
# model_name = "microsoft/Phi-3-mini-128k-instruct" # 128K context window
# model_name = "meta-llama/Meta-Llama-3-8B-Instruct" # 8K context window
# model_name = "mistralai/Mistral-7B-Instruct-v0.3" # 32K context window
# model_name = "clibrain/mamba-2.8b-instruct-openhermes" # 8K context window
embed_model_name = "BAAI/bge-small-en-v1.5"
model_name = "Qwen/Qwen2-1.5B-Instruct"
llm = dspy.HFModel(model=model_name, hf_device_map='auto', token=access_token)
# Generation settings: allow up to 1000 new tokens and disable sampling.
llm.kwargs['max_new_tokens']=1000
llm.kwargs['do_sample']=False
# llm.kwargs['typical_p']=0.9
# llm.kwargs['temperature']=0.1
# dspy.settings.configure(lm=llm)

# ColBERTv2 retrieval client; it is created here but not passed to
# `dspy.settings.configure` below.
colbert = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')

# llm.model=None
# gc.collect
# llm.model=AutoModelForCausalLM.from_pretrained(model_name, quantization_config=None, trust_remote_code=True, device_map="auto", attn_implementation="flash_attention_2",  torch_dtype=torch.float16)
# llm.model.generation_config.pad_token_id = llm.tokenizer.eos_token_id
# llm.tokenizer.pad_token_id = llm.tokenizer.eos_token_id

filepath = "/workspace/data/MASTER - PYTHON - SCORING MODEL - MCG MADISON RIDGE DST - v2.0.xlsx"
# documents = PandasExcelReader(sheet_name="5 - Disposition Analysis").load_data(filepath)
# embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
# vector_index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
# # vector_index.storage_context.persist(persist_dir="/workspace/data/storage/alpha")
# retriever = vector_index.as_retriever(top_k=2)

# NOTE(review): `retriever` is only assigned by the commented-out llama-index
# lines above, so this call raises NameError unless `retriever` already exists
# in the notebook session. Confirm which retriever is intended here.
dspy.settings.configure(lm=llm, rm=retriever)


In [None]:
# DSPy signature for writing a summary that justifies a given sentiment label.
# The docstring and the `desc` strings are prompt text used by DSPy at runtime.
class SentimentSummary(dspy.Signature):
    """Given the sentiment label craft a sentiment summary of the given text so that the summary justifies the sentiment label."""
    # Input: the text to summarise.
    text = dspy.InputField(desc="Sentiment text")
    # Input: the sentiment label the summary has to justify.
    sentiment_label = dspy.InputField(desc="Sentiment label")
    # Output: label plus summary. The `desc` is a plain (non-f) string, so the
    # braces are literal characters describing the expected layout.
    answer = dspy.OutputField(desc="Sentiment label: {sentiment_label}\n\nSummary: {sentiment summary}")


# DSPy signature for the answer-generation step. The docstring and `desc`
# strings are prompt text used by DSPy at runtime.
# NOTE: this rebinds the `GenerateAnswer` name defined in an earlier cell; the
# only difference is the `answer` description.
class GenerateAnswer(dspy.Signature):
    """Answer questions with short factoid answers."""

    # Input: supporting facts; the query pipeline cell links the joined
    # retriever output to this key.
    context_str = dspy.InputField(desc="contains relevant facts")
    # Input: the question; the query pipeline cell links the pipeline input
    # to this key.
    query_str = dspy.InputField()
    # Output: the short answer produced by the model.
    answer = dspy.OutputField(desc="often between 1 and 5 words")




In [None]:
from llama_index.core.query_pipeline import QueryPipeline as QP, InputComponent, FnComponent
from dspy.predict.llamaindex import DSPyComponent, LlamaIndexModule

# Wrap the DSPy chain-of-thought predictor so it can be used as a pipeline module.
dspy_component = DSPyComponent(dspy.ChainOfThought(GenerateAnswer))


def _join_node_contents(contexts):
    """Join the content of every retrieved node, separated by a blank line."""
    return "\n\n".join(node.get_content() for node in contexts)


# Post-processing step that turns the retriever output into one context string.
retriever_post = FnComponent(_join_node_contents)

# Pipeline: input -> retriever -> retriever_post -> synthesizer, with the raw
# input also fed to the synthesizer as the query.
p = QP(verbose=True)
p.add_modules(
    dict(
        input=InputComponent(),
        retriever=retriever,
        retriever_post=retriever_post,
        synthesizer=dspy_component,
    )
)

# (source, destination, extra keyword arguments) for each edge, in wiring order.
_links = [
    ("input", "retriever", {}),
    ("retriever", "retriever_post", {}),
    ("input", "synthesizer", {"dest_key": "query_str"}),
    ("retriever_post", "synthesizer", {"dest_key": "context_str"}),
]
for _src, _dest, _link_kwargs in _links:
    p.add_link(_src, _dest, **_link_kwargs)

# Expose the whole pipeline as a DSPy module.
dspy_qp = LlamaIndexModule(p)

In [None]:
# ReAct agent mapping a question to an answer; its only tool is a DSPy
# retriever asked for a single passage (k=1).
# NOTE(review): presumably relies on the retriever configured through
# `dspy.settings.configure(rm=...)` -- confirm it is set before running.
agent = dspy.ReAct("question -> answer", tools=[dspy.Retrieve(k=1)])

In [None]:
# Ask the agent one question; left as a bare expression so the notebook
# displays the returned value.
agent(question="What is the Disposition Fee?")

In [None]:
from typing import List, Any, Callable, Optional
from pydantic import BaseModel

In [None]:
# DSPy signature for the planning step. The docstring below is the instruction
# text given to the model, so it is runtime text and must not be reworded casually.
class Plan(dspy.Signature):
    """Produce a step by step plan to perform the task. 
The plan needs to be in markdown format and should be broken down into big steps (with ## headings) and sub-steps beneath those.
When thinking about your plan, be sure to think about the tools at your disposal and include them in your plan.
    """
    # Input: the task to plan for; shown to the model under the "Task" prefix.
    task = dspy.InputField(prefix="Task", desc="The task")
    # Input: free-form context; Worker/Worker2 in this file pass the role and
    # the tool descriptions here. `format=str` is forwarded to the field as-is.
    context = dspy.InputField(format=str, desc="The context around the plan")
    # Output: the plan text produced by the model.
    proposed_plan = dspy.OutputField(desc="The proposed, step by step execution plan.")

In [None]:
class Worker(dspy.Module):
    """Planner that drafts a plan for a task, given a role and a list of tools.

    Args:
        role: Free-text description of the worker's role; placed at the top of
            the planning context.
        tools: Objects exposing `name`, `description` and `requires`
            attributes, rendered into a bullet list for the prompt.
    """

    def __init__(self, role: str, tools: List):
        # Initialise the dspy.Module base class, as the RAG module in this file does.
        super().__init__()
        self.role = role
        self.tools = tools
        self.tool_descriptions = "\n".join(
            f"- {t.name}: {t.description}. To use this tool please provide: `{t.requires}`"
            for t in tools
        )
        self.plan = dspy.ChainOfThought(Plan)

    def forward(self, task: str):
        """Generate and print a plan for `task`.

        Args:
            task: Description of the work to plan.

        Returns:
            The prediction returned by the `Plan` predictor; its
            `proposed_plan` attribute holds the plan text that was printed.
        """
        context = f"{self.role}\n{self.tool_descriptions}"
        result = self.plan(context=context, task=task)
        print(result.proposed_plan)
        # Return the prediction as well, so callers can use the plan
        # programmatically instead of only seeing it on stdout.
        return result

In [None]:
# Pydantic model describing a tool that a worker can be offered.
class Tool(BaseModel):
    # Identifier of the tool; Worker2 uses it as the lookup key.
    name: str
    # Human-readable explanation shown in the workers' tool listing.
    description: str
    # What the caller has to supply to use the tool (shown in the tool listing).
    requires: str
    # Callable that performs the tool action; Worker2.execute calls it with a
    # single argument.
    func: Callable

In [None]:
# Two stub tools for exercising the workers; each `func` ignores its argument
# and returns a canned reply.
test_tools = [
    Tool(
        name="phone",
        description="a way of making phone calls",
        requires="phone_number",
        func=lambda _arg: "they've got time",
    ),
    Tool(
        name="local business lookup",
        description="Look up businesses by category",
        requires="business category",
        func=lambda _arg: "Bills landscaping: 415-555-5555",
    ),
]

In [None]:
# Run the planner against the stub tools, using `wrkr` as the language model
# for the duration of the block.
with dspy.context(lm=wrkr):
    Worker(role="assistant", tools=test_tools).forward(
        task="get this yard cleaned up."
    )

In [None]:
class Worker2(dspy.Module):
    """Worker that can draft a plan and execute single tasks with its tools.

    Args:
        role: Free-text description of the worker's role, included in the
            planning context.
        tools: Objects exposing `name`, `description`, `requires` and `func`;
            they are indexed by `name` for lookup during execution.
    """

    def __init__(self, role: str, tools: List):
        # Initialise the dspy.Module base class, as the RAG module in this file does.
        super().__init__()
        self.role = role
        self.tools = {t.name: t for t in tools}
        self.tool_descriptions = "\n".join(
            f"- {t.name}: {t.description}. To use this tool please provide: `{t.requires}`"
            for t in tools
        )
        self._plan = dspy.ChainOfThought(Plan)
        self._tool = dspy.ChainOfThought("task, context -> tool_name, tool_argument")

        print(self.tool_descriptions)

    def plan(self, task: str, feedback: Optional[str] = None):
        """Return the proposed plan text for `task`.

        Args:
            task: Description of the work to plan.
            feedback: Optional feedback on an earlier plan; appended to the
                context when it is non-empty.

        Returns:
            The `proposed_plan` field of the predictor's result.
        """
        context = f"Your role:{self.role}\n Tools at your disposal:\n{self.tool_descriptions}"
        if feedback:
            context += f"\nPrevious feedback on your prior plan {feedback}"
        result = self._plan(task=task, context=context)
        return result.proposed_plan

    def execute(self, task: str, use_tool: bool):
        """Carry out `task`, optionally by selecting and calling a tool.

        Args:
            task: Description of the task to execute.
            use_tool: When false, no tool is consulted and a fixed success
                message is returned.

        Returns:
            The success message when `use_tool` is false, the chosen tool's
            return value when the predicted tool name is known, and
            "Not done" otherwise.
        """
        print(f"executing {task}")
        if not use_tool:
            return f"{task} completed successfully"

        # Let the model pick a tool and its argument from the tool listing.
        res = self._tool(task=task, context=self.tool_descriptions)
        tool_argument = res.tool_argument
        tool = self.tools.get(res.tool_name)
        if tool is None:
            # The predicted name does not match any registered tool.
            return "Not done"
        return tool.func(tool_argument)

In [None]:
from dsp.utils import flatten, deduplicate

# Take the last element of each of the first five candidate-program entries.
AGENTS = [candidate[-1] for candidate in optimized_react.candidate_programs[:5]]

class Aggregator(dspy.Module):
    """Run every agent in `AGENTS` and answer from their pooled observations.

    Args:
        temperature: Temperature set on the LM copy used while the agents run.
    """

    def __init__(self, temperature=0.0):
        # Initialise the dspy.Module base class, as the RAG module in this file does.
        super().__init__()
        self.aggregate = dspy.ChainOfThought('context, question -> answer')
        self.temperature = temperature

    def forward(self, question):
        """Answer `question` from the agents' deduplicated observations.

        Args:
            question: The question passed to every agent and to the final
                aggregation predictor.

        Returns:
            The result of the `context, question -> answer` predictor.
        """
        # Run all agents on an LM copy set to the configured temperature, then
        # flatten and deduplicate the observations they collected.
        with dspy.context(lm=gpt3.copy(temperature=self.temperature)):
            preds = [agent(question=question) for agent in AGENTS]
            context = deduplicate(flatten([flatten(p.observations) for p in preds]))

        # Run the aggregation step to produce a final answer.
        return self.aggregate(context=context, question=question)

In [None]:
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from typing import List, Literal
from llama_index.core.bridge.pydantic import BaseModel, Field
from llama_index.core.tools import FunctionTool
from llama_index.core.base.llms.types import (
    ChatMessage,
    MessageRole,
)


# Expose the spreadsheet query engine as a named tool.
query_engine_tools = QueryEngineTool(
    metadata=ToolMetadata(
        description="contains the information of a spreadsheet, and is useful for retrieving specific values from a spreadsheet",
        name="spreadsheet_value_retriever",
    ),
    query_engine=query_engine,
)

def adding_values(values: List[float]):
    """Return the sum of the numbers in `values` (0 when the list is empty)."""
    total = sum(values)
    return total


# Argument schema for the summing tool; passed as `fn_schema` when the
# function tool is built in this cell.
class AddingArgs(BaseModel):
    # The values to add; the annotation is an untyped List, so the element
    # type is not constrained by this schema.
    values: List = Field(
        description="A list of values to add together."
    )

# Wrap `adding_values` as a tool named "sum_values", with `AddingArgs` as its
# argument schema.
adding_tool = FunctionTool.from_defaults(
    name="sum_values",
    description="Add a list of values together",
    fn_schema=AddingArgs,
    fn=adding_values,
)

In [None]:
# Spreadsheet line items the agent is asked to look up.
message = [
    'Selling Costs',
    'Disposition Fee',
    'Net Operating Income',
    'Loan Assumption/Payoff',
    'Return of Forecasted Reserves',
    'CF Y 11',
    'Return of Maximum Offering Amount',
    'Projected Terminal Cap Rate',
    'Cash Flows',
]
# Build the prompt from the list above so the item names are written only once;
# the resulting text is identical to the previous hand-written prompt.
content = (
    f"Retrieve the following values from the spreadsheet: {', '.join(message)}"
    " (categories 1 through 9)\n"
    "Then add Disposition Fee and Selling Cost together."
)

# The request comes from the user, so it carries the USER role (it was
# previously tagged ASSISTANT).
usr_msg = ChatMessage(
    role=MessageRole.USER,
    content=content,
)
# The agent is called with the plain prompt string.
response = agent1.chat(content)

In [None]:
# Bare expression so the notebook displays the agent's reply.
response

In [None]:
# Prompt asking for nine spreadsheet values followed by one addition.
content = (
    'Retrieve the following values from the spreadsheet: '
    'Selling Costs, Disposition Fee, Net Operating Income, '
    'Loan Assumption/Payoff, Return of Forecasted Reserves, CF Y 11, '
    'Return of Maximum Offering Amount, Projected Terminal Cap Rate, '
    'Cash Flows (categories 1 through 9)\n'
    'Then add Disposition Fee and Selling Cost together.'
)

# Single-turn chat history holding the prompt as a user message.
messages = [dict(role="user", content=content)]