In [1]:
import dspy
from dspy.evaluate import Evaluate
from dspy.teleprompt import BootstrapFewShot, BootstrapFewShotWithRandomSearch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch
from transformers import AutoTokenizer, AutoModel
from infer_utils import load_image


path = "./InternVL2-4B"
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
generation_config = dict(max_new_tokens=1024, do_sample=True)

model = AutoModel.from_pretrained(
    path,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    use_flash_attn=False,
    trust_remote_code=True).eval().cuda()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


FlashAttention2 is not installed.


`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attenton` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.




Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  8.16it/s]


In [46]:
from dsp import LM

class InternVL(LM):
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer
        self.provider = "default"
        self.history = []
        self.kwargs = {
            "model": model,
            "temperature": 0.0,
            "max_tokens": 2048,
            "top_p": 1.0,
            "frequency_penalty": 0.0,
            "presence_penalty": 0.0,
            "n": 1,
        }
        # self.kwargs = {
        #     "temperature": 0.0,
        #     "max_tokens": 2048,
        #     # "top_p": 1,
        #     # "frequency_penalty": 0,
        #     # "presence_penalty": 0,
        #     # "n": 1,
        # }

    def basic_request(self, prompt: str, **kwargs):
        generation_config = {"temperature" : self.kwargs["temperature"],
                             "max_new_tokens" : self.kwargs["max_tokens"],
                             "top_p": self.kwargs["top_p"]} 


        response = self.model.chat(self.tokenizer, None, prompt, generation_config)
        self.history.append({
            "prompt": prompt,
            "response": response,
            "kwargs": kwargs,
        })
        return response

    def __call__(self, prompt, only_completed=True, return_sorted=False, **kwargs):
        response = self.request(prompt, **kwargs)
        completions = [response]
        return completions

In [47]:
import dspy
internvl2 = InternVL(model, tokenizer)
dspy.settings.configure(lm=internvl2)

In [7]:
from dspy.datasets import HotPotQA

# Load the dataset.
dataset = HotPotQA(train_seed=1, train_size=20, eval_seed=2023, dev_size=50, test_size=0)

# Tell DSPy that the 'question' field is the input. Any other fields are labels and/or metadata.
trainset = [x.with_inputs('question') for x in dataset.train]
devset = [x.with_inputs('question') for x in dataset.dev]

len(trainset), len(devset)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Downloading data: 100%|██████████| 566M/566M [01:09<00:00, 8.17MB/s]   
Downloading data: 100%|██████████| 47.5M/47.5M [00:07<00:00, 6.34MB/s]
Downloading data: 100%|██████████| 46.2M/46.2M [00:07<00:00, 6.03MB/s]
Generating train split: 100%|██████████| 90447/90447 [00:13<00:00, 6468.71 examples/s] 
Generating validation split: 100%|██████████| 7405/7405 [00:01<00:00, 7241.50 examples/s]
Generating test split: 100%|██████████| 7405/7405 [00:01<00:00, 6956.72 examples/s]


(20, 50)

In [15]:
train_example = trainset[0]
print(f"Question: {train_example.question}")
print(f"Answer: {train_example.answer}")

Question: At My Window was released by which American singer-songwriter?
Answer: John Townes Van Zandt


In [16]:
dev_example = devset[18]
print(f"Question: {dev_example.question}")
print(f"Answer: {dev_example.answer}")
print(f"Relevant Wikipedia Titles: {dev_example.gold_titles}")

Question: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?
Answer: English
Relevant Wikipedia Titles: {'Restaurant: Impossible', 'Robert Irvine'}


In [17]:
print(f"For this dataset, training examples have input keys {train_example.inputs().keys()} and label keys {train_example.labels().keys()}")
print(f"For this dataset, dev examples have input keys {dev_example.inputs().keys()} and label keys {dev_example.labels().keys()}")

For this dataset, training examples have input keys ['question'] and label keys ['answer']
For this dataset, dev examples have input keys ['question'] and label keys ['answer', 'gold_titles']


In [18]:

class BasicQA(dspy.Signature):
    """Answer questions with short factoid answers."""

    question = dspy.InputField()
    answer = dspy.OutputField(desc="often between 1 and 5 words")

In [28]:
# Define the predictor.
generate_answer = dspy.Predict(BasicQA)

# Call the predictor on a particular input.
pred = generate_answer(question=dev_example.question)

# Print the input and the prediction.
print(f"Question: {dev_example.question}")
print(f"Predicted Answer: {pred.answer}")



Question: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?
Predicted Answer: American


In [54]:
internvl2.inspect_history(n=1)

TypeError: string indices must be integers

In [29]:
dev_example

Example({'question': 'What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?', 'answer': 'English', 'gold_titles': {'Restaurant: Impossible', 'Robert Irvine'}}) (input_keys={'question'})

In [48]:
# Define the predictor. Notice we're just changing the class. The signature BasicQA is unchanged.
generate_answer_with_chain_of_thought = dspy.ChainOfThought(BasicQA)

# Call the predictor on the same input.
pred = generate_answer_with_chain_of_thought(question=dev_example.question)

# Print the input, the chain of thought, and the prediction.
print(f"Question: {dev_example.question}")
print(f"Thought: {pred.rationale.split('.', 1)[1].strip()}")
print(f"Predicted Answer: {pred.answer}")

Question: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?


IndexError: list index out of range

In [49]:
dev_example

Example({'question': 'What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?', 'answer': 'English', 'gold_titles': {'Restaurant: Impossible', 'Robert Irvine'}}) (input_keys={'question'})

In [50]:
pred

Prediction(
    rationale='Answer: American',
    answer='American'
)