In [None]:
## Install Dependencies
# IPython/Colab shell escape (`!`): installs DSPy together with the
# transformers / torch / accelerate packages used by the rest of this cell.
!pip install dspy transformers torch accelerate

# Second pass: upgrade transformers on top of whatever version the first
# command resolved (or the runtime already had installed).
!pip install --upgrade transformers

## Import Required Libraries
import dspy
import torch
from transformers import AutoModelForVision2Seq, AutoProcessor, Blip2Processor

# Check for GPU & Enable Offloading
device = "cuda" if torch.cuda.is_available() else "cpu"
offload_buffers = True  # Reduce memory usage for large models

# Load Proxy-Lite-3B model
model_name = "convergence-ai/proxy-lite-3b"

# Load the processor that belongs to the checkpoint itself. The tokenizer
# vocabulary has to match the model being run; a processor taken from a
# different checkpoint (e.g. BLIP-2/OPT) yields token ids the model was
# never trained on.
processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)

# Half precision only when a GPU is present; fall back to float32 on CPU,
# where float16 kernels are slow or unavailable for some ops.
model_dtype = torch.float16 if device == "cuda" else torch.float32

# Load the model with offloading enabled; `device_map="auto"` lets
# accelerate decide the placement of each weight shard.
model = AutoModelForVision2Seq.from_pretrained(
    model_name,
    torch_dtype=model_dtype,
    device_map="auto",
    offload_buffers=offload_buffers,
    trust_remote_code=True,
)

## Define Custom DSPy Model Wrapper
class CustomProxyLiteLLM(dspy.LM):
    """DSPy language-model adapter around a locally loaded Hugging Face model.

    ``dspy.LM`` expects ``model`` to be a *string* identifier; handing it the
    ``torch.nn.Module`` itself fails inside the base constructor with
    ``TypeError: argument of type '...' is not iterable``. The module is
    therefore kept on ``self.hf_model`` and only its name is forwarded to the
    base class, so ``self.model`` stays a plain string as DSPy expects.

    Args:
        model: A loaded Hugging Face generative model exposing ``generate``
            and ``device``.
        processor: The matching processor; it must accept ``text=...`` and
            expose a ``tokenizer`` attribute.
    """

    def __init__(self, model, processor):
        # Prefer the checkpoint name recorded by transformers; fall back to
        # the class name so the base class always receives a string.
        model_id = getattr(model, "name_or_path", None) or type(model).__name__
        super().__init__(model=model_id)
        self.hf_model = model
        self.processor = processor

    def __call__(self, prompt=None, messages=None, **kwargs):
        """Entry point used by DSPy modules and adapters.

        Overrides the base implementation so that requests are served by the
        local model instead of being routed to a remote provider.

        Args:
            prompt: Raw prompt string (used when ``messages`` is ``None``).
            messages: Chat-style list of ``{"role": ..., "content": ...}``
                dicts.
            **kwargs: Generation options forwarded to :meth:`complete`.

        Returns:
            A single-element list containing the generated completion text.

        Raises:
            ValueError: If neither ``prompt`` nor ``messages`` is supplied.
        """
        if messages is not None:
            prompt = self._render_messages(messages)
        if prompt is None:
            raise ValueError("Either `prompt` or `messages` must be provided.")
        return [self.complete(prompt, **kwargs)]

    def _render_messages(self, messages):
        """Flatten chat messages into a single prompt string."""
        tokenizer = self.processor.tokenizer
        if getattr(tokenizer, "chat_template", None):
            # Use the model's own chat format when the tokenizer ships one.
            return tokenizer.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=True
            )
        # No template available: fall back to a plain role-prefixed transcript.
        return "\n\n".join(f"{m['role']}: {m['content']}" for m in messages)

    def complete(self, prompt, **kwargs):
        """Generate a completion for ``prompt`` with the wrapped model.

        Args:
            prompt: The full prompt text.
            **kwargs: Optional ``temperature`` (default 0.7; a value of 0
                selects greedy decoding) and ``max_tokens`` (default 512 new
                tokens). Other keys are ignored.

        Returns:
            The decoded completion, without the echoed prompt.
        """
        # Pass the text by keyword: the first positional parameter of
        # vision-language processors is the image input, not the text.
        # Move tensors to wherever the model actually lives instead of
        # assuming a CUDA device exists.
        inputs = self.processor(text=prompt, return_tensors="pt").to(
            self.hf_model.device
        )

        temperature = kwargs.get("temperature")
        if temperature is None:
            temperature = 0.7

        # Budget *new* tokens; `max_length` would also count the prompt and
        # leave little or nothing for the answer on long DSPy prompts.
        generation_args = {"max_new_tokens": kwargs.get("max_tokens") or 512}
        if temperature > 0:
            generation_args.update(do_sample=True, temperature=temperature)
        else:
            generation_args["do_sample"] = False

        # Generate response
        with torch.no_grad():
            output = self.hf_model.generate(**inputs, **generation_args)

        # Decoder-only models return prompt + completion; drop the prompt
        # tokens so only the newly generated text is decoded.
        prompt_length = inputs["input_ids"].shape[-1]
        return self.processor.tokenizer.decode(
            output[0][prompt_length:], skip_special_tokens=True
        )

## Configure DSPy to Use Proxy-Lite-3B
# Wrap the loaded model/processor pair and register it as DSPy's default LM.
llm = CustomProxyLiteLLM(model=model, processor=processor)
dspy.configure(lm=llm)

## Define a Prompt Optimization Task
class MyPromptOptimizationTask(dspy.Signature):
    # Signature with one input (`query`) and one output (`response`).
    # NOTE(review): kept without a docstring on purpose -- DSPy may use a
    # signature docstring as task instructions, which would change the
    # prompt; confirm before adding one.
    query: str = dspy.InputField()
    response: str = dspy.OutputField(desc="An optimized response to the query")

class PromptOptimizer(dspy.Module):
    """DSPy module that answers a query with a single ``dspy.Predict`` step."""

    def __init__(self):
        super().__init__()
        # Predictor bound to the query -> response signature.
        self.generator = dspy.Predict(MyPromptOptimizationTask)

    def forward(self, query):
        """Run the predictor on ``query`` and return its prediction."""
        prediction = self.generator(query=query)
        return prediction

## Test the Model with a Sample Query
# Build the module, run it once on a sample question, and show the answer.
sample_query = "Explain black holes in simple terms."
optimizer = PromptOptimizer()
output = optimizer(sample_query)
print(output.response)


Collecting dspy
  Downloading dspy-2.6.10-py3-none-any.whl.metadata (7.3 kB)
Collecting backoff (from dspy)
  Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Collecting ujson (from dspy)
  Downloading ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.3 kB)
Collecting datasets<3.0.0,>=2.14.6 (from dspy)
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting optuna (from dspy)
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting magicattr~=0.1.6 (from dspy)
  Downloading magicattr-0.1.6-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting litellm<2.0.0,>=1.59.8 (from dspy)
  Downloading litellm-1.63.3-py3-none-any.whl.metadata (37 kB)
Collecting diskcache (from dspy)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Collecting json-repair (from dspy)
  Downloading json_repair-0.39.1-py3-none-any.whl.metadata (11 kB)
Collecting asyncer==0.0.8 (from dspy)
  Downloading asyncer-0.0.8-py3-none-any

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/432 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/882 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.56M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/548 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/68.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.27k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/65.4k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.51G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/126 [00:00<?, ?B/s]

TypeError: argument of type 'Qwen2_5_VLForConditionalGeneration' is not iterable