In [1]:

# Setup: imports, environment variables, and LLM endpoint configuration



import importlib
import os
import json
import logging
import time
from abc import ABC, abstractmethod
from dotenv import load_dotenv

# Load environment variables (python-dotenv) before the os.getenv() lookups below
load_dotenv()

# LLM configuration
# Base URL of the OpenAI-compatible API; falls back to the public OpenAI endpoint.
LLM_API_ENDPOINT = os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1")
# API key; the fallback is a non-functional placeholder, never a real credential.
LLM_API_KEY = os.getenv("OPENAI_API_KEY", "YOUR_OPENAI_KEY_HERE")

# Surface a missing key here instead of letting it show up later as an opaque
# authentication error from the first LLM call.
if LLM_API_KEY in ("", "YOUR_OPENAI_KEY_HERE"):
    logging.warning(
        "No usable OPENAI_API_KEY found (unset, empty, or still the placeholder); "
        "requests to %s may fail to authenticate.",
        LLM_API_ENDPOINT,
    )


In [4]:
import dspy
# Initialize the language model
lm = dspy.LM(
    model="gpt-4o-mini",                # Your model name
    api_base=LLM_API_ENDPOINT,          # Your OpenAI-compatible endpoint
    api_key=LLM_API_KEY,                # Your API key
    temperature=0.7,
    max_tokens=16384
)

# Configure DSPy to use this language model
dspy.settings.configure(lm=lm)

# Test the connection with a simple call.
# Only the request itself sits inside `try`: a problem while *reporting* the
# result (e.g. a history entry without usage data) must not be presented as a
# connection failure.
try:
    response = lm("Hello! Can you tell me a fun fact about Python programming?")
except Exception as e:
    # Best-effort smoke test: report the error and let the notebook continue.
    print("❌ Connection failed:")
    print(f"Error: {e}")
else:
    print("✅ Connection successful!")
    if response:
        print("Response:", response[0])
    else:
        print("Response: (empty)")

    # Show usage statistics recorded for the call, when available
    if lm.history:
        last_call = lm.history[-1]
        if 'usage' in last_call:
            print(f"\nUsage: {last_call['usage']}")

✅ Connection successful!
Response: Sure! A fun fact about Python programming is that the language is named after the British comedy television show "Monty Python's Flying Circus." Its creator, Guido van Rossum, wanted a name that was short, unique, and slightly mysterious. This love for humor is reflected in Python's design, which emphasizes readability and simplicity, making it an enjoyable language for programmers to work with. Additionally, the Python standard library includes several modules and functions with playful names, further showcasing its lighthearted spirit!

Usage: {'completion_tokens': 100, 'prompt_tokens': 20, 'total_tokens': 120, 'completion_tokens_details': CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), 'prompt_tokens_details': PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None)}


In [8]:
import dspy


# 1. Configure the language model
# This rebinds the module-level `lm` created in the earlier connection-test
# cell and registers the new instance with DSPy. The only setting that differs
# from that cell is temperature (0.0 here, 0.7 there).
lm = dspy.LM(
    model="gpt-4o-mini",
    api_base=LLM_API_ENDPOINT,
    api_key=LLM_API_KEY,
    temperature=0.0,
    max_tokens=16384
)
dspy.settings.configure(lm=lm)

# 2. Define your task signature
# Task signature: one input (`defender_field`) and one output
# (`trend_micro_field`).
# NOTE: the class docstring is runtime text, not just documentation. It shows
# up verbatim as the objective in the system prompt (see the printed prompt in
# this cell's output), so rewording it changes what the model is asked to do.
class FieldMapping(dspy.Signature):
    """Map Microsoft Defender fields to Trend Micro fields."""
    defender_field = dspy.InputField()  # field name on the Microsoft Defender side
    trend_micro_field = dspy.OutputField()  # field name the model should produce

# 3. Wrap the signature in a basic predictor module
predictor = dspy.Predict(FieldMapping)

# 4. Training data: (Defender field, Trend Micro field) pairs.
#    Only `defender_field` is marked as an input; the other value is the label.
trainset = [
    dspy.Example(defender_field=defender_name, trend_micro_field=trend_name).with_inputs('defender_field')
    for defender_name, trend_name in (
        ("SHA256", "objectFileHashSha256"),
        ("ProcessName", "objectProcessName"),
        ("IPAddress", "objectIpAddress"),
        ("MD5", "objectFileHashMd5"),
        ("FileName", "objectFileName"),
    )
]

# 5. Define evaluation metric
def exact_match(example, pred, trace=None):
    """Metric: does the predicted Trend Micro field equal the expected one?

    The comparison ignores letter case and leading/trailing whitespace.

    Args:
        example: object exposing the expected value as ``trend_micro_field``.
        pred: object exposing the predicted value as ``trend_micro_field``.
        trace: accepted for compatibility with the metric call convention;
            not used.

    Returns:
        bool: True on a match. False when the values differ or when either
        side has no string value (missing attribute or ``None``), so an
        incomplete prediction scores as a miss instead of raising.
    """
    expected = getattr(example, "trend_micro_field", None)
    predicted = getattr(pred, "trend_micro_field", None)
    if not isinstance(expected, str) or not isinstance(predicted, str):
        return False
    return expected.strip().lower() == predicted.strip().lower()

def print_prompt_messages(messages, title):
    """Pretty-print a chat transcript under a titled banner.

    Each message is shown as ``[ROLE]: content`` with the role upper-cased.
    A dashed rule separates consecutive messages, and a line of ``=`` plus a
    blank line closes the listing.

    Args:
        messages: sequence of mappings with ``'role'`` and ``'content'`` keys.
        title: text placed inside the ``=== ... ===`` banner.
    """
    divider = "-" * 40
    print(f"=== {title} ===")
    for position, entry in enumerate(messages):
        print(f"[{entry['role'].upper()}]: {entry['content']}")
        # A divider goes between messages only, never after the final one
        if position + 1 < len(messages):
            print(divider)
    print("=" * 80)
    print()

# Field name used to probe the predictor both before and after optimization,
# so the two runs are directly comparable.
PROBE_FIELD = "CommandLine"


def _run_and_report(program, title, defender_field=PROBE_FIELD):
    """Run `program` on one Defender field and print the prompt and the result.

    Args:
        program: callable predictor accepting ``defender_field=...`` and
            returning an object with a ``trend_micro_field`` attribute.
        title: banner text passed to ``print_prompt_messages``.
        defender_field: input value to map.

    Returns:
        tuple: ``(prediction, messages)`` where ``messages`` is the message
        list of the last entry in ``lm.history``, or ``None`` when no history
        entry (or no message list) was recorded for the call.
    """
    lm.history.clear()  # start clean so history[-1] belongs to this call
    prediction = program(defender_field=defender_field)

    messages = lm.history[-1]['messages'] if lm.history else None
    if messages is not None:
        print_prompt_messages(messages, title)
    # The result is printed even when no prompt could be captured
    print(f"Result: {prediction.trend_micro_field}")
    print()
    return prediction, messages


# 6. Test BEFORE optimization and capture prompt
print("Testing BEFORE optimization...")
result_before, original_messages = _run_and_report(
    predictor, "ORIGINAL PROMPT (Before Optimization)"
)

# 7. Optimize the prompt
print("=== OPTIMIZING PROMPT ===")
optimizer = dspy.BootstrapFewShot(metric=exact_match, max_bootstrapped_demos=3)
optimized_predictor = optimizer.compile(predictor, trainset=trainset)
print("Optimization complete!")
print()

# 8. Test AFTER optimization and capture prompt
print("Testing AFTER optimization...")
result_after, optimized_messages = _run_and_report(
    optimized_predictor, "OPTIMIZED PROMPT (After Optimization)"
)



Testing BEFORE optimization...
=== ORIGINAL PROMPT (Before Optimization) ===
[SYSTEM]: Your input fields are:
1. `defender_field` (str)

Your output fields are:
1. `trend_micro_field` (str)

All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## defender_field ## ]]
{defender_field}

[[ ## trend_micro_field ## ]]
{trend_micro_field}

[[ ## completed ## ]]

In adhering to this structure, your objective is: 
        Map Microsoft Defender fields to Trend Micro fields.
----------------------------------------
[USER]: [[ ## defender_field ## ]]
CommandLine

Respond with the corresponding output fields, starting with the field `[[ ## trend_micro_field ## ]]`, and then ending with the marker for `[[ ## completed ## ]]`.

Result: Process Command Line

=== OPTIMIZING PROMPT ===


 80%|████████  | 4/5 [00:00<00:00, 1313.18it/s]

Bootstrapped 3 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Optimization complete!

Testing AFTER optimization...
=== OPTIMIZED PROMPT (After Optimization) ===
[SYSTEM]: Your input fields are:
1. `defender_field` (str)

Your output fields are:
1. `trend_micro_field` (str)

All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## defender_field ## ]]
{defender_field}

[[ ## trend_micro_field ## ]]
{trend_micro_field}

[[ ## completed ## ]]

In adhering to this structure, your objective is: 
        Map Microsoft Defender fields to Trend Micro fields.
----------------------------------------
[USER]: [[ ## defender_field ## ]]
SHA256

Respond with the corresponding output fields, starting with the field `[[ ## trend_micro_field ## ]]`, and then ending with the marker for `[[ ## completed ## ]]`.
----------------------------------------
[ASSISTANT]: [[ ## trend_micro_field ## ]]
objectFileHashSha256

[[ ## complete


