In [2]:
import pandas as pd
from openai import OpenAI
from autoddg import AutoDDG
from autoddg.utils import get_sample
import os
from autoddg.related.related import RelatedWorkProfiler

In [None]:
# NOTE(review): `load_dotenv` is not imported in any visible cell, so this call
# raises NameError on a fresh kernel (Restart & Run All). Presumably
# `from dotenv import load_dotenv` (python-dotenv) is missing from the import
# cell — confirm and add it there, or drop this cell if no .env values are
# needed for the local Ollama configuration used below.
load_dotenv()

In [None]:

# Earlier hosted (OpenRouter) configuration, kept here for reference only:
# MODEL_CONFIG = {
#     "base_url": "https://openrouter.ai/api/v1",
#     "api_key": os.getenv("OPENROUTER_API_KEY"),
#     # "model_name": "mistralai/mistral-7b-instruct:free",
#     "model_name": "google/gemini-2.0-flash-exp:free",
# }

# Active configuration: a local Ollama server reached through its
# OpenAI-compatible endpoint.
MODEL_CONFIG = dict(
    base_url="http://localhost:11434/v1",
    api_key="ollama",  # placeholder value; noted as unchecked by Ollama
    model_name="llama3.2",  # bare model name, no provider prefix
)

# Shared OpenAI client used by the cells below.
client = OpenAI(base_url=MODEL_CONFIG["base_url"], api_key=MODEL_CONFIG["api_key"])

In [None]:
# Build the AutoDDG instance from the shared client and the configured model.
auto_ddg = AutoDDG(model_name=MODEL_CONFIG["model_name"], client=client)

In [None]:
# Read the CODE-15% CSV and take a sample (sample_size=100) for the LLM steps.
df = pd.read_csv("../src/autoddg/related/data/code-15.csv")
sample_df, dataset_sample = get_sample(df, sample_size=100)

# 1) Profile the full dataframe.
basic_profile, structural_profile = auto_ddg.profile_dataframe(df)

# 2) Semantic analysis runs on the sampled dataframe only.
semantic_profile = auto_ddg.analyze_semantics(sample_df)

# 3) Derive the dataset topic from its title and the sample.
data_topic = auto_ddg.generate_topic(
    "CODE-15%: a large scale annotated dataset of 12-lead ECGs",
    None,
    dataset_sample,
)

In [None]:
# 4) Build the related-work profile from the dataset's paper (PDF),
#    passing max_pages=10 to the analyzer.
related_profile = auto_ddg.analyze_related(
    dataset_name="CODE-15%: a large scale annotated dataset of 12-lead ECGs",
    pdf_path="../src/autoddg/related/papers/code15.pdf",
    max_pages=10,
)

In [None]:
# 5) Generate the description with every profile enabled, including the
#    related-work profile (passed as the dict produced by analyze_related).
prompt, description = auto_ddg.describe_dataset(
    dataset_sample=dataset_sample,
    # profile inputs
    dataset_profile=basic_profile,
    semantic_profile=semantic_profile,
    data_topic=data_topic,
    related_profile=related_profile,
    # switches: all four sources are turned on
    use_profile=True,
    use_semantic_profile=True,
    use_topic=True,
    use_related_profile=True,
)

In [None]:
# Show the 'summary' entry of the related-work profile.
print(related_profile["summary"])

In [None]:
from autoddg.evaluation import BaseEvaluator

class Eval(BaseEvaluator):
    """
    Evaluate descriptions with a model served through an OpenAI-compatible API.

    The defaults target the same local Ollama endpoint and model that
    MODEL_CONFIG uses in this notebook; pass other values to evaluate against
    a different server or model.

    Parameters
    ----------
    openrouter_api_key : str
        API key handed to the OpenAI client. The parameter name is kept for
        backward compatibility with existing callers; the default "ollama" is
        a placeholder value for the local server.
    model_name : str
        Name of the model used for evaluation.
    base_url : str
        Base URL of the OpenAI-compatible endpoint. Defaults to the local
        Ollama address that was previously hard-coded here.
    """
    def __init__(
        self,
        openrouter_api_key: str = "ollama",
        model_name: str = "llama3.2",
        base_url: str = "http://localhost:11434/v1",
    ):
        # Build a dedicated client so the evaluator can point at a different
        # endpoint than the generation client if required.
        client = OpenAI(
            api_key=openrouter_api_key,
            base_url=base_url,
        )
        super().__init__(client=client, model_name=model_name)

In [None]:
# Baseline description: same inputs as the related-work run, but with the
# related-work profile switched off (and therefore not passed at all).
prompt_baseline, description_baseline = auto_ddg.describe_dataset(
    dataset_sample=dataset_sample,
    # profile inputs
    dataset_profile=basic_profile,
    semantic_profile=semantic_profile,
    data_topic=data_topic,
    # switches: related work is OFF for the baseline
    use_profile=True,
    use_semantic_profile=True,
    use_topic=True,
    use_related_profile=False,
)

# The related-work variant is not regenerated here; it is already produced by
# the earlier describe_dataset call (stored in `prompt` / `description`).
# Both descriptions are printed in the cells that follow.

In [None]:
# Show the baseline description (generated without the related-work profile).
print("Baseline:", description_baseline)

In [None]:
# Show the description generated with the related-work profile enabled.
print(description)

In [None]:
# Attach the evaluator defined above to the AutoDDG instance.
auto_ddg.set_evaluator(Eval(openrouter_api_key="ollama"))

# Score the description that was generated with the related-work profile.
augmented_score = auto_ddg.evaluate_description(description)
print("Score of the general description:", augmented_score)

# Baseline scoring is currently disabled; enable to compare both variants:
# baseline_score = auto_ddg.evaluate_description(description_baseline)
# print("Score of the baseline description:", baseline_score)