In [1]:
import pandas as pd
import os
import time
import re
import concurrent.futures
from tqdm import tqdm
from langchain.prompts import PromptTemplate
from langchain.llms import Ollama
from langchain.chains import LLMChain
from openai import OpenAI
import requests

In [None]:
../../Summarization/stage 2/TEST.csv
../../Summarization/stage 2/leakage free summaries only.csv

In [None]:
# -------------------------------
# 🔧 Configuration
# -------------------------------
OLLAMA_SLEEP = 1  # seconds slept after every processed row (Ollama and Claude loops)

# Credentials are read from the environment so real keys are never saved
# inside the notebook file or its version history.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
CLAUDE_API_KEY = os.environ.get("ANTHROPIC_API_KEY", "")
_missing_keys = [
    env_name
    for env_name, env_value in (
        ("OPENAI_API_KEY", OPENAI_API_KEY),
        ("ANTHROPIC_API_KEY", CLAUDE_API_KEY),
    )
    if not env_value
]
if _missing_keys:
    # Fail here instead of letting every per-row API call fail later: the
    # worker loops catch exceptions row by row and would just keep going.
    raise RuntimeError("Missing environment variable(s): " + ", ".join(_missing_keys))

GPT_MODEL = "gpt-4.1-mini"  # model used to normalise raw answers into <method> tags
client = OpenAI(api_key=OPENAI_API_KEY)

CLAUDE_MODEL = "claude-3-5-sonnet-20240620"
CLAUDE_HEADERS = {
    "x-api-key": CLAUDE_API_KEY,
    "anthropic-version": "2023-06-01",
    "content-type": "application/json"
}
CLAUDE_URL = "https://api.anthropic.com/v1/messages"

# Input table; the worker loops read the "final masked extraction" column and,
# when present, "domain", "title", "abstract" and "verified methods list".
INPUT_CSV = "../../Summarization/stage 2/leakage free summaries only.csv"
df = pd.read_csv(INPUT_CSV)

# -------------------------------
# 🧠 Prompt Template
# -------------------------------
# Zero-shot instruction text. {domain} and {summary} are substituted per row;
# the model is asked to answer with <method>...</method> tags only.
RECOMMENDATION_TEMPLATE = """
You are a computer science researcher collaborating with other researchers from the domain of {domain}. Your task is to recommend the most appropriate A.I., machine learning, or deep learning methods to solve the research idea and objectives described below.

Here is the summary of that research idea and objective: {summary}

Please suggest between 1 and 10 methods, but only include those that are essential and directly applicable to the research. Avoid listing methods that are speculative, overly generic, or redundant. It is better to recommend **fewer high-precision methods** than an exhaustive list. Do not include any explanation or justification. Only return the method names using the following exact format:  
<method>Method Name 1</method>, <method>Method Name 2</method>, ...

Respond with only the list.
"""

recommendation_prompt = PromptTemplate(
    template=RECOMMENDATION_TEMPLATE,
    input_variables=["summary", "domain"],
)


def make_claude_prompt(summary, domain):
    """Render the shared recommendation prompt as a plain string.

    Args:
        summary: Text substituted for ``{summary}`` in the template.
        domain: Text substituted for ``{domain}`` in the template.

    Returns:
        The fully formatted prompt text.
    """
    template_fields = {"summary": summary, "domain": domain}
    return recommendation_prompt.format(**template_fields)

# -------------------------------
# 🧹 GPT-4.1-mini Cleaner
# -------------------------------
def extract_clean_method_tags(raw_text, client, model="gpt-4.1-mini", temperature=0.1, max_tokens=500):
    """Ask a chat model to rewrite free-form text as a list of ``<method>`` tags.

    Args:
        raw_text: Text that mentions one or more AI/ML/DL methods.
        client: Object exposing ``chat.completions.create(...)`` (an OpenAI client).
        model: Chat model used for the extraction.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion length limit forwarded to the API.

    Returns:
        The model's reply with surrounding whitespace stripped, or the sentinel
        string ``"ERROR"`` when the request fails or yields no text. Failures
        are reported on stdout and never raised.
    """
    prompt = f"""
You will be given a piece of text that contains one or more AI/ML/DL methods. Your task is to extract the method names and list them as a comma-separated list using this format:

<method>Method Name 1</method>, <method>Method Name 2</method>, ...

Do not include any other text or explanation. Only return method tags.

Here is the text:
{raw_text}
"""
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
            max_tokens=max_tokens
        )
        content = response.choices[0].message.content
        if content is None:
            # Report an empty reply explicitly rather than via an incidental
            # AttributeError from calling .strip() on None.
            raise ValueError("model returned no text content")
        return content.strip()
    except Exception as e:
        # Name the model actually used; it is a parameter, not a fixed value.
        print(f"❌ {model} extraction error: {e}")
        return "ERROR"

# -------------------------------
# ⚙️ Ollama Model Registry
# -------------------------------
# (label, Ollama model tag) pairs. The label is what the worker receives as
# `model_name`; the tag is what it passes to Ollama(model=...).
ollama_models = dict([
    ("llama3.1_8b", "llama3.1"),
    ("llama2_13b", "llama2:13b"),
    ("mistral_7b", "mistral"),
    ("gemma2_9b", "gemma2:9b"),
    ("gemma3_12b", "gemma3:12b"),
])

# -------------------------------
# 🔁 Ollama Worker
# -------------------------------
def run_ollama_zero_shot(model_name, model_id, df):
    """Run one Ollama model over every row of ``df`` and collect its recommendations.

    For each row the masked summary and domain are rendered into
    ``recommendation_prompt`` and sent to the model; the raw reply is then
    normalised into ``<method>`` tags by ``extract_clean_method_tags``.

    Args:
        model_name: Label stored in the ``model`` field of every record and
            shown in progress and log output.
        model_id: Identifier passed to ``Ollama(model=...)``.
        df: DataFrame with a ``final masked extraction`` column. ``domain``,
            ``title``, ``abstract`` and ``verified methods list`` are optional.

    Returns:
        list[dict]: One record per successfully processed row. Rows that raise,
        or whose summary is missing, are reported on stdout and left out.
    """
    def _is_missing(value):
        """True for scalar missing values (None/NaN/NA) as produced by pandas."""
        return pd.api.types.is_scalar(value) and pd.isna(value)

    def _cell_text(row, column, default):
        """Return the cell as text, substituting ``default`` for missing values."""
        value = row.get(column, default)
        # Empty CSV cells are loaded as NaN; str(NaN) would otherwise put the
        # literal "nan" into the prompt and into the saved results.
        return default if _is_missing(value) else str(value)

    results = []
    print(f"\n🚀 Running Ollama model: {model_name}")
    llm = Ollama(model=model_id, temperature=0.15)
    chain = LLMChain(prompt=recommendation_prompt, llm=llm)

    for idx, row in tqdm(df.iterrows(), total=df.shape[0], desc=f"{model_name}"):
        try:
            summary_cell = row["final masked extraction"]
            if _is_missing(summary_cell):
                # Nothing to recommend for; do not ask the model about "nan".
                print(f"⚠️ Skipping row {idx} for {model_name}: empty summary")
                continue
            final_masked_extraction = str(summary_cell)
            domain = _cell_text(row, "domain", "Unknown")
            title = _cell_text(row, "title", "")
            abstract = _cell_text(row, "abstract", "")
            verified_methods_list = _cell_text(row, "verified methods list", "")

            raw_response = chain.run(summary=final_masked_extraction, domain=domain).strip()
            cleaned_response = extract_clean_method_tags(raw_response, client, model=GPT_MODEL)
            method_count = cleaned_response.count("<method>")
            generated_method_list = re.findall(r"<method>(.*?)</method>", cleaned_response)

            results.append({
                "domain": domain,
                "title": title,
                "abstract": abstract,
                "model": model_name,
                "masked extracted summary": final_masked_extraction,
                "recommended method raw": raw_response,
                "recommended method cleaned": cleaned_response,
                "method count": method_count,
                "generated method list": generated_method_list,
                "ground truth methods list": verified_methods_list
            })

            time.sleep(OLLAMA_SLEEP)
        except Exception as e:
            # Best effort: one bad row must not abort the whole model run.
            print(f"❌ Error in {model_name} on row {idx}: {e}")
            continue

    return results

# -------------------------------
# 🔁 Claude Model Sequential
# -------------------------------
def run_claude_zero_shot(df, timeout=120):
    """Query Claude sequentially for every row of ``df`` and collect its recommendations.

    Each row's prompt is built with ``make_claude_prompt`` and posted to
    ``CLAUDE_URL``; the raw reply is normalised into ``<method>`` tags by
    ``extract_clean_method_tags``.

    Args:
        df: DataFrame with a ``final masked extraction`` column. ``domain``,
            ``title``, ``abstract`` and ``verified methods list`` are optional.
        timeout: Seconds to wait for each HTTP request before giving up on
            that row. Without a timeout a stalled connection blocks forever.

    Returns:
        list[dict]: One record per successfully processed row. Rows that raise
        (HTTP errors and timeouts included), or whose summary is missing, are
        reported on stdout and left out.
    """
    def _is_missing(value):
        """True for scalar missing values (None/NaN/NA) as produced by pandas."""
        return pd.api.types.is_scalar(value) and pd.isna(value)

    def _cell_text(row, column, default):
        """Return the cell as text, substituting ``default`` for missing values."""
        value = row.get(column, default)
        # Empty CSV cells are loaded as NaN; str(NaN) would otherwise put the
        # literal "nan" into the prompt and into the saved results.
        return default if _is_missing(value) else str(value)

    results = []
    print("\n🤖 Running Claude model...")
    for idx, row in tqdm(df.iterrows(), total=df.shape[0], desc="Claude"):
        try:
            summary_cell = row["final masked extraction"]
            if _is_missing(summary_cell):
                # Nothing to recommend for; do not ask the model about "nan".
                print(f"⚠️ Skipping row {idx} for Claude: empty summary")
                continue
            final_masked_extraction = str(summary_cell)
            domain = _cell_text(row, "domain", "Unknown")
            title = _cell_text(row, "title", "")
            abstract = _cell_text(row, "abstract", "")
            verified_methods_list = _cell_text(row, "verified methods list", "")

            prompt = make_claude_prompt(final_masked_extraction, domain)
            payload = {
                "model": CLAUDE_MODEL,
                "max_tokens": 1024,
                "messages": [{"role": "user", "content": prompt}]
            }

            response = requests.post(
                CLAUDE_URL, headers=CLAUDE_HEADERS, json=payload, timeout=timeout
            )
            response.raise_for_status()
            raw_response = response.json()["content"][0]["text"].strip()

            cleaned_response = extract_clean_method_tags(raw_response, client, model=GPT_MODEL)
            method_count = cleaned_response.count("<method>")
            generated_method_list = re.findall(r"<method>(.*?)</method>", cleaned_response)

            results.append({
                "domain": domain,
                "title": title,
                "abstract": abstract,
                "model": "claude-3.5-sonnet",
                "masked extracted summary": final_masked_extraction,
                "recommended method raw": raw_response,
                "recommended method cleaned": cleaned_response,
                "method count": method_count,
                "generated method list": generated_method_list,
                "ground truth methods list": verified_methods_list
            })

            # Same per-row pause as the Ollama loop (shared constant).
            time.sleep(OLLAMA_SLEEP)
        except Exception as e:
            # Best effort: one bad row must not abort the whole run.
            print(f"❌ Claude error on row {idx}: {e}")
            continue
    return results

# -------------------------------
# 🔁 Run All + Save Outputs
# -------------------------------
results = []

# Run every Ollama model in its own thread. Each future is mapped to its model
# label so that a failure can be attributed to the right model.
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = {
        executor.submit(run_ollama_zero_shot, name, model_id, df): name
        for name, model_id in ollama_models.items()
    }
    for future in concurrent.futures.as_completed(futures):
        try:
            results.extend(future.result())
        except Exception as e:
            # A worker that raises must not throw away the records already
            # collected from the other models.
            print(f"❌ Ollama model {futures[future]} failed: {e}")

# Run Claude (sequential, after all Ollama workers have finished)
results.extend(run_claude_zero_shot(df))


🚀 Running Ollama model: llama3.1_8b
🚀 Running Ollama model: llama2_13b


🚀 Running Ollama model: mistral_7b

🚀 Running Ollama model: gemma2_9b

🚀 Running Ollama model: gemma3_12b


  llm = Ollama(model=model_id, temperature=0.15)
  llm = Ollama(model=model_id, temperature=0.15)
  llm = Ollama(model=model_id, temperature=0.15)
  llm = Ollama(model=model_id, temperature=0.15)
  llm = Ollama(model=model_id, temperature=0.15)
  chain = LLMChain(prompt=recommendation_prompt, llm=llm)
  chain = LLMChain(prompt=recommendation_prompt, llm=llm)

gemma2_9b:   0%|                                                                               | 0/377 [00:00<?, ?it/s][A

  raw_response = chain.run(summary=final_masked_extraction, domain=domain).strip()
  raw_response = chain.run(summary=final_masked_extraction, domain=domain).strip()
[A[A


llama3.1_8b:   0%|                                                                             | 0/377 [00:00<?, ?it/s][A[A[A



mistral_7b:   0%|                                                                              | 0/377 [00:00<?, ?it/s][A[A[A[A



mistral_7b:   0%|▏                                                      


🤖 Running Claude model...


Claude: 100%|████████████████████████████████████████████████████████████████████████| 377/377 [26:21<00:00,  4.19s/it]


In [3]:
# Write the combined table, then one CSV per model, per domain and per
# (domain, model) pair, all under `grouped_recommendations/`.
def _to_filename(label):
    """Replace spaces and forward slashes with underscores for use in a file name."""
    return label.replace(" ", "_").replace("/", "_")


output_df = pd.DataFrame(results)

output_dir = "grouped_recommendations"
os.makedirs(output_dir, exist_ok=True)
everything_path = os.path.join(output_dir, "zero-shot.csv")
output_df.to_csv(everything_path, index=False)

# One file per model
model_dir = os.path.join(output_dir, "by_model")
os.makedirs(model_dir, exist_ok=True)
for model_label, model_rows in output_df.groupby("model"):
    model_path = os.path.join(model_dir, f"{model_label}.csv")
    model_rows.to_csv(model_path, index=False)

# One file per domain
domain_dir = os.path.join(output_dir, "by_domain")
os.makedirs(domain_dir, exist_ok=True)
for domain_label, domain_rows in output_df.groupby("domain"):
    domain_path = os.path.join(domain_dir, f"{_to_filename(domain_label)}.csv")
    domain_rows.to_csv(domain_path, index=False)

# One file per (domain, model) pair
model_domain_dir = os.path.join(output_dir, "by_model_and_domain")
os.makedirs(model_domain_dir, exist_ok=True)
for (domain_label, model_label), pair_rows in output_df.groupby(["domain", "model"]):
    pair_name = f"{_to_filename(domain_label)}__{model_label}.csv"
    pair_rows.to_csv(os.path.join(model_domain_dir, pair_name), index=False)

# Done
print("\n✅ All results processed and saved with progress bars.")


✅ All results processed and saved with progress bars.
