In [3]:
import nest_asyncio
nest_asyncio.apply()  # This allows running async code in Jupyter

from tqdm import tqdm
import pandas as pd
import requests
import json
import re
import asyncio
import aiohttp
from typing import List, Dict, Tuple

# Base prompt remains the same
prompt = ''' You are an expert in natural language processing and query classification. Your task is to label each given query as either "retrieval needed" (1) or "retrieval not needed" (0). 

Instructions:
1. Analyze the query carefully.
2. If the query requires specific, detailed, or specialized external information (e.g., fact-checking, detailed data, or precise terminology), label it as "1" (retrieval needed).
3. If the query is generic, abstract, or related to tasks like summarization, paraphrasing, or general knowledge that can be answered by a language model without external data, label it as "0" (retrieval not needed).
4. Provide your answer in the following format: 
   <label><0 or 1></label>
   <explanation><one or two sentences explaining your reasoning></explanation>

Examples:
Query: When did Virgin Australia start operating?
<label>1</label>
<explanation>This query asks for a specific historical fact (a start date) that is unlikely to be deduced purely from context and requires external factual data.</explanation>

Query: Which is a species of fish? Tope or Rope
<label>1</label>
<explanation>The query requires knowing the correct fish species between two options, which is a precise factual lookup rather than a generic or inferable answer.</explanation>

Query: Alice's parents have three daughters: Amy, Jessy, and what’s the name of the third daughter?
<label>0</label>
<explanation>This is a riddle where the answer is directly given in the query ("Alice"), making external retrieval unnecessary.</explanation>

Query: What individual has won the most Olympic gold medals in the history of the games?
<label>1</label>
<explanation>This is a specific fact-based question that requires up-to-date and precise information about Olympic records, which typically necessitates an external data source.</explanation>

Query: Which Dutch artist painted “Girl with a Pearl Earring”?
<label>0</label>
<explanation>This is a well-known art fact that most language models can answer from general knowledge without needing to retrieve external information.</explanation>
   
Now, please label the following query:

{query}
'''

async def process_query_batch(session: aiohttp.ClientSession, queries: List[str], 
                            semaphore: asyncio.Semaphore) -> List[Tuple[int, float, str]]:
    async def process_single_query(query: str) -> Tuple[int, float, str]:
        async with semaphore:
            try:
                payload = {
                    "model": "deepseek-r1:14b",
                    "prompt": prompt.format(query=query),
                    "stream": False,
                    'num_predict': 30,
                    "temperature": 0.1
                }
                
                async with session.post('http://localhost:11435/api/generate', 
                                      json=payload) as response:
                    result = await response.json()
                    response_text = result['response']
                    #print(response_text)

                    think = re.search(r'<think>(.*?)</think>', response_text)
                    think = think.group(1).strip() if think else ""
                    #print(think)
                    
                    label_match = re.search(r'<label>(\d)</label>', response_text)
                    explanation_match = re.search(r'<explanation>(.*?)</explanation>', response_text)
                    
                    label = int(label_match.group(1)) if label_match else -1
                    explanation = explanation_match.group(1).strip() if explanation_match else ""
                    confidence = 1.0 if label != -1 else 0.0
                    
                    return label, confidence, explanation, think
            except Exception as e:
                print(f"Error processing query: {e}")
                return -1, 0.0, "", ""

    tasks = [process_single_query(query) for query in queries]
    return await asyncio.gather(*tasks)

async def evaluate_model(df: pd.DataFrame, batch_size: int = 8) -> pd.DataFrame:
    queries = [f"{i} {c}" for i, c in zip(df['rewrite_prompt'].astype(str), 
                                         df['original_text'].astype(str))]
    results = []
    
    semaphore = asyncio.Semaphore(4)
    
    async with aiohttp.ClientSession() as session:
        for i in tqdm(range(0, len(queries), batch_size)):
            batch_queries = queries[i:i + batch_size]
            batch_results = await process_query_batch(session, batch_queries, semaphore)
            
            for j, (pred_label, confidence, reasoning, think) in enumerate(batch_results):
                idx = i + j
                results.append({
                    'query': queries[idx],
                    'instruction': df.iloc[idx]['rewrite_prompt'],
                    'context': df.iloc[idx]['original_text'],
                    'predicted_label': pred_label,
                    'confidence': confidence,
                    'reasoning': reasoning,
                    'think': think
                })
            
            if i % 100 == 0:
                pd.DataFrame(results).to_csv(f'ollama_results_{i}.csv', index=False)
    
    return pd.DataFrame(results)

In [2]:
from datasets import load_dataset
import pandas as pd
data = load_dataset("positivethoughts/merge_rewrite_13.3k")
df = pd.DataFrame(data['train'])
df

Unnamed: 0,rewrite_prompt,rewritten_text,original_text,id
0,Paraphrase the provided text effectively.,A batch of cookies were made by Amanda and she...,Amanda baked cookies and will bring Jerry some...,
1,Rewrite the text in a more informal tone while...,Olivia and Olivier are both backing the libera...,Olivia and Olivier are voting for liberals in ...,
2,Rearrange the sentence structure for a fresh p...,"To get more stuff done, Kim may consider tryin...",Kim may try the pomodoro technique recommended...,
3,Shift the focus to Rachel's perspective withou...,"Rachel stands outside Edward's door, feeling a...",Edward thinks he is in love with Bella. Rachel...,
4,Adapt the text for a workplace setting while m...,Samantha is feeling uncertain at work after ov...,"Sam is confused, because he overheard Rick com...",
...,...,...,...,...
13360,1950s American Suburbia: Recast the essay with...,The picket fences and shining chrome of 1950s ...,"`` Nein, Nein, Die Englisch sind hier! Geoff i...",dZpCXooaHn
13361,Write like William Shakespeare: Write in Shake...,"Fair maiden, thy beauty doth beguile,\nWith ey...",Trigger warning. \n \n \n -- - \n \n \n Every ...,FWONqGJidk
13362,Write like a Beat poet: Use the rhythmic and r...,"Candle, light in the darkness, life in the bel...","Nobody understand candle. Candle is light, can...",TDCNripkkT
13363,Space Explorer: Adopt the curious and hopeful ...,**Cooper's Journey**\n\nIn the vast expanse of...,**Cooper is back! ** \n \n Last year they left...,xulqOrwbms


In [4]:
# Load and prepare data
#df = pd.read_csv('./code_alpaca.csv')
df['total_chars'] = df['rewrite_prompt'].fillna('').str.len() + df['original_text'].fillna('').str.len()
df = df[df['total_chars'] <= 2000]  # Filter long sequences
df = df[:3000]

# Run evaluation
loop = asyncio.get_event_loop()
results_df = loop.run_until_complete(evaluate_model(df, batch_size=8))

# Save results
results_df.to_csv('ollama_results_final.csv', index=False)

# Print statistics
print("\nLabel distribution:")
print(results_df['predicted_label'].value_counts())

 75%|███████▌  | 282/375 [3:17:53<42:30, 27.42s/it]  

KeyboardInterrupt: 

 77%|███████▋  | 287/375 [3:20:35<43:57, 29.97s/it]

In [2]:
import pandas as pd
results_df = pd.read_csv('./ollama_results_2200.csv')
results_df['predicted_label'].value_counts()

predicted_label
 0    2059
 1     134
-1      15
Name: count, dtype: int64

In [5]:
results_df[results_df['predicted_label'] == 1]

Unnamed: 0,query,instruction,context,predicted_label,confidence,reasoning,think
111,Shift the focus to highlight the characters' r...,Shift the focus to highlight the characters' r...,"Joe, Tim and Jack are watching 'The Millionaire'.",1,1.0,This query asks for specific and detailed info...,
330,Provide an updated status change for Pedro. Pe...,Provide an updated status change for Pedro.,Pedro got promoted.,1,1.0,This query requests specific and personalized ...,
354,Provide a recommendation for a romantic movie....,Provide a recommendation for a romantic movie.,"Emma recommends Mario ""I'm in love"" movie.",1,1.0,The query refers to a specific romantic movie ...,
462,"""Provide advice on optimizing battery usage fo...","""Provide advice on optimizing battery usage fo...",Waze is consuming batteries really fast.,1,1.0,The query asks for specific advice on optimizi...,
593,Reverse the order of sentences. Tom wants to b...,Reverse the order of sentences.,Tom wants to borrow Alan's car but Alan needs ...,1,1.0,This query asks for the reversal of sentence o...,
...,...,...,...,...,...,...,...
2157,Explain this as if it were a sports commentary...,Explain this as if it were a sports commentary.,"Stack traces in GHCJS June 20, 2014 - Tagged a...",1,1.0,The query asks for an explanation of stack tra...,
2170,Rewrite this as a project to be developed. The...,Rewrite this as a project to be developed.,There's been some level of uncertainty regardi...,1,1.0,This query involves a detailed analysis of a s...,
2177,Imagine this as an insight to be discovered. P...,Imagine this as an insight to be discovered.,Passing the sanctions bill is likely to be a r...,1,1.0,The query pertains to a detailed and current l...,
2180,Write the text as if it were a late-night come...,Write the text as if it were a late-night come...,Google-centric Chromebooks are turning out to ...,1,1.0,This query provides specific data about Chrome...,
