**Notebook Description: Feedback-Adjusted Query Optimization**

This notebook explores the impact of feedback-based query refinement on article retrieval quality, comparing the feedback-adjusted method to the expanded query approach. To simulate user feedback, we generate artificial feedback by marking articles with above-average model scores as relevant. This feedback is used to refine the query vector—prioritizing relevant articles and reducing the influence of less relevant ones—to improve retrieval accuracy.

For evaluation, we rely on model assessments as the primary metric: three language models rate the relevance of each article's abstract to the query. We use the average of these scores and cosine similarity to measure the effectiveness of retrieval. This analysis highlights how feedback, even when simulated, can dynamically improve the quality of results.

In [None]:
#imports
!pip install openai==0.28
!pip install pinecone cohere openai sentence_transformers

import os
import re
import time

import cohere
import google.generativeai as genai
import numpy as np
import openai
import pandas as pd
import pinecone
from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer
from sklearn.metrics import ndcg_score
from sklearn.metrics.pairwise import cosine_similarity


In [None]:
# Set up Pinecone.
# The API key is taken from the PINECONE_API_KEY environment variable so the real
# secret does not have to be written into the notebook; the original placeholder
# is kept as the fallback for anyone who still edits the literal in place.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY", 'YOUR-API-KEY'))
index = pc.Index("document-embeddings")

In [None]:
# Set up OpenAI, Gemini and Cohere API keys.
# Each key is read from an environment variable first so secrets stay out of the
# notebook source; the original placeholder remains the fallback value.
openai.api_key = os.environ.get("OPENAI_API_KEY", 'YOUR-API-KEY')
co = cohere.Client(os.environ.get("CO_API_KEY", 'YOUR-API-KEY'))
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", 'YOUR-API-KEY'))


In [None]:
# Sentence-embedding model shared by every encoding call in this notebook
model = SentenceTransformer("all-MiniLM-L6-v2")


def encode_text(text):
    """Embed ``text`` with the shared SentenceTransformer model and return the result."""
    return model.encode(text)

In [None]:
def retrieve_articles(query, top_k=5):
    """Fetch the ``top_k`` closest matches for ``query`` from the Pinecone index.

    The query text is embedded with ``encode_text`` and sent to the module-level
    ``index``. Returns a list of ``(title, abstract)`` tuples read from each
    match's metadata, in the order the matches are returned.
    """
    response = index.query(
        vector=encode_text(query).tolist(),
        top_k=top_k,
        include_metadata=True,
        include_values=True,
    )

    articles = []
    for match in response['matches']:
        metadata = match['metadata']
        articles.append((metadata['title'], metadata['abstract']))
    return articles

def evaluate_articles(query, articles):
    """Score each article against ``query`` with GPT, Cohere and Gemini.

    ``articles`` is an iterable of ``(title, abstract)`` pairs. For every
    article the three model helpers are called in the order GPT, Cohere,
    Gemini on a combined title/abstract text. Returns one value per article:
    the mean of the three model scores.
    """
    avg_scores = []
    for title, abstract in articles:
        document = f"Title: {title}\nAbstract: {abstract}"

        gpt_rating = call_gpt_api(query, document)
        cohere_rating = call_cohere_api(query, document)
        gemini_rating = call_gemini_api(query, document)

        avg_scores.append((gpt_rating + cohere_rating + gemini_rating) / 3)
    return avg_scores

# Matches the first integer or decimal number appearing in a model reply
_SCORE_PATTERN = re.compile(r"-?\d+(?:\.\d+)?")


def parse_relevance_score(reply):
    """Convert a model's textual reply into a numeric relevance score.

    A reply that is already a bare number (optionally surrounded by whitespace)
    is converted directly. Otherwise the first number found in the text is
    used, so replies such as ``"8/10"`` or ``"Relevance: 8"`` no longer abort
    the evaluation run.

    Raises:
        ValueError: if the reply contains no number at all.
    """
    text = reply.strip()
    try:
        return float(text)
    except ValueError:
        # Not a bare number: fall back to the first number embedded in the text
        match = _SCORE_PATTERN.search(text)
    if match is None:
        raise ValueError(f"No numeric relevance score found in model reply: {reply!r}")
    return float(match.group())


# GPT API call
def call_gpt_api(query, abstract):
    """Ask gpt-3.5-turbo to rate how relevant ``abstract`` is to ``query``.

    Returns the rating parsed from the model's reply as a float.
    """
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a relevance evaluator for scientific articles. Reply with only a single number between 1 and 10."},
            {"role": "user", "content": f"Question: {query}\nArticle Abstract: {abstract}\nRate the relevance from 1 to 10. Only provide the number as an answer."}
        ]
    )
    score_text = response['choices'][0]['message']['content']
    return parse_relevance_score(score_text)


# Cohere API call
def call_cohere_api(query, abstract):
    """Ask Cohere's command-xlarge-nightly to rate the relevance of ``abstract`` to ``query``.

    Returns the rating parsed from the first generation's text as a float.
    """
    response = co.generate(
        model="command-xlarge-nightly",
        prompt=f"You are a relevance evaluator for scientific articles. Question: {query}\nArticle Abstract: {abstract}\nRate the relevance from 1 to 10. Only provide the number as an answer.",
        max_tokens=10
    )
    return parse_relevance_score(response.generations[0].text)


# Gemini API call
def call_gemini_api(query, abstract):
    """Ask gemini-1.5-flash to rate the relevance of ``abstract`` to ``query``.

    Returns the rating parsed from the response text as a float.
    """
    # Named distinctly so it is not confused with the notebook-level embedding `model`
    gemini_model = genai.GenerativeModel("gemini-1.5-flash")
    prompt_text = f"You are a relevance evaluator for scientific articles. Question: {query}\nArticle Abstract: {abstract}\nRate the relevance from 1 to 10. Only provide the number as an answer."
    response = gemini_model.generate_content(prompt_text)
    return parse_relevance_score(response.text)


def expand_query(query):
    """Append GPT-suggested keywords and phrases to ``query``.

    gpt-3.5-turbo is asked for related terms (not an answer). The stripped
    reply is joined to the original query with a single space and the
    combined string is returned.
    """
    messages = [
        {"role": "system", "content": "You are an expert in data science and information retrieval."},
        {"role": "user", "content": f"Provide additional keywords or phrases that could be incorporated into the following query to refine or broaden it, but do not answer the question. Return a concise list of relevant terms or phrases only. Query: '{query}'"},
    ]
    completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)

    suggested_terms = completion['choices'][0]['message']['content'].strip()
    return f"{query} {suggested_terms}"



In [None]:
def get_top_5_articles(query, index, top_k=5):
    """Return the five best-scoring articles from the original and expanded queries.

    Articles are retrieved for ``query`` and for its GPT-expanded form,
    de-duplicated by title (the original-query hit wins), scored against the
    original ``query`` and ranked by model score, highest first.

    Args:
        query: The user's question.
        index: Accepted for interface compatibility; retrieval goes through
            ``retrieve_articles``, which queries the module-level index.
        top_k: Number of matches to retrieve for each of the two queries.

    Returns:
        A pair ``(articles, scores)`` where ``articles`` is a list of
        ``(title, abstract)`` tuples and ``scores`` holds the matching model
        scores, both limited to the five best entries.
    """
    # Articles and scores for the original query
    original_articles = retrieve_articles(query, top_k)
    original_scores = evaluate_articles(query, original_articles)

    combined_articles = [
        {
            "title": title,
            "abstract": abstract,
            "model_score": score,
            "source_query": "original",
        }
        for (title, abstract), score in zip(original_articles, original_scores)
    ]

    # Articles for the expanded query, dropping titles that are already present
    # BEFORE scoring so no model calls are spent on results that get discarded
    expanded_query = expand_query(query)
    seen_titles = {article["title"] for article in combined_articles}
    new_articles = []
    for title, abstract in retrieve_articles(expanded_query, top_k):
        if title not in seen_titles:
            seen_titles.add(title)
            new_articles.append((title, abstract))

    new_scores = evaluate_articles(query, new_articles)
    combined_articles.extend(
        {
            "title": title,
            "abstract": abstract,
            "model_score": score,
            "source_query": "expanded",
        }
        for (title, abstract), score in zip(new_articles, new_scores)
    )

    # Rank by model score and keep the five best
    top_5_articles = sorted(
        combined_articles, key=lambda article: article["model_score"], reverse=True
    )[:5]

    formatted_articles = [(article["title"], article["abstract"]) for article in top_5_articles]
    top_scores = [article["model_score"] for article in top_5_articles]
    return formatted_articles, top_scores



In [None]:
def refine_query_with_feedback(article_embeddings, feedback, weight_relevance=1.0, weight_irrelevance=0.1):
    """Build a refined query vector from article embeddings and 0/1 feedback.

    Embeddings whose feedback is 1 are averaged and scaled by
    ``weight_relevance``; when none are marked 1, the unscaled mean of all
    embeddings is used instead. If any embeddings have feedback 0, their mean
    scaled by ``weight_irrelevance`` is subtracted from that vector.

    Returns the resulting vector.
    """
    liked, disliked = [], []
    for embedding, label in zip(article_embeddings, feedback):
        if label == 1:
            liked.append(embedding)
        elif label == 0:
            disliked.append(embedding)

    # Positive component: relevant centroid, or the overall centroid as fallback
    if liked:
        positive = np.mean(liked, axis=0) * weight_relevance
    else:
        positive = np.mean(article_embeddings, axis=0)

    if not disliked:
        return positive

    # Push the vector away from what was marked irrelevant
    negative = np.mean(disliked, axis=0) * weight_irrelevance
    return positive - negative


In [None]:
def retrieve_improved_results(final_vector, index, top_k=5):
    """Query ``index`` with the refined vector and return ``(title, abstract)`` tuples.

    ``final_vector`` is converted with ``.tolist()`` before being sent. A match
    whose metadata lacks a title or abstract gets the placeholder text
    'No title available' / 'No abstract available' in that position.
    """
    matches = index.query(
        vector=final_vector.tolist(),
        top_k=top_k,
        include_metadata=True,
        include_values=True,
    )['matches']

    # Same tuple layout as the other retrieval helpers, for consistency
    return [
        (
            match['metadata'].get('title', 'No title available'),
            match['metadata'].get('abstract', 'No abstract available'),
        )
        for match in matches
    ]


In [None]:
feedback_csv_file = "feedback_vs_expanded_results.csv"

# Resume from an earlier run when a results file is already on disk; otherwise
# start from an empty table that has the expected columns
if not os.path.exists(feedback_csv_file):
    columns = [
        "query", "expanded_query", "title", "abstract",
        "expanded_score", "feedback_adjusted_score", "cosine_similarity",
        "source_query", "overlap_count",
    ]
    results_df = pd.DataFrame(columns=columns)
else:
    results_df = pd.read_csv(feedback_csv_file)

def calculate_cosine_similarity(query_embedding, abstract_embedding):
    """Return the cosine similarity between one query and one abstract embedding."""
    # cosine_similarity works on batches, so wrap each vector in a one-row
    # list and read back the single entry of the resulting matrix
    similarity_matrix = cosine_similarity([query_embedding], [abstract_embedding])
    return similarity_matrix[0][0]

def test_feedback_vs_expanded(questions, top_k=5):
    """Compare expanded-query retrieval with feedback-adjusted retrieval.

    For every question the function:
      1. retrieves and scores articles via ``get_top_5_articles``;
      2. simulates feedback by marking articles scoring above the mean as relevant;
      3. refines the query vector, retrieves again and scores the new results;
      4. appends one row per distinct title to the global ``results_df`` and
         writes the table to ``feedback_csv_file``.

    Rows carry ``source_query`` = "expanded", "feedback_adjusted" or "overlap"
    (returned by both methods) and ``overlap_count``, the number of overlap
    rows for that question.

    Args:
        questions: Iterable of query strings.
        top_k: Number of matches requested from each retrieval call.

    Returns:
        The updated global ``results_df``.
    """
    global results_df

    for query in questions:
        # Retrieve articles and scores using the expanded query
        print(f"Running expanded query retrieval for: {query}")
        expanded_articles, expanded_scores = get_top_5_articles(query, index, top_k)
        if not expanded_articles:
            # Without articles there is nothing to average or build feedback
            # from; continuing would query the index with a NaN vector.
            print(f"No articles retrieved for: {query}")
            continue

        avg_expanded_score = np.mean(expanded_scores)
        query_embedding = encode_text(query)  # Embedding of the original query

        # One GPT call per question instead of one per article row.
        # NOTE(review): this is a fresh expansion and may differ from the text
        # used inside get_top_5_articles, which does not return its expansion.
        expanded_query = expand_query(query)

        # Embed each abstract once; reused for similarity and for feedback
        article_embeddings = [encode_text(abstract) for _, abstract in expanded_articles]

        # Process expanded query articles
        all_articles = {}
        for (title, abstract), score, embedding in zip(
            expanded_articles, expanded_scores, article_embeddings
        ):
            all_articles[title] = {
                "query": query,
                "expanded_query": expanded_query,
                "title": title,
                "abstract": abstract,
                "expanded_score": score,
                "cosine_similarity": calculate_cosine_similarity(query_embedding, embedding),
                "source_query": "expanded",
                "feedback_adjusted_score": None,
            }

        # Snapshot the expanded-method scores by title so an overlapping article
        # keeps ITS OWN score rather than whatever sits at the same list position
        expanded_score_by_title = {
            title: row["expanded_score"] for title, row in all_articles.items()
        }

        # Feedback adjustment: above-average scores count as relevant
        feedback = [1 if score > avg_expanded_score else 0 for score in expanded_scores]
        final_vector = refine_query_with_feedback(article_embeddings, feedback)

        # Retrieve feedback-adjusted articles and evaluate
        feedback_articles = retrieve_improved_results(final_vector, index, top_k)
        feedback_scores = evaluate_articles(query, feedback_articles)

        # Process feedback-adjusted articles, checking for overlap
        for (title, abstract), feedback_score in zip(feedback_articles, feedback_scores):
            is_overlap = title in expanded_score_by_title
            all_articles[title] = {
                "query": query,
                "expanded_query": expanded_query,
                "title": title,
                "abstract": abstract,
                "expanded_score": expanded_score_by_title[title] if is_overlap else None,
                "cosine_similarity": calculate_cosine_similarity(
                    query_embedding, encode_text(abstract)
                ),
                "source_query": "overlap" if is_overlap else "feedback_adjusted",
                "feedback_adjusted_score": feedback_score,
            }

        # Record the overlap count on every row before the rows are stored
        overlap_count = sum(
            1 for article in all_articles.values() if article["source_query"] == "overlap"
        )
        for article in all_articles.values():
            article["overlap_count"] = overlap_count

        # Add all rows for this question in a single concat
        new_rows = pd.DataFrame(list(all_articles.values()))
        results_df = pd.concat([results_df, new_rows], ignore_index=True)

        # Save intermediate results to the CSV file
        results_df.to_csv(feedback_csv_file, index=False)
        print(f"Results saved to CSV after query: {query}")

    return results_df

# Benchmark questions (data-science / machine-learning topics) used to compare
# the expanded-query and feedback-adjusted retrieval methods
questions = [
    "What are the latest techniques for explainability in deep neural networks?",
    "How is transfer learning being applied in medical imaging analysis?",
    "What are the advancements in handling class imbalance in supervised learning?",
    "How can reinforcement learning improve autonomous vehicle navigation?",
    "What methods exist for detecting outliers in financial time series?",
    "What are recent innovations in synthetic data generation for NLP?",
    "How is unsupervised learning being used in anomaly detection?",
    "What are the latest approaches to causal inference in data science?",
    "How is AI being used to optimize supply chain management?",
    "What are novel techniques for feature engineering in time series data?",
    "How can GANs (Generative Adversarial Networks) improve image resolution?",
    "What role does data augmentation play in small dataset training?",
    "How is federated learning enhancing privacy in healthcare applications?",
    "What are effective methods for hyperparameter tuning in deep learning?",
    "How can active learning reduce labeling costs in supervised learning?",
    "What are the advancements in dimensionality reduction techniques for visualization?",
    "How is NLP being used to analyze sentiment in financial news?",
    "What methods are effective for handling missing data in datasets?",
    "How is transfer learning improving outcomes in drug discovery research?",
    "What are the latest machine learning techniques for predictive maintenance?"
]

# Pause between questions, in seconds, to stay under the API rate limits
RATE_LIMIT_PAUSE_SECONDS = 35

# Run the comparison one question at a time so results are saved after each one
for position, question in enumerate(questions, start=1):
    print(f"Running expanded vs feedback-adjusted comparison for question: {question}")
    results_df = test_feedback_vs_expanded([question])  # Test with each question
    print(f"Completed query for: {question}")
    # Nothing follows the last question, so there is no reason to wait after it
    if position < len(questions):
        print(f"Waiting {RATE_LIMIT_PAUSE_SECONDS} seconds before the next run...")
        time.sleep(RATE_LIMIT_PAUSE_SECONDS)

print("All queries completed.")


**Results table and performance statistics for the top 5 articles per query:**

In [None]:
results_df = pd.read_csv("feedback_vs_expanded_results.csv")

# Group by query to analyze each query individually
query_groups = results_df.groupby('query')

# Number of new feedback-adjusted articles in the top 5, one entry per query
feedback_counts = []

for query, group in query_groups:
    # Rank by the higher of the two model scores, with cosine similarity as the
    # tiebreaker. `assign` builds a new frame instead of writing a column onto
    # the groupby slice.
    ranked = group.assign(
        final_score=group[['expanded_score', 'feedback_adjusted_score']].max(axis=1)
    )
    top_5 = ranked.sort_values(
        by=['final_score', 'cosine_similarity'], ascending=[False, False]
    ).head(5)

    # Count the articles only the feedback-adjusted method surfaced
    feedback_count = (top_5['source_query'] == 'feedback_adjusted').sum()
    feedback_counts.append(feedback_count)

    # Print results for each query
    print(f"Query: {query}")
    print("Top 5 Articles:")
    print(top_5[['title', 'final_score', 'cosine_similarity', 'source_query']])
    print(f"Number of new feedback-adjusted articles in top 5: {feedback_count}\n")

# Average across all queries; NaN when the results file holds no queries, which
# avoids dividing by zero
average_feedback_count = (
    sum(feedback_counts) / len(feedback_counts) if feedback_counts else float("nan")
)
print(f"Average number of new feedback-adjusted articles in the top 5 across all queries: {average_feedback_count}")


Query: How can GANs (Generative Adversarial Networks) improve image resolution?
Top 5 Articles:
                                                title  final_score  \
76            Infrared Image Super-Resolution via GAN     9.666667   
73  Generative Adversarial Networks for Image Supe...     9.333333   
75  A Generative Model for Hallucinating Diverse V...     9.333333   
78  Details or Artifacts: A Locally Discriminative...     9.333333   
74  A General Method to Incorporate Spatial Inform...     9.000000   

    cosine_similarity       source_query  
76           0.648041            overlap  
73           0.728315            overlap  
75           0.702885            overlap  
78           0.693186  feedback_adjusted  
74           0.732697            overlap  
Number of new feedback-adjusted articles in top 5: 1

Query: How can reinforcement learning improve autonomous vehicle navigation?
Top 5 Articles:
                                                title  final_score  \
23  Eval