In [1]:
import os
import random
from openai import OpenAI

In [2]:
client = OpenAI()

In [6]:
def generate_queries_chatgpt(original_query):

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that generates multiple search queries based on a single input query."},
            {"role": "user", "content": f"Generate multiple search queries related to: {original_query}"},
            {"role": "user", "content": "OUTPUT (4 queries):"}
        ]
    )

    generated_queries = response
    return generated_queries


In [4]:
def vector_search(query, all_documents):
    available_docs = list(all_documents.keys())
    random.shuffle(available_docs)
    selected_docs = available_docs[:random.randint(2, 5)]
    scores = {doc: round(random.uniform(0.7, 0.9), 2) for doc in selected_docs}
    return {doc: score for doc, score in sorted(scores.items(), key=lambda x: x[1], reverse=True)}

# Reciprocal Rank Fusion algorithm
def reciprocal_rank_fusion(search_results_dict, k=60):
    fused_scores = {}
    print("Initial individual search result ranks:")
    for query, doc_scores in search_results_dict.items():
        print(f"For query '{query}': {doc_scores}")
        
    for query, doc_scores in search_results_dict.items():
        for rank, (doc, score) in enumerate(sorted(doc_scores.items(), key=lambda x: x[1], reverse=True)):
            if doc not in fused_scores:
                fused_scores[doc] = 0
            previous_score = fused_scores[doc]
            fused_scores[doc] += 1 / (rank + k)
            print(f"Updating score for {doc} from {previous_score} to {fused_scores[doc]} based on rank {rank} in query '{query}'")

    reranked_results = {doc: score for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)}
    print("Final reranked results:", reranked_results)
    return reranked_results

# Dummy function to simulate generative output
def generate_output(reranked_results, queries):
    return f"Final output based on {queries} and reranked documents: {list(reranked_results.keys())}"


# Predefined set of documents (usually these would be from your search database)
all_documents = {
    "doc1": "Climate change and economic impact.",
    "doc2": "Public health concerns due to climate change.",
    "doc3": "Climate change: A social perspective.",
    "doc4": "Technological solutions to climate change.",
    "doc5": "Policy changes needed to combat climate change.",
    "doc6": "Climate change and its impact on biodiversity.",
    "doc7": "Climate change: The science and models.",
    "doc8": "Global warming: A subset of climate change.",
    "doc9": "How climate change affects daily weather.",
    "doc10": "The history of climate change activism."
}

In [7]:
original_query = "impact of climate change"
generated_queries = generate_queries_chatgpt(original_query)

In [8]:
generated_queries

ChatCompletion(id='chatcmpl-8MnqzcK31fP9ZLGkqY9rt0OnnPlLy', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content='1. How does climate change impact biodiversity?\n2. What are the economic consequences of climate change?\n3. What are the health effects of climate change?\n4. How does climate change impact food production?', role='assistant', function_call=None, tool_calls=None))], created=1700445545, model='gpt-3.5-turbo-0613', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=41, prompt_tokens=48, total_tokens=89))

In [11]:
generated_queries.choices[0].message.content

'1. How does climate change impact biodiversity?\n2. What are the economic consequences of climate change?\n3. What are the health effects of climate change?\n4. How does climate change impact food production?'

In [13]:
all_results = {}
for query in generated_queries.choices[0].message.content:
    search_results = vector_search(query, all_documents)
    all_results[query] = search_results

reranked_results = reciprocal_rank_fusion(all_results)

final_output = generate_output(reranked_results, generated_queries)

Initial individual search result ranks:
For query '1': {'doc10': 0.77, 'doc4': 0.71}
For query '.': {'doc3': 0.88, 'doc7': 0.88, 'doc2': 0.84, 'doc8': 0.82, 'doc6': 0.72}
For query ' ': {'doc1': 0.85, 'doc9': 0.83, 'doc7': 0.78, 'doc4': 0.74, 'doc2': 0.7}
For query 'H': {'doc3': 0.88, 'doc6': 0.76, 'doc5': 0.7}
For query 'o': {'doc6': 0.77, 'doc1': 0.71}
For query 'w': {'doc5': 0.81, 'doc10': 0.76, 'doc3': 0.73}
For query 'd': {'doc9': 0.9, 'doc2': 0.79, 'doc7': 0.78, 'doc3': 0.74}
For query 'e': {'doc3': 0.88, 'doc5': 0.76, 'doc6': 0.72}
For query 's': {'doc1': 0.86, 'doc4': 0.75}
For query 'c': {'doc8': 0.88, 'doc2': 0.88, 'doc5': 0.73}
For query 'l': {'doc5': 0.84, 'doc7': 0.7}
For query 'i': {'doc1': 0.87, 'doc10': 0.84, 'doc8': 0.84, 'doc6': 0.74}
For query 'm': {'doc2': 0.78, 'doc8': 0.78, 'doc3': 0.73}
For query 'a': {'doc1': 0.86, 'doc4': 0.85, 'doc8': 0.82, 'doc5': 0.8}
For query 't': {'doc4': 0.89, 'doc2': 0.84, 'doc7': 0.82, 'doc10': 0.73, 'doc6': 0.71}
For query 'h': {'doc3

In [14]:
final_output

"Final output based on ChatCompletion(id='chatcmpl-8MnqzcK31fP9ZLGkqY9rt0OnnPlLy', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content='1. How does climate change impact biodiversity?\\n2. What are the economic consequences of climate change?\\n3. What are the health effects of climate change?\\n4. How does climate change impact food production?', role='assistant', function_call=None, tool_calls=None))], created=1700445545, model='gpt-3.5-turbo-0613', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=41, prompt_tokens=48, total_tokens=89)) and reranked documents: ['doc5', 'doc6', 'doc4', 'doc9', 'doc7', 'doc2', 'doc3', 'doc8', 'doc10', 'doc1']"

In [None]:
from openai import AzureOpenAI

In [None]:
client = AzureOpenAI()

In [None]:
client.chat.completions.create()