In [None]:
import os

# Connection settings for the Azure AI inference endpoint.
# Both values can be supplied through the environment so that secrets never have
# to be typed into (and saved with) the notebook; the literals below are only
# fallbacks that preserve the notebook's previous behaviour.
API_ENDPOINT = os.environ.get("AZURE_INFERENCE_ENDPOINT", "http://81.171.3.27:65432/models")
API_KEY = os.environ.get("AZURE_INFERENCE_CREDENTIAL", "")

In [None]:
!pip install langchain langchain-core langchain-azure-ai langchain-milvus

Collecting langchain-azure-ai
  Downloading langchain_azure_ai-0.1.2-py3-none-any.whl.metadata (3.6 kB)
Collecting langchain-milvus
  Downloading langchain_milvus-0.1.8-py3-none-any.whl.metadata (1.9 kB)
Collecting azure-ai-inference<2.0.0,>=1.0.0b7 (from azure-ai-inference[opentelemetry]<2.0.0,>=1.0.0b7->langchain-azure-ai)
  Downloading azure_ai_inference-1.0.0b9-py3-none-any.whl.metadata (34 kB)
Collecting azure-core<2.0.0,>=1.32.0 (from langchain-azure-ai)
  Downloading azure_core-1.32.0-py3-none-any.whl.metadata (39 kB)
Collecting azure-cosmos<5.0.0,>=4.9.0 (from langchain-azure-ai)
  Downloading azure_cosmos-4.9.0-py3-none-any.whl.metadata (80 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m80.8/80.8 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting azure-identity<2.0.0,>=1.15.0 (from langchain-azure-ai)
  Downloading azure_identity-1.21.0-py3-none-any.whl.metadata (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.3/81.3 k

In [None]:
from langchain_azure_ai.embeddings import AzureAIEmbeddingsModel
from langchain_azure_ai.chat_models import AzureAIChatCompletionsModel
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from sentence_transformers import CrossEncoder
from langchain_milvus import Milvus
from langchain.schema import Document
from langchain.chains import SequentialChain, LLMChain
from langchain.prompts import PromptTemplate
from IPython.display import display, Markdown

# Embedding model client; it is handed to the vector store below.
embeddings = AzureAIEmbeddingsModel(
    endpoint=API_ENDPOINT,
    credential=API_KEY,
    model_name="text-embedding-3-small",
)

# Milvus vector store opened from the database URI given in connection_args.
store = Milvus(
    embeddings,
    connection_args={"uri": "/content/milvus-cahiers-du-foot.db"},
)

# Chat completion model talking to the same endpoint with the same credential.
model = AzureAIChatCompletionsModel(
    endpoint=API_ENDPOINT,
    credential=API_KEY,
    model_name="gpt-4o",
)

In [None]:
_RERANKER_MODEL_NAME = "cross-encoder/ms-marco-MiniLM-L-6-v2"
_reranker_cache = {}


def _get_reranker(model_name=_RERANKER_MODEL_NAME):
  """Return the cross-encoder for `model_name`, constructing it only on first use."""
  if model_name not in _reranker_cache:
    _reranker_cache[model_name] = CrossEncoder(model_name)
  return _reranker_cache[model_name]


def _ask(llm, template, **variables):
  """Fill `template` with `variables`, send it to `llm` and return the reply text."""
  reply = (PromptTemplate.from_template(template) | llm).invoke(variables)
  # Chat models return a message object, plain LLMs return a string.
  return getattr(reply, "content", reply)


def _format_doc(doc, score):
  """Render one retrieved document and its reranker score as a Markdown snippet."""
  metadata = doc.metadata
  return (
    f"**Article ID:** {metadata.get('article_id', 'Unknown ID')}\n"
    f"**URL:** {metadata.get('url', 'Unknown URL')}\n"
    f"**Title:** {metadata.get('title', 'Unknown Title')}\n"
    f"**Score:** {float(score):.4f}\n"
    f"**Content:** {doc.page_content}"
  )


def answer(question, topk, model = model, store = store):
  """Answer `question` from the vector store and display the result as Markdown.

  Steps:
    1. Ask the model to rewrite the question into a retrieval-friendly query.
    2. Retrieve `topk` documents for the rewritten query.
    3. Rerank them against the original question with a cross-encoder.
    4. Generate verification questions and check them against the documents.
    5. Ask the model for the final answer, giving it the ranked documents
       (with their scores) and the verification notes.

  Args:
    question: The user's question / instructions.
    topk: Number of documents to retrieve and pass to the model (must be >= 1).
    model: Chat model used for every LLM call.
    store: Vector store queried for documents.

  Returns:
    None. The answer is rendered in the notebook via `display(Markdown(...))`.

  Raises:
    ValueError: If `topk` is smaller than 1.
  """
  if topk < 1:
    raise ValueError("topk must be a positive integer")

  # 1. Query rewriting. Fall back to the raw question if the model returns nothing.
  optimized_question = _ask(
    model,
    "Rewrite this query to maximize retrieval quality. "
    "Return only the rewritten query.\n\n{original_question}",
    original_question=question,
  ).strip()

  # 2. Retrieval with the rewritten query.
  retriever = store.as_retriever(search_type="similarity", search_kwargs={"k": topk})
  retrieved_docs = retriever.invoke(optimized_question or question)

  if not retrieved_docs:
    # Nothing to rerank or verify; avoid calling the reranker on an empty batch.
    display(Markdown("I'm sorry, I could not find any document related to your question."))
    return None

  # 3. Reranking against the user's original wording; sorting by key only, so
  #    documents themselves are never compared when two scores tie.
  scores = _get_reranker().predict([(question, doc.page_content) for doc in retrieved_docs])
  ranked = sorted(zip(scores, retrieved_docs), key=lambda pair: pair[0], reverse=True)[:topk]
  documents_str = "\n\n".join(_format_doc(doc, score) for score, doc in ranked)

  # 4. Chain of verification: both prompts actually contain the documents.
  verification_questions = _ask(
    model,
    "Here is the original question:\n{original_question}\n\n"
    "Here are the retrieved documents:\n{retrieved_docs}\n\n"
    "Generate verification questions to check for inconsistencies in the information.",
    original_question=question,
    retrieved_docs=documents_str,
  )
  verified_answers = _ask(
    model,
    "Given these verification questions:\n{verification_questions}\n\n"
    "And the retrieved documents:\n{retrieved_docs}\n\n"
    "Check the retrieved documents for inconsistencies and summarize validated answers.",
    verification_questions=verification_questions,
    retrieved_docs=documents_str,
  )

  # 5. Final answer, grounded in the ranked documents and the verification notes.
  messages = [
    SystemMessage(content="""You are a document analyst and a translator from french to english.
    The user will submit a few documents and give you a question with instructions.
    You follow these instructions and use these documents only. Please provide article titles, ID, URL and the content where your answer comes from.
    Translate the content of the article you found to english.
    If the documents do not allow an answer, just say you don't know in a polite way.
    Provide the scores of the chosen documents
    After answering, please verify your answer by using verification questions and refining it."""),
    HumanMessage(content=f"Here are the documents:\n\n{documents_str}"),
    HumanMessage(content=f"Here are verification notes derived from these documents:\n\n{verified_answers}"),
    HumanMessage(content=f"Here are the instructions: {question}")]

  chatbot_answer = model.invoke(messages)
  display(Markdown(chatbot_answer.content))
  return None

In [None]:
# Read the question and the retrieval depth from the user, then run the pipeline.
question1 = input('Enter your question: ')
topk1 = int(input('Enter the number of the most similar documents to retrieve: '))
answer(question=question1, topk=topk1)

Enter your question: Name a few key players in the French national team
Enter the number of the most similar documents to retrieve: 5


  pre_retrieval_chain = LLMChain(llm=model, prompt=pre_retrieval_prompt)
  optimized_question = pre_retrieval_chain.run({"original_question": question})
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/794 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Using the provided documents, here is the information on key players in the French national team:

### **1. Article ID:** b777ac3289eb5f665f3365fc3ebab4691463637a  
**URL:** [Blanc 1998, le but en Nord](https://www.cahiersdufootball.net/article/blanc-1998-le-but-en-nord-7025)  
**Title:** Blanc 1998, le but en Nord - Les Cahiers du football || magazine de foot et d'eau fraîche  
**Content:**  
*"Youri Djorkaeff est placé en meneur de jeu, soutenu par Didier Deschamps et Emmanuel Petit. Derrière, on retrouve le cinq majeur de la défense française: Thuram, Blanc, Desailly et Lizarazu, qui protègent le gardien Fabien Barthez."*  

**Translation to English:**  
*"Youri Djorkaeff was positioned as the playmaker, supported by Didier Deschamps and Emmanuel Petit. Behind them, the core five of the French defense included Thuram, Blanc, Desailly, and Lizarazu, who protected goalkeeper Fabien Barthez."*  

**Score:** High relevance  

---

### **2. Article ID:** 7604671d2e0f07b469989bfd845828606086e7ab  
**URL:** [Les internationaux français : comment s'appellent-ils?](https://www.cahiersdufootball.net/article/infographies-internationaux-france-prenoms-comment-s-appellent-ils-5024?page=1)  
**Title:** Les internationaux français : comment s'appellent-ils? - Les Cahiers du football || magazine de foot et d'eau fraîche  
**Content:**  
*"Jean: Djorkaeff, Petit. Laurent: Blanc. Patrick: Vieira."*

**Translation to English:**  
*"Jean: Djorkaeff, Petit. Laurent: Blanc. Patrick: Vieira."*  

**Score:** Medium relevance  

---

### **3. Article ID:** 49e5fd052113e96bf7655f40a9cbc695b3afc809  
**URL:** [Réservoir Coqs](https://www.cahiersdufootball.net/article/reservoir-coqs-2837)  
**Title:** Réservoir Coqs - Les Cahiers du football || magazine de foot et d'eau fraîche  
**Content:**  
*"Une ``nouvelle génération`` (Nasri – 20 ans, Benzema – 22, Gomis – 20, Diarra - 23) qui peut espérer une longue carrière en bleu…"*

**Translation to English:**  
*"A 'new generation' (Nasri – 20 years old, Benzema – 22, Gomis – 20, Diarra – 23) that can hope for a long career in the French national team…"*  

**Score:** Medium relevance  

---

### Key Players Mentioned:
- **1998 Team:** Youri Djorkaeff, Didier Deschamps, Emmanuel Petit, Lilian Thuram, Laurent Blanc, Marcel Desailly, Bixente Lizarazu, Fabien Barthez.  
- **Recent Generations:** Samir Nasri, Karim Benzema, Bafétimbi Gomis, Lassana Diarra.  
- **Other References:** Patrick Vieira  

---

### Verification Questions:
1. **Do other parts of the documents contradict this list of key players?**  
   No, the documents mention consistent names across different sections.  

2. **Did I miss any names explicitly stated in the documents?**  
   No, all provided names were included and appropriately sourced from the listed content.

### Final Answer Refinement:
All names are accurate and derived directly from the documents. The French national team has evolved over time, highlighting both historical stars and emerging talents. If you need further clarification, please feel free to ask.

In [None]:
# Second interactive query. The call below receives only this question and topk,
# so anything from the previous exchange has to be restated in the question itself.
question2 = input('Enter your question: ')
topk2 = int(input('Enter the number of the most similar documents to retrieve: '))
answer(question=question2, topk=topk2)

Enter your question: Out of them, who is the most important player?
Enter the number of the most similar documents to retrieve: 5


Based on the provided documents, there isn't a clear statement regarding who is the "most important player" among the mentioned individuals. The articles discuss various players, including Mesut Özil, Cristiano Ronaldo, Iniesta, Müller, Jamie Vardy, Riyad Mahrez, Ngolo Kanté, Karim Benzema, and Roberto Firmino, highlighting their qualities and roles. However, the "most important player" depends on subjective interpretation, which isn't explicitly provided in these texts.

**Scores of the chosen documents:**  
None of the articles explicitly determine the most important player, so I cannot select one based on the question.

If you'd like to refine the question or give more specific criteria to determine importance, I would gladly reevaluate.