In [41]:
import requests
from bs4 import BeautifulSoup
from typing import List, Dict

from serpapi import GoogleSearch

from dotenv import load_dotenv
import os

from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
import json

from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.schema import Document

from langchain.text_splitter import CharacterTextSplitter

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

from langchain.prompts import ChatPromptTemplate

In [42]:
print("Current working directory:", os.getcwd())
load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY")
if openai_api_key:
    print('OPEN API KEY FOUND')
else:
    print("No API key found")

serp_api_key = os.getenv("SERP_API_KEY")
if serp_api_key:
    print('SERP API KEY FOUND')
else:
    print("No SERP API key found")

sarvam_api_key = os.getenv("SARVAM_API_KEY")
if sarvam_api_key:
    print('SARVAM API KEY FOUND')
else:
    print("No SARVAM API key found")

grok_api_key = os.getenv("GROK_API_KEY")
if grok_api_key:
    print('GROK API KEY FOUND')
else:
    print("No GROK API key found")

serp_dev_api_key = os.getenv("SERP_DEV_API_KEY")
if serp_dev_api_key:
    print('SERP DEV API KEY FOUND')
else:
    print("No SERP API key found")

tav_dev_api_key = os.getenv("TAVILY_API_KEY")
if tav_dev_api_key:
    print('TAV DEV API KEY FOUND')
else:
    print("No TAV API key found")

Current working directory: c:\Users\rahul\OneDrive\7_Learning\IISC\Courses\3.1_Deep_Learning\Course Material\Project\wip
OPEN API KEY FOUND
SERP API KEY FOUND
SARVAM API KEY FOUND
GROK API KEY FOUND
SERP DEV API KEY FOUND
TAV DEV API KEY FOUND


In [43]:
class SerperRetrieverWrapper:
    def __init__(self, api_key: str, num_results: int = 5):
        self.api_key = api_key
        self.num_results = num_results
    
    def get_relevant_documents(self, query: str):
        """
        Query Serper.dev and return up to `num_results` organic search hits.
        Each hit is a dict: { "title": str, "link": str, "snippet": str }.
        Raises HTTPError on non-2xx responses.
        """
        _SERPER_SEARCH_URL = "https://google.serper.dev/search"
    
        headers = {
            "X-API-KEY": self.api_key,
            "Content-Type": "application/json"
        }
        payload = {
            "q": 'site:news18.com ' + query,
            "num": self.num_results,
        }
    
        resp = requests.post(_SERPER_SEARCH_URL, headers=headers, json=payload, timeout=5)

        if resp.status_code != 200:
            raise Exception(f"Serper API Error: {resp.text}")
        results = resp.json()
    
        documents = []
        for result in results.get("organic", [])[:self.num_results]:
            content = f"{result.get('title', '')}\n{result.get('snippet', '')}"
            documents.append(Document(page_content=content, metadata={"source": result.get("link", "")}))
    
        return documents

In [44]:
serper_retriever = SerperRetrieverWrapper(api_key=serp_dev_api_key)
context_retriever = RunnableLambda(serper_retriever.get_relevant_documents)

In [60]:
template = """You are an AI language model assistant. Your task is to generate three 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

llm3 = ChatGroq(api_key=grok_api_key, model_name="llama3-8b-8192")
generate_queries = (
    prompt_perspectives 
    | llm3
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

In [61]:
summarizer_prompt = PromptTemplate.from_template("""
You are an assistant summarizing factual evidence from multiple documents.

Based on the following documents, extract the key facts relevant to the claim.

Claim: {question}

Documents:
{context}

Return a short neutral summary of the key facts only.
""")

llm1 = ChatGroq(api_key=grok_api_key, model_name="llama3-8b-8192")
summarizer_chain = (
    {
        "context": context_retriever,
        "question": generate_queries   #RunnablePassthrough()
    }
    | summarizer_prompt
    | llm1
    | StrOutputParser()
)

In [62]:
judge_prompt = PromptTemplate.from_template("""
You are a fact-checking assistant.

Claim: {question}

Evidence:
{evidence}

Decide whether the claim is REAL or FAKE or UNSURE based only on the evidence.

Respond in this format:
Classification: REAL or FAKE
Explanation: <your reasoning>
""")

llm2 = ChatGroq(api_key=grok_api_key, model_name="llama3-8b-8192")
fact_checker_chain = (
    {
        "question": RunnablePassthrough(),
        "evidence": summarizer_chain 
    }
    | judge_prompt
    | llm2
    | StrOutputParser()
)

In [68]:
claim = "australia beats south africa in the test series final"

In [69]:
fact_checker_chain.invoke(claim)

'Classification: FAKE\nExplanation: The evidence provided clearly shows that South Africa won the ICC World Test Championship final against Australia, not the other way around. The claim "Australia beats South Africa in the test series final" is directly contradicted by the fact that South Africa won the match by five wickets.'

In [71]:
summarizer_chain.invoke(claim)

"Here is a short neutral summary of the key facts:\n\n* South Africa won the ICC World Test Championship final against Australia.\n* The match was played at Lord's and South Africa won by 5 wickets on the fourth day.\n* Aiden Markram and Bavuma scored centuries for South Africa in the match.\n* Australia did not win the Test series against South Africa."

In [70]:
generate_queries.invoke(claim)

['Here are three alternative versions of the user question to retrieve relevant documents from a vector database:',
 '',
 'Cricket: Australia wins Test series over South Africa in the final match',
 '',
 'South Africa loses Test series to Australia in the final match, with Australia emerging victorious',
 '',
 'Australia defeats South Africa in the deciding Test match of the series, securing a victory']

In [63]:
from sarvamai import SarvamAI

In [64]:
client = SarvamAI(api_subscription_key="474af18e-b1e7-4fba-b201-cc4b4f52ed3c")

In [66]:
translation = client.text.translate(
    input="यह एक नमूना पाठ है।",
    source_language_code="auto",
    target_language_code="hi-IN"
)

In [67]:
translation

TranslationResponse(request_id='20250616_c452728d-010b-4410-83c3-52abe5c3439e', translated_text='यह एक नमूना पाठ है।', source_language_code='hi-IN')