In [None]:
import requests

from dotenv import load_dotenv
import os

from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
import json

from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.schema import Document

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

from sarvamai import SarvamAI

In [3]:
print("Current working directory:", os.getcwd())
# Pull variables from a local .env file into the process environment.
load_dotenv()


def _load_api_key(env_var, found_msg, missing_msg):
    """Read one API key from the environment and report whether it is set.

    Args:
        env_var: Name of the environment variable to read.
        found_msg: Message printed when the variable has a non-empty value.
        missing_msg: Message printed when the variable is unset or empty.

    Returns:
        The variable's value, or None when it is not set.
    """
    value = os.getenv(env_var)
    print(found_msg if value else missing_msg)
    return value


openai_api_key = _load_api_key("OPENAI_API_KEY", 'OPEN API KEY FOUND', "No API key found")
serp_api_key = _load_api_key("SERP_API_KEY", 'SERP API KEY FOUND', "No SERP API key found")
sarvam_api_key = _load_api_key("SARVAM_API_KEY", 'SARVAM API KEY FOUND', "No SARVAM API key found")
# NOTE: despite the "GROK" spelling, this key is passed to ChatGroq further down.
grok_api_key = _load_api_key("GROK_API_KEY", 'GROK API KEY FOUND', "No GROK API key found")
# The missing-key message used to read "No SERP API key found", which was
# indistinguishable from the SERP_API_KEY check above.
serp_dev_api_key = _load_api_key("SERP_DEV_API_KEY", 'SERP DEV API KEY FOUND', "No SERP DEV API key found")
tav_dev_api_key = _load_api_key("TAVILY_API_KEY", 'TAV DEV API KEY FOUND', "No TAV API key found")

Current working directory: c:\Users\rahul\OneDrive\7_Learning\IISC\Courses\3.1_Deep_Learning\Course Material\Project\wip
OPEN API KEY FOUND
SERP API KEY FOUND
SARVAM API KEY FOUND
GROK API KEY FOUND
SERP DEV API KEY FOUND
TAV DEV API KEY FOUND


In [4]:
class SerperRetrieverWrapper:
    """Small retriever that runs a Google search through the Serper.dev API.

    Args:
        api_key: Serper.dev API key, sent in the ``X-API-KEY`` header.
        num_results: Maximum number of organic hits to request and return.
        site: Domain the search is restricted to via a ``site:`` prefix.
            Pass ``None`` or ``""`` to search without a site restriction.
        timeout: Seconds to wait for the HTTP response.
    """

    _SERPER_SEARCH_URL = "https://google.serper.dev/search"

    def __init__(self, api_key: str, num_results: int = 5,
                 site: str = "news18.com", timeout: float = 5):
        self.api_key = api_key
        self.num_results = num_results
        self.site = site
        self.timeout = timeout

    def _build_payload(self, query: str) -> dict:
        """Build the JSON body for the search request, applying the site filter."""
        site_prefix = f"site:{self.site} " if self.site else ""
        return {
            "q": site_prefix + query,
            "num": self.num_results,
        }

    def get_relevant_documents(self, query: str):
        """
        Query Serper.dev and return up to `num_results` organic search hits.

        Each hit becomes a ``Document`` whose ``page_content`` is
        "<title>\\n<snippet>" and whose ``metadata["source"]`` is the result link.

        Raises:
            requests.HTTPError: if the response status is not 200. This is a
                subclass of ``Exception``, so callers that caught the generic
                exception raised previously keep working.
        """
        headers = {
            "X-API-KEY": self.api_key,
            "Content-Type": "application/json"
        }

        resp = requests.post(
            self._SERPER_SEARCH_URL,
            headers=headers,
            json=self._build_payload(query),
            timeout=self.timeout,
        )

        if resp.status_code != 200:
            raise requests.HTTPError(f"Serper API Error: {resp.text}", response=resp)
        results = resp.json()

        # "organic" may be absent when there are no hits; treat that as an empty list.
        return [
            Document(
                page_content=f"{result.get('title', '')}\n{result.get('snippet', '')}",
                metadata={"source": result.get("link", "")},
            )
            for result in results.get("organic", [])[:self.num_results]
        ]

In [5]:
# Retriever backed by Serper.dev, authenticated with the SERP_DEV_API_KEY value loaded earlier.
serper_retriever = SerperRetrieverWrapper(api_key=serp_dev_api_key)
# Wrap the bound method as a Runnable so it can be composed into chains with `|`.
context_retriever = RunnableLambda(serper_retriever.get_relevant_documents)

In [6]:
# Prompt asking the LLM for three rephrasings of the user's question, one per line.
template = """You are an AI language model assistant. Your task is to generate three 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)


def _split_queries(text):
    """Split the LLM reply into one query per line, dropping blank lines.

    The model often separates alternatives with empty lines; a plain
    ``split("\\n")`` would pass those through as empty "queries".
    """
    return [line.strip() for line in text.split("\n") if line.strip()]


llm3 = ChatGroq(api_key=grok_api_key, model_name="llama3-8b-8192")
# question -> prompt -> LLM -> plain string -> list of non-empty query strings
generate_queries = (
    prompt_perspectives
    | llm3
    | StrOutputParser()
    | _split_queries
)

In [7]:
summarizer_prompt = PromptTemplate.from_template("""
You are an assistant summarizing factual evidence from multiple documents.

Based on the following documents, extract the key facts relevant to the claim.

Claim: {question}

Documents:
{context}

Return a short neutral summary of the key facts only.
""")

llm1 = ChatGroq(api_key=grok_api_key, model_name="llama3-8b-8192")

# Upper bound on LLM-generated rephrasings that are searched, to keep the
# number of search API calls per claim bounded.
MAX_QUERY_VARIANTS = 3


def _retrieve_context(claim):
    """Gather evidence for `claim` using the claim itself plus its rephrasings.

    Previously the rephrased queries were placed in the prompt's "Claim" slot
    and never used for retrieval, and the raw Document list was interpolated
    into the prompt. Here every query is searched, hits are de-duplicated by
    source link, and the result is rendered as plain text.

    Returns:
        A string with one "<content>\\nSource: <link>" entry per unique hit,
        entries separated by blank lines (empty string when nothing is found).
    """
    variants = []
    for line in generate_queries.invoke(claim):
        line = line.strip()
        # Skip blank lines and heading-style preamble such as "Here are three versions:".
        if line and not line.endswith(":"):
            variants.append(line)

    seen = set()
    evidence = []
    for query in [claim] + variants[:MAX_QUERY_VARIANTS]:
        for doc in context_retriever.invoke(query):
            source = doc.metadata.get("source", "")
            # Fall back to the content as identity when a hit carries no link.
            key = source or doc.page_content
            if key in seen:
                continue
            seen.add(key)
            evidence.append(f"{doc.page_content}\nSource: {source}")
    return "\n\n".join(evidence)


# claim (str) -> {"context": evidence text, "question": the claim} -> prompt -> LLM -> str
summarizer_chain = (
    {
        "context": RunnableLambda(_retrieve_context),
        "question": RunnablePassthrough(),
    }
    | summarizer_prompt
    | llm1
    | StrOutputParser()
)

In [8]:
# The response format now lists UNSURE as well; before, the instructions
# allowed three labels while the format line only offered REAL or FAKE.
judge_prompt = PromptTemplate.from_template("""
You are a fact-checking assistant.

Claim: {question}

Evidence:
{evidence}

Decide whether the claim is REAL or FAKE or UNSURE based only on the evidence.

Respond in this format:
Classification: REAL, FAKE, or UNSURE
Explanation: <your reasoning>
""")

llm2 = ChatGroq(api_key=grok_api_key, model_name="llama3-8b-8192")
# claim (str) -> {"question": the claim, "evidence": summarizer output} -> prompt -> LLM -> str
fact_checker_chain = (
    {
        "question": RunnablePassthrough(),
        "evidence": summarizer_chain
    }
    | judge_prompt
    | llm2
    | StrOutputParser()
)

In [9]:
#claim = "australia beats south africa in the test series final"
#claim = '‡Æá‡Æ∏‡Øç‡Æ∞‡Øá‡Æ≤‡Øç ‡Æà‡Æ∞‡Ææ‡Æ©‡Æø‡Æ≤‡Øç ‡ÆÖ‡Æ£‡ØÅ‡Æï‡ØÅ‡Æ£‡Øç‡Æü‡ØÅ ‡Æµ‡ØÄ‡Æö‡ØÅ‡Æï‡Æø‡Æ±‡Æ§‡ØÅ'
#claim = '‡≤≠‡≤æ‡≤∞‡≤§‡≤µ‡≥Å ‡≤é‡≤≤‡≥ç‡≤≤‡≤æ ‡≤µ‡≤Ø‡≤∏‡≥ç‡≤ï‡≤∞‡≤ø‡≤ó‡≥Ü ‡≤∏‡≥à‡≤®‡≤ø‡≤ï ‡≤∏‡≥á‡≤µ‡≥Ü‡≤Ø‡≤®‡≥ç‡≤®‡≥Å ‡≤ï‡≤°‡≥ç‡≤°‡≤æ‡≤Ø‡≤µ‡≤æ‡≤ó‡≤ø‡≤∏‡≤ø‡≤¶‡≥Ü'
# Claim to fact-check; it is translated to English below before being passed to the chain.
# NOTE(review): this literal looks mis-encoded (mojibake) in this view — confirm the notebook file is saved as UTF-8.
claim = '‡§´‡§º‡•Å‡§ü‡§¨‡•â‡§≤ ‡§µ‡§ø‡§∂‡•ç‡§µ ‡§ï‡§™ 2030 ‡§≠‡§æ‡§∞‡§§ ‡§Æ‡•á‡§Ç ‡§Ü‡§Ø‡•ã‡§ú‡§ø‡§§ ‡§ï‡§ø‡§Ø‡§æ ‡§ú‡§æ‡§è‡§ó‡§æ'

In [10]:
# Use the key loaded from the SARVAM_API_KEY environment variable instead of a
# literal in the notebook. The key that was previously hard-coded here should
# be treated as leaked and rotated.
if not sarvam_api_key:
    raise RuntimeError("SARVAM_API_KEY is not set; add it to your environment or .env file.")
client = SarvamAI(api_subscription_key=sarvam_api_key)

In [11]:
# Translate the claim to English. `translation` is initialised to None so that
# the next cell's `if translation` fallback works even when the API call fails
# (previously a failure left the name undefined and raised NameError there).
translation = None
try:
    translation = client.text.translate(
        input=claim,
        source_language_code="auto",
        target_language_code="en-IN"
    )
except Exception as e:
    print(f"Error during translation: {e}")

In [14]:
# Prefer the English translation; fall back to the raw claim when none is available.
if translation:
    claim_final = translation.translated_text
else:
    claim_final = claim
print(f"Translated claim: {claim_final}")

Translated claim: The Football World Cup 2030 will be held in India


In [15]:
# Run the full pipeline on the English claim; the cell output is the model's
# "Classification / Explanation" text.
fact_checker_chain.invoke(claim_final)

'Classification: FAKE\n\nExplanation: The provided documents do not mention India as a host country for the 2030 FIFA World Cup. Instead, they mention a joint bid led by Morocco, Spain, and Portugal, with no mention of India. Additionally, there is no confirmation in the documents that the 2030 FIFA World Cup will be held in India. Therefore, the claim that the 2030 FIFA World Cup will be held in India is false.'

In [None]:
import sys
import os
def get_audio_file():
    """Prompt for the path of an audio file and validate it.

    Surrounding whitespace and quotes are stripped from the entered text, so
    paths pasted with quotes around them are accepted.

    Returns:
        The entered path when it names an existing regular file with a
        ``.wav`` or ``.mp3`` extension (case-insensitive); otherwise ``None``,
        after printing the reason.
    """
    supported_formats = ['.wav', '.mp3']
    # Running in Jupyter Notebook: input file path
    audio_file_path = input("Enter the path to your MP3 or WAV file: ").strip().strip('"\'').strip()
    # isfile (not exists) so that a directory is never returned as an audio file.
    if not os.path.isfile(audio_file_path):
        print(f"File not found at: {audio_file_path}")
        return None
    ext = os.path.splitext(audio_file_path)[1].lower()
    if ext not in supported_formats:
        print(f"Unsupported file format '{ext}'. Please provide a WAV or MP3 file.")
        return None
    print(f"File '{audio_file_path}' found successfully in Jupyter!")
    return audio_file_path

'sfsd'

In [None]:
# Ask for the file here so the cell works on a fresh kernel; `audio_file_path`
# was previously never assigned anywhere in the notebook.
audio_file_path = get_audio_file()
# Stays None when transcription is skipped, so later cells can test for it.
response = None
if audio_file_path:
    # Binary mode: the audio bytes are uploaded as-is; the file is closed on exit.
    with open(audio_file_path, "rb") as audio_file:
        response = client.speech_to_text.transcribe(
            file=audio_file,
            model="saarika:v2.5",
            language_code="unknown"
        )
    print("‚úÖ Transcription Response:")
    print(response)
else:
    print("üö´ No audio file found. Transcription aborted.")

In [None]:
# Translate the transcript to English. Reset `translation` first: if this call
# fails (including when no transcription response is available), the result of
# the earlier text-claim translation must not be silently reused.
translation = None
try:
    translation = client.text.translate(
        input=response.transcript,
        source_language_code="auto",
        target_language_code="en-IN"
    )
except Exception as e:
    print(f"Error during translation: {e}")