In [None]:
import math
import os
from typing import Optional

import cv2
import fitz  # PyMuPDF
from gtts import gTTS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate
from langchain_qdrant import QdrantVectorStore
from moviepy import VideoFileClip, AudioFileClip

In [None]:
# Text-generation model: "mistral" via the Ollama wrapper, sampling temperature 0.7.
llm = Ollama(temperature=0.7, model="mistral")

# Dense embedding model used for vector search; configured to run on the CPU.
dense_embedder = HuggingFaceEmbeddings(
    model_kwargs=dict(device="cpu"),
    model_name="BAAI/bge-base-en-v1.5",
)

In [None]:
# Connection settings are read from the environment so that credentials never
# have to be typed into (or committed with) the notebook. The empty-string
# fallbacks match the values that were previously hard-coded here.
# NOTE(review): `from_documents` is called with an empty document list, so
# presumably it only attaches to the collection without adding anything —
# confirm, and consider `QdrantVectorStore.from_existing_collection` if the
# collection is always created by a separate ingestion step.
qdrant = QdrantVectorStore.from_documents(
    documents=[],
    embedding=dense_embedder,
    url=os.environ.get("QDRANT_URL", ""),
    api_key=os.environ.get("QDRANT_API_KEY", ""),
    prefer_grpc=True,
    collection_name="axRiv_research_papers",  # collection name
)

In [None]:
# System message: fixes the narrator persona and the tone rules for the script.
_narrator_system_message = (
    "You are an expert narrator who transforms formal academic research summaries into clear, engaging, and listener-friendly spoken scripts. "
    "Your narration should sound natural, conversational, and engaging — like explaining the key ideas to an intelligent but non-expert audience. "
    "Avoid heavy jargon unless necessary, and if used, briefly explain it in simple terms. "
    "Maintain accuracy while improving flow and clarity. "
    "Use transitions, emphasis, and storytelling techniques to keep the listener interested."
)

# User message: carries the summary through the `{summary}` template variable.
_narrator_user_message = (
    "Here is a research summary:\n\n{summary}\n\n"
    "Please rewrite this as a spoken-friendly narration, keeping it clear, concise, and engaging for a general audience."
)

explanation_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _narrator_system_message),
        ("user", _narrator_user_message),
    ]
)

# Piping the prompt into the LLM yields a chain invoked with {"summary": ...}.
explanation_chain = explanation_prompt | llm

In [None]:
def narrate_summary(summary: str) -> str:
    """Turn a research summary into a spoken-style narration script.

    Args:
        summary: Summary text substituted into the prompt's ``summary`` slot.

    Returns:
        The chain result's ``content`` attribute when it has one, otherwise
        the result object itself.
    """
    result = explanation_chain.invoke({"summary": summary})
    if hasattr(result, "content"):
        return result.content
    return result

In [None]:
def fetch_summary_by_title(title_query: str, k: int = 20) -> Optional[str]:
    """Look up a stored paper summary whose title matches ``title_query``.

    The vector store is searched for the ``k`` nearest documents. Among those,
    a document whose ``Title`` metadata equals the query (case-insensitively)
    wins; otherwise the first document whose title contains the query is used.

    Args:
        title_query: Full or partial paper title.
        k: Number of nearest documents to inspect.

    Returns:
        The matching document's ``page_content``, or ``None`` when the query
        is blank or no candidate title matches.
    """
    needle = (title_query or "").strip().lower()
    if not needle:
        # An empty needle is a substring of every title, so it would
        # otherwise "match" whatever document happens to be ranked first.
        return None

    substring_hit = None
    for doc in qdrant.similarity_search(title_query, k=k):
        # Title may be absent, None, or a non-string value in the payload.
        title = str((doc.metadata or {}).get("Title") or "").strip().lower()
        if title == needle:
            return doc.page_content
        if substring_hit is None and needle in title:
            substring_hit = doc
    return substring_hit.page_content if substring_hit is not None else None


In [None]:
# Paper title -> PDF file name; looked up by exact title when building a video.
pdf_title_map = dict([
    ("A Formal Measure of Machine Intelligence", "p1.pdf"),
    ("Creativity and Artificial Intelligence: A Digital Art Perspective", "p2.pdf"),
    ("Introduction to intelligent computing unit 1", "p3.pdf"),
    ("The Next Decade in AI:Four Steps Towards Robust Artificial Intelligence", "p4.pdf"),
    ("Perspective: Purposeful Failure in Artificial Life and Artificial Intelligence", "p5.pdf"),
    ("Machine Learning in Artificial Intelligence: Towards a Common Understanding", "p6.pdf"),
    ("Human in the AI loop via xAI and Active Learning for Visual Inspection", "p7.pdf"),
    ("Comprehensible Artificial Intelligence on Knowledge Graphs: A Survey.", "p8.pdf"),
    ("Watershed for Artificial Intelligence: Human Intelligence, Machine Intelligence, and Biological Intelligence", "p9.pdf"),
    ("Machine learning and deep learning", "p10.pdf"),
])

In [None]:
def _split_frames_across_pages(total_frames: int, page_count: int) -> list:
    """Split ``total_frames`` into ``page_count`` near-equal integer shares that sum to it."""
    base, extra = divmod(total_frames, page_count)
    return [base + 1 if index < extra else base for index in range(page_count)]


def generate_video_for_paper(paper_title: str, papers_folder="papers"):
    """Build a narrated slideshow video from a paper's PDF pages.

    Steps: fetch the stored summary, render every PDF page to a PNG, turn the
    summary into a narration script and synthesize it to ``narration.mp3``,
    write a silent slideshow to ``video_no_audio.mp4``, then mux audio and
    video into ``final_video.mp4``.

    Args:
        paper_title: Title used both for the summary lookup and as the exact
            key into ``pdf_title_map``.
        papers_folder: Directory that contains the PDF files.

    Returns:
        ``(output_path, None)`` on success, or ``(None, error_message)`` when
        the summary, the PDF mapping, the PDF file, or its pages are missing.
    """
    summary = fetch_summary_by_title(paper_title)
    if not summary:
        return None, "Paper not found in vector database."

    # Validate the PDF before the LLM and text-to-speech steps so that an
    # unknown title fails fast instead of after the expensive calls.
    pdf_filename = pdf_title_map.get(paper_title)
    if not pdf_filename:
        return None, "PDF not found for this title."
    pdf_path = os.path.join(papers_folder, pdf_filename)
    if not os.path.isfile(pdf_path):
        return None, f"PDF file not found: {pdf_path}"

    # Render each page once; the first page's pixel size defines the video frame.
    image_folder = "pdf_frames"
    os.makedirs(image_folder, exist_ok=True)
    image_files = []
    frame_size = None
    with fitz.open(pdf_path) as doc:
        for i, page in enumerate(doc):
            pix = page.get_pixmap(dpi=150)
            if frame_size is None:
                frame_size = (pix.width, pix.height)
            img_path = os.path.join(image_folder, f"page_{i+1}.png")
            pix.save(img_path)
            image_files.append(img_path)
    if not image_files:
        return None, "PDF has no pages to render."

    natural_explanation = narrate_summary(summary)
    audio_path = "narration.mp3"
    gTTS(natural_explanation).save(audio_path)

    fps = 1  # keep 1 FPS so timing is simple
    video_path = "video_no_audio.mp4"
    output_path = "final_video.mp4"

    audio_clip = AudioFileClip(audio_path)
    try:
        # Round the frame total up so the slideshow is never shorter than the
        # narration, and give every page at least one frame even when the
        # audio is shorter than one second per page.
        total_frames = max(len(image_files), math.ceil(audio_clip.duration * fps))
        frames_per_page = _split_frames_across_pages(total_frames, len(image_files))

        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        video_writer = cv2.VideoWriter(video_path, fourcc, fps, frame_size)
        try:
            for img_path, repeat_count in zip(image_files, frames_per_page):
                frame = cv2.imread(img_path)
                if frame is None:
                    return None, f"Could not read rendered page image: {img_path}"
                # The writer was opened with a single frame size, so pages of
                # a different size are scaled to it before being written.
                if (frame.shape[1], frame.shape[0]) != frame_size:
                    frame = cv2.resize(frame, frame_size)
                for _ in range(repeat_count):
                    video_writer.write(frame)
        finally:
            video_writer.release()

        video_clip = VideoFileClip(video_path)
        try:
            final_clip = video_clip.with_audio(audio_clip)
            final_clip.write_videofile(output_path, codec="libx264", audio_codec="aac")
        finally:
            video_clip.close()
    finally:
        audio_clip.close()

    return output_path, None
