In [7]:
!pip install pypdf faiss-cpu sentence-transformers


Collecting pypdf
  Downloading pypdf-6.0.0-py3-none-any.whl.metadata (7.1 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Downloading pypdf-6.0.0-py3-none-any.whl (310 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.5/310.5 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m56.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf, faiss-cpu
Successfully installed faiss-cpu-1.12.0 pypdf-6.0.0


In [8]:
from pypdf import PdfReader
reader = PdfReader("/content/Essentials of Geographic Information Systems.pdf")

text = ""
for page in reader.pages:
    text += page.extract_text()

# Split into small chunks (concepts/paragraphs)
chunks = text.split("\n\n")


In [9]:
from sentence_transformers import SentenceTransformer
import faiss

model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(chunks)

index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [10]:
def search(query, top_k=3):
    q_emb = model.encode([query])
    D, I = index.search(q_emb, top_k)
    return [chunks[i] for i in I[0]]

print(search("Explain raster vs vector data"))


['Saylor URL: http://www.saylor.org/books  Saylor.org \n  1 \nThis text was adapted by The Saylor Foundation under a Creative \nCommons Attribution-NonCommercial-ShareAlike 3.0 License without \nattribution as requested by the work’s original creator or licensee. \n  Saylor URL: http://www.saylor.org/books  Saylor.org \n  2 \nPreface \nMaps are everywhere— on the Internet, in your car, and even on your mobile phone. Moreover, maps \nof the twenty-first century are not just paper diagrams folded like an accordion. Maps today are \ncolorful, searchable, interactive, and shared. This transformation of the static map into dynamic and \ninteractive multimedia reflects the integration of technological innovation and vast amounts of \ngeographic data. The key technology behind this integration, and subsequently the maps of the \ntwenty-first century, is geographic information systems or GIS. \nPut simply, GIS is a special type of information technology that integrates data and information fro

In [11]:
def make_slide(concept, text):
    sentences = text.split(". ")
    bullets = [s.strip() for s in sentences[:3] if len(s) > 5]  # 3 bullet points
    narration = text.strip()

    slide = {
        "title": concept,
        "bullets": bullets,
        "narration": narration
    }
    return slide


In [12]:
query = "Raster data"
results = search(query, top_k=1)

for r in results:
    slide = make_slide(query, r)
    print(slide)


{'title': 'Raster data', 'bullets': ['Saylor URL: http://www.saylor.org/books  Saylor.org \n  1 \nThis text was adapted by The Saylor Foundation under a Creative \nCommons Attribution-NonCommercial-ShareAlike 3.0 License without \nattribution as requested by the work’s original creator or licensee', 'Saylor URL: http://www.saylor.org/books  Saylor.org \n  2 \nPreface \nMaps are everywhere— on the Internet, in your car, and even on your mobile phone', 'Moreover, maps \nof the twenty-first century are not just paper diagrams folded like an accordion'], 'narration': 'Saylor URL: http://www.saylor.org/books  Saylor.org \n  1 \nThis text was adapted by The Saylor Foundation under a Creative \nCommons Attribution-NonCommercial-ShareAlike 3.0 License without \nattribution as requested by the work’s original creator or licensee. \n  Saylor URL: http://www.saylor.org/books  Saylor.org \n  2 \nPreface \nMaps are everywhere— on the Internet, in your car, and even on your mobile phone. Moreover, m

In [13]:
import json

all_slides = []

query = "Raster data"
results = search(query, top_k=2)

for r in results:
    slide = make_slide(query, r)
    all_slides.append(slide)

# Save to JSON
with open("slides.json", "w") as f:
    json.dump(all_slides, f, indent=2)

print("✅ Slides saved to slides.json")


✅ Slides saved to slides.json


In [16]:
from pptx import Presentation

def save_to_ppt(slides, filename="slides.pptx"):
    prs = Presentation()
    for s in slides:
        layout = prs.slide_layouts[1]  # Title + Content
        slide = prs.slides.add_slide(layout)

        title = slide.shapes.title
        content = slide.placeholders[1]

        title.text = s["title"]
        content.text = "\n".join(s["bullets"])

    prs.save(filename)
    print(f"✅ Presentation saved as {filename}")

# Example
save_to_ppt(all_slides, "gis_slides.pptx")


✅ Presentation saved as gis_slides.pptx


In [15]:
!pip install python-pptx

Collecting python-pptx
  Downloading python_pptx-1.0.2-py3-none-any.whl.metadata (2.5 kB)
Collecting XlsxWriter>=0.5.7 (from python-pptx)
  Downloading xlsxwriter-3.2.8-py3-none-any.whl.metadata (2.7 kB)
Downloading python_pptx-1.0.2-py3-none-any.whl (472 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.8/472.8 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading xlsxwriter-3.2.8-py3-none-any.whl (175 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m175.5/175.5 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: XlsxWriter, python-pptx
Successfully installed XlsxWriter-3.2.8 python-pptx-1.0.2


In [17]:
!pip install pypdf

from pypdf import PdfReader

# Load your book
reader = PdfReader("/content/Essentials of Geographic Information Systems.pdf")

# Extract all text
text = ""
for page in reader.pages:
    page_text = page.extract_text()
    if page_text:
        text += page_text + "\n"

print("Total characters extracted:", len(text))


Total characters extracted: 388949


In [18]:
import re

# Split by double newline
raw_chunks = text.split("\n\n")

# Further split into smaller chunks (~5 sentences each)
chunks = []
for chunk in raw_chunks:
    sentences = re.split(r'(?<=[.!?]) +', chunk)
    for i in range(0, len(sentences), 5):  # 5 sentences per chunk
        sub_chunk = " ".join(sentences[i:i+5])
        if len(sub_chunk) > 20:  # skip very short chunks
            chunks.append(sub_chunk)

print("Total chunks created:", len(chunks))


Total chunks created: 661


In [19]:
!pip install sentence-transformers faiss-cpu

from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Load model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Create embeddings for all chunks
embeddings = model.encode(chunks, convert_to_numpy=True)

# Build FAISS index
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)

print("FAISS index built. Total entries:", index.ntotal)


FAISS index built. Total entries: 661


In [21]:
faiss.write_index(index, "faiss_index.index")
np.save("chunks.npy", np.array(chunks, dtype=object))
print("Index and chunks saved for future queries")


Index and chunks saved for future queries


In [23]:
# Load index and chunks if needed
index = faiss.read_index("faiss_index.index")
chunks = np.load("chunks.npy", allow_pickle=True)

def search(query, top_k=5):
    q_emb = model.encode([query])
    D, I = index.search(q_emb, top_k)
    return [chunks[i] for i in I[0]]

# Test search
query = "Raster vs Vector data"
results = search(query, top_k=5)
for i, r in enumerate(results):
    print(f"Result {i+1}:\n{r}\n---\n")

Result 1:
Saylor URL: http://www.saylor.org/books  Saylor.org 
  93 
In comparison with the raster data model, vector data models tend to be better representations of reality 
due to the accuracy and precision of points, lines, and polygons over the regularly spaced grid cells of the 
raster model. This results in vector data tending to be more aesthetically pleasing than raster data. 
Vector data also provides an increased ability to alter the scale of observation and analysis. As each 
coordinate pair associated with a point, line, and polygon represents an infinitesimally exact location 
(albeit limited by the number of significant digits and/or data acquisition methodologies), zooming deep 
into a vector image does not change the view of a vector graphic in the way that it does a raster graphic 
(see Figure 4.1 "Digital Picture with Zoomed Inset Showing Pixilation of Raster Image"). 
Vector data tend to be more compact in data structure, so file sizes are typically much smaller tha

In [24]:
def make_slide(concept, text):
    sentences = text.split(". ")
    bullets = [s.strip() for s in sentences[:3] if len(s) > 5]  # 3 bullet points
    narration = text.strip()
    return {
        "title": concept,
        "bullets": bullets,
        "narration": narration
    }

all_slides = []
for r in results:
    slide = make_slide(query, r)
    all_slides.append(slide)

# Save slides as JSON
import json
with open("slides.json", "w") as f:
    json.dump(all_slides, f, indent=2)

print("✅ Slides saved to slides.json")


✅ Slides saved to slides.json


In [25]:
!pip install python-pptx
from pptx import Presentation

def save_to_ppt(slides, filename="slides.pptx"):
    prs = Presentation()
    for s in slides:
        layout = prs.slide_layouts[1]  # Title + Content
        slide = prs.slides.add_slide(layout)
        slide.shapes.title.text = s["title"]
        slide.placeholders[1].text = "\n".join(s["bullets"])
    prs.save(filename)
    print(f"✅ Presentation saved as {filename}")

save_to_ppt(all_slides, "gis_slides.pptx")


✅ Presentation saved as gis_slides.pptx


In [3]:
# Install system dependencies for Manim
!apt update
!apt install -y libcairo2-dev libpango1.0-dev ffmpeg libavdevice-dev libavfilter-dev libavformat-dev libavcodec-dev libswscale-dev pkg-config python3-dev

# Upgrade pip, setuptools, wheel
!pip install --upgrade pip setuptools wheel

# Install Manim
!pip install manim


[33m0% [Working][0m            Hit:1 http://security.ubuntu.com/ubuntu jammy-security InRelease
            Hit:2 https://cli.github.com/packages stable InRelease
Hit:3 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:4 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:6 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:8 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:9 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:11 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
50 packages can be upgraded. Run 'apt list --upgradable' to see them.
[1;33mW: [0mSk

In [1]:
import json

# Load slides
with open("slides.json") as f:
    slides = json.load(f)

# Manim script content as a string
manim_code = f'''
from manim import *

class SlideVideo(Scene):
    def construct(self):
        slides = {slides}

        for s in slides:
            title = Text(s["title"], font_size=48).to_edge(UP)
            bullets_text = "\\n".join(s["bullets"])
            bullets = Text(bullets_text, font_size=32).next_to(title, DOWN)

            self.play(Write(title))
            self.wait(0.5)
            self.play(Write(bullets))
            self.wait(2)
            self.clear()
'''

# Save to a .py file
with open("slide_video.py", "w") as f:
    f.write(manim_code)

print("✅ Manim script saved as slide_video.py")


✅ Manim script saved as slide_video.py


In [2]:
!manim -pql slide_video.py SlideVideo


  m = re.match('([su]([0-9]{1,2})p?) \(([0-9]{1,2}) bit\)$', token)
  m2 = re.match('([su]([0-9]{1,2})p?)( \(default\))?$', token)
  elif re.match('(flt)p?( \(default\))?$', token):
  elif re.match('(dbl)p?( \(default\))?$', token):
Manim Community [32mv0.[0m[32m19.0[0m

[2;36m[09/15/25 08:00:22][0m[2;36m [0m[32mINFO    [0m Animation [32m0[0m : Partial      ]8;id=462402;file:///usr/local/lib/python3.12/dist-packages/manim/scene/scene_file_writer.py\[2mscene_file_writer.py[0m]8;;\[2m:[0m]8;id=411259;file:///usr/local/lib/python3.12/dist-packages/manim/scene/scene_file_writer.py#588\[2m588[0m]8;;\
[2;36m                    [0m         movie file written in      [2m                        [0m
[2;36m                    [0m         [32m'/content/media/videos/sli[0m [2m                        [0m
[2;36m                    [0m         [32mde_video/480p15/partial_mo[0m [2m                        [0m
[2;36m                    [0m         [32mvie_file

In [8]:
!manim -ql slide_video.py SlideVideo


Manim Community [32mv0.[0m[32m19.0[0m

[2;36m[09/15/25 08:06:53][0m[2;36m [0m[32mINFO    [0m Animation [32m0[0m : Using cached     ]8;id=481281;file:///usr/local/lib/python3.12/dist-packages/manim/renderer/cairo_renderer.py\[2mcairo_renderer.py[0m]8;;\[2m:[0m]8;id=241365;file:///usr/local/lib/python3.12/dist-packages/manim/renderer/cairo_renderer.py#89\[2m89[0m]8;;\
[2;36m                    [0m         data [1m([0mhash :                   [2m                    [0m
[2;36m                    [0m         1185818338_981136336_223132457 [2m                    [0m
[2;36m                    [0m         [1m)[0m                              [2m                    [0m
[2;36m                   [0m[2;36m [0m[32mINFO    [0m Animation [32m1[0m : Using cached     ]8;id=527559;file:///usr/local/lib/python3.12/dist-packages/manim/renderer/cairo_renderer.py\[2mcairo_renderer.py[0m]8;;\[2m:[0m]8;id=335103;file:///usr/local/lib/python3.12/dist-p

In [6]:
!ls media/manim/videos/slide_video.py/480p15/


ls: cannot access 'media/manim/videos/slide_video.py/480p15/': No such file or directory


In [9]:
from IPython.display import HTML
from base64 import b64encode

video_path = "media/videos/slide_video/480p15/SlideVideo.mp4"

mp4 = open(video_path,'rb').read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML(f"""
<video width=600 controls>
      <source src="{data_url}" type="video/mp4">
</video>
""")



In [11]:
# 1️⃣ Imports
from sentence_transformers import SentenceTransformer
import faiss, numpy as np
import json
from pptx import Presentation
from IPython.display import HTML
from base64 import b64encode

# 2️⃣ Load model, FAISS index, and chunks
model = SentenceTransformer("all-MiniLM-L6-v2")
index = faiss.read_index("faiss_index.index")
chunks = np.load("chunks.npy", allow_pickle=True)

# 3️⃣ Search function
def search(query, top_k=5):
    q_emb = model.encode([query])
    D, I = index.search(q_emb, top_k)
    return [chunks[i] for i in I[0]]

# 4️⃣ Slide generator
def make_slide(concept, text):
    sentences = text.split(". ")
    bullets = [s.strip() for s in sentences[:3] if len(s) > 5]
    narration = text.strip()
    return {"title": concept, "bullets": bullets, "narration": narration}

# 5️⃣ Save slides to JSON and PPTX
def save_slides(slides, json_file="slides.json", ppt_file="slides.pptx"):
    # JSON
    with open(json_file, "w") as f:
        json.dump(slides, f, indent=2)
    # PPTX
    prs = Presentation()
    for s in slides:
        slide = prs.slides.add_slide(prs.slide_layouts[1])
        slide.shapes.title.text = s["title"]
        slide.placeholders[1].text = "\n".join(s["bullets"])
    prs.save(ppt_file)
    print(f"✅ Saved JSON: {json_file} and PPTX: {ppt_file}")

# 6️⃣ Generate Manim script dynamically
def create_manim_script(slides, filename="slide_video.py"):
    manim_code = f'''
from manim import *

class SlideVideo(Scene):
    def construct(self):
        slides = {slides}

        for s in slides:
            title = Text(s["title"], font_size=48).to_edge(UP)
            bullets = Text("\\n".join(s["bullets"]), font_size=32).next_to(title, DOWN)
            self.play(Write(title))
            self.wait(0.5)
            self.play(Write(bullets))
            self.wait(2)
            self.clear()
'''
    with open(filename, "w") as f:
        f.write(manim_code)
    print(f"✅ Manim script saved: {filename}")

# 7️⃣ Display video in Colab
def display_video(video_path):
    mp4 = open(video_path,'rb').read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    return HTML(f'<video width=600 controls><source src="{data_url}" type="video/mp4"></video>')

# 8️⃣ Full pipeline function
def generate_concept_video(concept_query, top_k=3):
    results = search(concept_query, top_k)
    slides = [make_slide(concept_query, r) for r in results]
    save_slides(slides)
    create_manim_script(slides)

    # Render Manim video
    import os
    os.system("manim -ql slide_video.py SlideVideo")

    # Display video
    video_path = "media/videos/slide_video/480p15/SlideVideo.mp4"
    return display_video(video_path)

# 9️⃣ Example usage
generate_concept_video("Raster vs Vector data")



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


✅ Saved JSON: slides.json and PPTX: slides.pptx
✅ Manim script saved: slide_video.py


In [12]:
!pip install gradio

import gradio as gr

iface = gr.Interface(
    fn=generate_concept_video,
    inputs="text",
    outputs="html",
    title="Knowledge Graph → Manim Video",
    description="Enter a concept, get an animated explanation video"
)

iface.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://b60718cc6e75ac794e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


