In [1]:
import json
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
import re
from dotenv import load_dotenv

In [2]:
load_dotenv()

with open("data/c3ai_videos.json", "r", encoding="utf-8") as f:
    data = json.load(f)

title_list = [item["title"] for item in data]

title_list = [re.sub(r'c3\s?ai', '', title, flags=re.IGNORECASE).strip()
                for title in title_list]

url_list = [item["url"] for item in data]

In [5]:
metadatas = [{"url": url} for url in url_list]

vectorstore = FAISS.from_texts(title_list, OpenAIEmbeddings(), metadatas=metadatas)

In [9]:
query = "Learn the latest agentic advances from the company"
docs = vectorstore.similarity_search(query, k=3)
print(f"Rank#1: {docs[0].page_content} - Link: {docs[0].metadata['url']}")
print(f"Rank#2: {docs[1].page_content} - Link: {docs[1].metadata['url']}")
print(f"Rank#3: {docs[2].page_content} - Link: {docs[2].metadata['url']}")

Rank#1: Empowering AI Application Developers with ’s Latest Agentic AI Developer Innovations​ - Link: https://www.youtube.com/watch?v=01rldii7b_Q
Rank#2: Agentic AI Optimizes Processes in the Medical Equipment Manufacturing Industry - Link: https://www.youtube.com/watch?v=RTKNdCUeNsk
Rank#3: Transform Enterprise Automation with AI Agents |  Agentic Process Automation - Overview - Link: https://www.youtube.com/watch?v=ZfUe1NwLri0
