### Experimental workflow for SKY RAG semantic search feature

In [2]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
import os
from dotenv import load_dotenv
from key_rotation import keystore

load_dotenv()
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="pydantic")

In [3]:
LLM_MODEL = "gpt-4o"
API_BASE = "https://gw.api-dev.de.comcast.com/openai/v1"
OPENAI_API_KEY = keystore.get_api_key()  

In [4]:
model = ChatOpenAI(
    model=LLM_MODEL, 
    openai_api_base=API_BASE,
    api_key = OPENAI_API_KEY,
    streaming=False,
)

result = model.invoke("Write a 5 line summary about SKY News. Give response in bullet points.")
print(result.content)

- **Established News Outlet**: Sky News is a British-based news organization launched in 1989, delivering 24/7 coverage of current events globally.  
- **Multimedia Platforms**: It provides news through TV broadcasting, online streaming, mobile apps, and social media.  
- **Diverse Content**: The network covers a wide range of topics, including politics, business, technology, climate change, and world affairs.  
- **Reputation for Breaking News**: Known for its focus on live, breaking news stories, often presenting updates as events unfold.  
- **Global Reach**: While headquartered in the UK, Sky News maintains international bureaus and serves audiences worldwide.  


In [23]:
embedding_model = OpenAIEmbeddings(model="text-embedding-3-large", openai_api_base=API_BASE)
result = embedding_model.embed_query("Hello world")
print(result)

[-0.008790180087089539, -0.010218530893325806, 0.006196710281074047, 0.031743038445711136, 0.008186044171452522, -0.006438364740461111, -0.004166381433606148, 0.07670802623033524, 0.027427779510617256, 0.02899853326380253, 0.0020206195767968893, -0.011642565950751305, -0.020160885527729988, -0.019211528822779655, -0.004997068550437689, 0.03686956316232681, -0.012617814354598522, -0.0027164549101144075, -0.007292785681784153, -0.01814134418964386, 0.022266730666160583, 0.003780165920034051, -0.017502686008810997, 0.05506269261240959, 0.00294947880320251, 0.024027356877923012, -0.014671877026557922, 0.005968001671135426, -0.03611007705330849, -0.02791108749806881, 0.006136296782642603, 0.016527438536286354, 0.010917602106928825, 0.014378439635038376, 0.023405959829688072, 0.005860120058059692, 0.025252889841794968, 0.018434781581163406, 0.005212831776589155, -0.0028955379966646433, 0.035523202270269394, 0.022370297461748123, -0.017166096717119217, 0.030603809282183647, 0.0115217389538884

In [24]:
print(len(result))

3072


## Chromadb - VectorDB initilization 

In [25]:
import chromadb
client = chromadb.PersistentClient(path="./chroma_db")
collection = client.get_or_create_collection(name="sky_news_sample_collection")

In [29]:
# Indexing documents ingestion
from langchain_community.document_loaders.csv_loader import CSVLoader

loader = CSVLoader(file_path='./data/articles.csv', encoding='utf-8')
documents = loader.load()
print(f"Loaded {len(documents)} documents")

Loaded 40 documents


In [34]:
documents[0].page_content

'id: ff7591c4-bcd3-49dd-b015-c162d8ec791b\ndata: {"id":"49966fbf-d35f-45f4-80d1-dd7448589160","body":"<widget id=\\"2\\"></widget>","type":{"id":"1532bf77-0dcc-4149-ad2a-faf7c3d09fc8","type":"Sky Q Story","references":[{"key":"@id","value":"47","source":"SHAK","isExternalReference":true}]},"audio":[],"title":"Mandelson sacked as ambassador to US","images":[{"id":"fe7ea6a6-4932-4fa7-9fa6-b2deea6b4632","addedBy":"Shazad Ahmed","caption":"Peter Mandelson walks on the day British Prime Minister Keir Starmer holds an emergency Cobra meeting.\\nPic Reuters","filename":"skynews-peter-mandelson-cobra_6945280.jpg","datePrefix":"25/06","originator":{"id":"92e83399-d766-47d4-8784-901793c0c34e","originatorName":"Reuters"},"references":[{"key":"notes","value":"British Ambassador to the United States Peter Mandelson walks on the day British Prime Minister Keir Starmer holds an emergency Cobra meeting to discuss Israel-Iran conflict, in London, Britain, June 18, 2025. REUTERS/Jaimi Joy\\r\\n","source

## Data cleaning and processing


In [43]:
import pandas as pd
df = pd.read_csv('./data/articles.csv')
df.head()

Unnamed: 0,id,data,external_identifier,source,proposition,provider,territory,status,type,publish_date,author_id,topic_ids,update_date
0,ff7591c4-bcd3-49dd-b015-c162d8ec791b,"{""id"":""49966fbf-d35f-45f4-80d1-dd7448589160"",""...",13428683,SHAK,SKY_NEWS,SKY,GB,Approved,Sky Q Story,2025-09-11 10:45:00,,,2025-09-11 10:52:34
1,f0c90855-2d7b-4b48-8007-ad86604a7693,"{""id"":""0a523f93-c116-4a46-a62a-046963c32ce2"",""...",13025497,SHAK,SKY_SPORTS,SKY,GB,Approved,Liveblog,2025-09-11 10:40:00,,,2025-09-11 10:41:13
2,26a97ec3-5938-4fe7-99ab-63fdc0a76adb,"{""id"":""34fb0587-d643-49db-9626-0a4ebfdd2e2b"",""...",13428674,SHAK,SKY_NEWS,SKY,GB,Approved,News Story,2025-09-11 10:33:00,||763||,|11386|,2025-09-11 10:48:55
3,d8070a3d-2494-45e4-ad52-f1a2a2a16e19,"{""id"":""354e6a7b-cfb0-4dc1-9839-fe4be15b9e04"",""...",13428596,SHAK,SKY_SPORTS,SKY,GB,Approved,News Story,2025-09-11 10:20:00,,,2025-09-11 10:51:00
4,b52a1dec-0f0c-4bc4-88cc-005e81eef9d5,"{""id"":""cf5d92b0-4d2e-4658-ab2f-8614f6345b42"",""...",13427982,SHAK,SKY_NEWS,SKY,GB,Approved,News Story,2025-09-11 10:09:00,,,2025-09-11 10:53:58


In [37]:
# df = df[['id', 'data']]
# df.head()

Unnamed: 0,id,data
0,ff7591c4-bcd3-49dd-b015-c162d8ec791b,"{""id"":""49966fbf-d35f-45f4-80d1-dd7448589160"",""..."
1,f0c90855-2d7b-4b48-8007-ad86604a7693,"{""id"":""0a523f93-c116-4a46-a62a-046963c32ce2"",""..."
2,26a97ec3-5938-4fe7-99ab-63fdc0a76adb,"{""id"":""34fb0587-d643-49db-9626-0a4ebfdd2e2b"",""..."
3,d8070a3d-2494-45e4-ad52-f1a2a2a16e19,"{""id"":""354e6a7b-cfb0-4dc1-9839-fe4be15b9e04"",""..."
4,b52a1dec-0f0c-4bc4-88cc-005e81eef9d5,"{""id"":""cf5d92b0-4d2e-4658-ab2f-8614f6345b42"",""..."


In [44]:
import json

def has_p_tag(json_str):
    try:
        data_obj = json.loads(json_str)
        body = data_obj.get("body", "")
        return "<p>" in body
    except Exception as e:
        return False

df['final_data'] = df['data'].apply(has_p_tag)
df = df[df['final_data'] == True]
df.head()

Unnamed: 0,id,data,external_identifier,source,proposition,provider,territory,status,type,publish_date,author_id,topic_ids,update_date,final_data
2,26a97ec3-5938-4fe7-99ab-63fdc0a76adb,"{""id"":""34fb0587-d643-49db-9626-0a4ebfdd2e2b"",""...",13428674,SHAK,SKY_NEWS,SKY,GB,Approved,News Story,2025-09-11 10:33:00,||763||,|11386|,2025-09-11 10:48:55,True
3,d8070a3d-2494-45e4-ad52-f1a2a2a16e19,"{""id"":""354e6a7b-cfb0-4dc1-9839-fe4be15b9e04"",""...",13428596,SHAK,SKY_SPORTS,SKY,GB,Approved,News Story,2025-09-11 10:20:00,,,2025-09-11 10:51:00,True
4,b52a1dec-0f0c-4bc4-88cc-005e81eef9d5,"{""id"":""cf5d92b0-4d2e-4658-ab2f-8614f6345b42"",""...",13427982,SHAK,SKY_NEWS,SKY,GB,Approved,News Story,2025-09-11 10:09:00,,,2025-09-11 10:53:58,True
5,c72a0b94-e168-4a6b-a848-554f28f8c492,"{""id"":""af7d51e8-b818-4c9d-b7d9-028adf7bb3c9"",""...",13428653,SHAK,SKY_SPORTS,SKY,GB,Approved,News Story,2025-09-11 10:08:00,,,2025-09-11 10:45:02,True
8,92ca4036-39b8-40fd-a1e3-2dbc9192bd1e,"{""id"":""08c689d0-d655-4c57-b9ad-b5881f8f44f3"",""...",13428630,SHAK,SKY_SPORTS,SKY,GB,Approved,News Story,2025-09-11 09:39:00,,|11400|,2025-09-11 10:15:58,True


In [42]:
len(df)

27

In [None]:
import json

def extract_body(json_str):
    try:
        data_obj = json.loads(json_str)
        data_body = data_obj.get("body", "")
        data__title = data_obj.get("title", "")
        return data__title + "\n" + data_body
    except Exception:
        return ""

#create embed documents list with documents from articles.csv and df['body'] column
# Extract the "body" field from the JSON in the "data" column and create a list of dicts for embedding
embed_documents = [
    {
        "id": row["id"], 
        "text": extract_body(row["data"])
    } for _, row in df.iterrows()
]

embed_documents[0]


{'id': '26a97ec3-5938-4fe7-99ab-63fdc0a76adb',
 'text': '<p>In the chaotic interim, this generated the extraordinary spectacle of No10 saying that they had full confidence in their man in Washington because - and it feels incredible to type this - No10 had been fully aware that the peer had an extended relationship with a convicted paedophile after the point he had been to jail in the US, and was content with this situation.</p><p>An incredible state of affairs.</p><p><strong>Politics latest:</strong> <strong><a href="https://news.sky.com/story/politics-latest-starmer-labour-badenoch-small-boats-farage-reform-12593360" target="_blank">Follow live updates</a></strong></p><p>This is why the issue has become a matter of Starmer\'s judgement almost as much as Peter Mandelson\'s.</p><p>Indeed, there were echoes here of the Chris Pincher affair that led to Boris Johnson\'s downfall - a leader stubbornly defending acts which revolted the bulk of the party, in a tone deaf act of self-harm.</p>

In [51]:
df.to_csv('./data/sky_news_sample_data.csv', index=False)

In [52]:
doc_loader = CSVLoader(file_path='./data/sky_news_sample_data.csv', encoding='utf-8')
documents = doc_loader.load()
print(f"Loaded {len(documents)} documents for embedding")

Loaded 27 documents for embedding


In [53]:
documents[0]

Document(metadata={'source': './data/sky_news_sample_data.csv', 'row': 0}, page_content='id: 26a97ec3-5938-4fe7-99ab-63fdc0a76adb\ndata: {"id":"34fb0587-d643-49db-9626-0a4ebfdd2e2b","body":"<p>In the chaotic interim, this generated the extraordinary spectacle of No10 saying that they had full confidence in their man in Washington because - and it feels incredible to type this - No10 had been fully aware that the peer had an extended relationship with a convicted paedophile after the point he had been to jail in the US, and was content with this situation.</p><p>An incredible state of affairs.</p><p><strong>Politics latest:</strong> <strong><a href=\\"https://news.sky.com/story/politics-latest-starmer-labour-badenoch-small-boats-farage-reform-12593360\\" target=\\"_blank\\">Follow live updates</a></strong></p><p>This is why the issue has become a matter of Starmer\'s judgement almost as much as Peter Mandelson\'s.</p><p>Indeed, there were echoes here of the Chris Pincher affair that led

In [56]:
embeddings = embedding_model.embed_documents([doc["text"] for doc in embed_documents])

In [58]:
# Store embeddings in ChromaDB
ids = [doc["id"] for doc in embed_documents]
collection.add(
    documents=[doc.page_content for doc in documents],
    embeddings=embeddings,
    ids=ids
)

In [59]:
ids[0]

'26a97ec3-5938-4fe7-99ab-63fdc0a76adb'

In [61]:
collection.get(ids=[ids[0]], include=["embeddings", "documents", "metadatas"])

{'ids': ['26a97ec3-5938-4fe7-99ab-63fdc0a76adb'],
 'embeddings': array([[ 0.0250881 ,  0.00700745, -0.01112656, ..., -0.00949568,
          0.00821536, -0.00689694]], shape=(1, 3072)),
 'documents': ['id: 26a97ec3-5938-4fe7-99ab-63fdc0a76adb\ndata: {"id":"34fb0587-d643-49db-9626-0a4ebfdd2e2b","body":"<p>In the chaotic interim, this generated the extraordinary spectacle of No10 saying that they had full confidence in their man in Washington because - and it feels incredible to type this - No10 had been fully aware that the peer had an extended relationship with a convicted paedophile after the point he had been to jail in the US, and was content with this situation.</p><p>An incredible state of affairs.</p><p><strong>Politics latest:</strong> <strong><a href=\\"https://news.sky.com/story/politics-latest-starmer-labour-badenoch-small-boats-farage-reform-12593360\\" target=\\"_blank\\">Follow live updates</a></strong></p><p>This is why the issue has become a matter of Starmer\'s judgement

## Retrival part


In [93]:
query = "I want to know somethings about olympic sprinting, don't create any new information, just use the document provided"
query_embedding = embedding_model.embed_query(query)
results = collection.query(
    query_embeddings=[query_embedding],
    n_results=3,
    include=["documents", "embeddings", "metadatas", "distances"]
)

In [94]:
results

{'ids': [['c2615710-3f11-4154-852f-591cfd14009e',
   '943f3584-441f-4274-9834-f0fd944192b9',
   '89014c93-1711-42e2-b3a4-8d28a6f77773']],
 'embeddings': [array([[ 0.03633038, -0.04013201, -0.0178842 , ...,  0.00580575,
          -0.03188412, -0.01058672],
         [ 0.00908832, -0.00715866, -0.01457461, ..., -0.00195425,
          -0.00843753,  0.01266009],
         [ 0.02901864,  0.00799893, -0.01797213, ..., -0.00596198,
          -0.00133577,  0.00259906]], shape=(3, 3072))],
 'documents': [['id: c2615710-3f11-4154-852f-591cfd14009e\ndata: {"id":"219ac52d-ab45-4f0c-aee5-eef9b3cac0e1","body":"<p>The Jamaican set his mark at the 2009 world championships in Berlin, breaking his own 9.69 record from the previous year\'s Beijing Olympics, and it has now stood for longer than the 14 years of Jim Hines\'s 9.95 set at the 1968 Mexico City Olympics.</p><p>Research by Puma, the company that sponsored him through his glorious era of dominance, predicted that Bolt would run 9.42 in today\'s sho

In [95]:
results['documents'][0][0]

'id: c2615710-3f11-4154-852f-591cfd14009e\ndata: {"id":"219ac52d-ab45-4f0c-aee5-eef9b3cac0e1","body":"<p>The Jamaican set his mark at the 2009 world championships in Berlin, breaking his own 9.69 record from the previous year\'s Beijing Olympics, and it has now stood for longer than the 14 years of Jim Hines\'s 9.95 set at the 1968 Mexico City Olympics.</p><p>Research by Puma, the company that sponsored him through his glorious era of dominance, predicted that Bolt would run 9.42 in today\'s shoes and, speaking at an event ahead of the world championships in Tokyo, he said: \\"I fully agree.</p><ul><li><strong><a href=\\"https://www.skysports.com/more-sports/athletics/news/29876/13428227/explained-what-are-the-enhanced-games-and-is-mainstream-sport-under-threat\\">Explained: What are the \'Enhanced Games\' - and is mainstream sport under threat?</a></strong></li><li><a href=\\"https://www.skysports.com/more-sports/athletics/news/29175/13427830/world-athletics-championships-team-gb-targ

In [85]:
results['documents'][0][1]

'id: 943f3584-441f-4274-9834-f0fd944192b9\ndata: {"id":"2aac4eef-cc75-4cf0-a9d8-addca9f72bf1","body":"<p>Red Bull Powertrains and Ford have formed a technical partnership to build an engine for the new 2026 Formula 1 regulations which will almost certainly shake up the pecking order.</p><p>Mercedes dominated the early seasons of the last power unit change from 2014 and are understood to be confident about next year\'s engine.</p><ul><li><strong><a href=\\"https://www.skysports.com/f1/news/12433/13427106/lando-norris-oscar-piastri-insist-title-deciding-f1-races-wont-change-approach-to-obeying-mclaren-team-orders\\">Lando Norris, Oscar Piastri willing to obey team orders in final races</a></strong></li><li><strong><a href=\\"https://www.skysports.com/f1/schedule-results\\">F1 2025 schedule</a> | </strong><a href=\\"https://www.skysports.com/f1/standings\\"><strong>F1 championship standings</strong></a></li><li><a href=\\"https://qrcode.skysports.com/skysports/ContentPromo\\" target=\\"_b

In [86]:
results['documents'][0][2]

'id: 89014c93-1711-42e2-b3a4-8d28a6f77773\ndata: {"id":"ee07b149-df35-446e-b696-b393c33d1875","body":"<p>Featuring contributions from leading voices in sport, including Judy Murray, Helen Glover, Nasser Hussain and Baroness Tanni Grey-Thompson, <em>Game Changing: How sport gives every girl a better chance</em><em> </em>highlights that empowering girls through sport could generate £570m in annual productivity gains and save the NHS £73m a year.</p><p>Playing sport as a child is shown to be as strong an indicator of women reaching senior positions as a university degree, yet one in three girls say boys still have wider access to a range of sports.</p><ul><li><a href=\\"http://www.nowtv.com/sports-purchase?DCMP=ilc_skysports_SEO_promo\\"><strong>Sky Sports to show 90 per cent of 2025/26 WSL matches</strong></a></li><li><a href=\\"https://qrcode.skysports.com/skysports/WatchWSLApp\\" target=\\"_blank\\"><strong>Got Sky? Watch the WSL on the Sky Sports app</strong></a><strong> </strong>📱</l

In [87]:
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
    template="""
      You are a helpful news assistant.
      Answer ONLY from the provided transcript context.
      If the context is insufficient, just say you don't know.
      If some context is not related to the question, ignore it.

      {context}
      Question: {question}
    """,
    input_variables = ['context', 'question']
)



In [96]:
import json
import re

def extract_json_str(text):
    try:
        # Find the first {...} block (basic JSON object)
        match = re.search(r"\{.*\}", text, re.DOTALL)
        if match:
            json_str = match.group(0)
            return json.loads(json_str)
    except json.JSONDecodeError as e:
        print("Invalid JSON found:", e)
    
    return None

sample = extract_json_str(results['documents'][0][0])
sample

{'id': '219ac52d-ab45-4f0c-aee5-eef9b3cac0e1',
 'body': '<p>The Jamaican set his mark at the 2009 world championships in Berlin, breaking his own 9.69 record from the previous year\'s Beijing Olympics, and it has now stood for longer than the 14 years of Jim Hines\'s 9.95 set at the 1968 Mexico City Olympics.</p><p>Research by Puma, the company that sponsored him through his glorious era of dominance, predicted that Bolt would run 9.42 in today\'s shoes and, speaking at an event ahead of the world championships in Tokyo, he said: "I fully agree.</p><ul><li><strong><a href="https://www.skysports.com/more-sports/athletics/news/29876/13428227/explained-what-are-the-enhanced-games-and-is-mainstream-sport-under-threat">Explained: What are the \'Enhanced Games\' - and is mainstream sport under threat?</a></strong></li><li><a href="https://www.skysports.com/more-sports/athletics/news/29175/13427830/world-athletics-championships-team-gb-target-top-eight-finish-in-tokyo-while-new-sex-test-is-in

In [104]:
def handle_json_obj(data_obj):
    try:
        return data_obj.get("body", "")
    except Exception:
        return ""

sample_body = handle_json_obj(sample)
sample_body

'<p>The Jamaican set his mark at the 2009 world championships in Berlin, breaking his own 9.69 record from the previous year\'s Beijing Olympics, and it has now stood for longer than the 14 years of Jim Hines\'s 9.95 set at the 1968 Mexico City Olympics.</p><p>Research by Puma, the company that sponsored him through his glorious era of dominance, predicted that Bolt would run 9.42 in today\'s shoes and, speaking at an event ahead of the world championships in Tokyo, he said: "I fully agree.</p><ul><li><strong><a href="https://www.skysports.com/more-sports/athletics/news/29876/13428227/explained-what-are-the-enhanced-games-and-is-mainstream-sport-under-threat">Explained: What are the \'Enhanced Games\' - and is mainstream sport under threat?</a></strong></li><li><a href="https://www.skysports.com/more-sports/athletics/news/29175/13427830/world-athletics-championships-team-gb-target-top-eight-finish-in-tokyo-while-new-sex-test-is-introduced-in-world-first#:~:text=All%20athletes%20in%20fe

In [105]:
json_array = [extract_json_str(doc) for doc in results['documents'][0]]
context_array = [handle_json_obj(json_str) for json_str in json_array]

context_data = "\n".join(context_array)
context_data

'<p>The Jamaican set his mark at the 2009 world championships in Berlin, breaking his own 9.69 record from the previous year\'s Beijing Olympics, and it has now stood for longer than the 14 years of Jim Hines\'s 9.95 set at the 1968 Mexico City Olympics.</p><p>Research by Puma, the company that sponsored him through his glorious era of dominance, predicted that Bolt would run 9.42 in today\'s shoes and, speaking at an event ahead of the world championships in Tokyo, he said: "I fully agree.</p><ul><li><strong><a href="https://www.skysports.com/more-sports/athletics/news/29876/13428227/explained-what-are-the-enhanced-games-and-is-mainstream-sport-under-threat">Explained: What are the \'Enhanced Games\' - and is mainstream sport under threat?</a></strong></li><li><a href="https://www.skysports.com/more-sports/athletics/news/29175/13427830/world-athletics-championships-team-gb-target-top-eight-finish-in-tokyo-while-new-sex-test-is-introduced-in-world-first#:~:text=All%20athletes%20in%20fe

In [106]:

question = "I want to know somethings about olympic sprinting."
final_prompt = prompt.format(context=context_data, question=question)
print(final_prompt)


      You are a helpful news assistant.
      Answer ONLY from the provided transcript context.
      If the context is insufficient, just say you don't know.
      If some context is not related to the question, ignore it.

      <p>The Jamaican set his mark at the 2009 world championships in Berlin, breaking his own 9.69 record from the previous year's Beijing Olympics, and it has now stood for longer than the 14 years of Jim Hines's 9.95 set at the 1968 Mexico City Olympics.</p><p>Research by Puma, the company that sponsored him through his glorious era of dominance, predicted that Bolt would run 9.42 in today's shoes and, speaking at an event ahead of the world championships in Tokyo, he said: "I fully agree.</p><ul><li><strong><a href="https://www.skysports.com/more-sports/athletics/news/29876/13428227/explained-what-are-the-enhanced-games-and-is-mainstream-sport-under-threat">Explained: What are the 'Enhanced Games' - and is mainstream sport under threat?</a></strong></li><li><a

In [107]:
summary = model.invoke(final_prompt)
print(summary.content)

From the provided context, here are some key points about Olympic sprinting:

1. **Usain Bolt's Legacy**: Usain Bolt, a Jamaican sprinter, is a dominant figure in Olympic sprinting history. He set the 100m world record of 9.58 seconds at the 2009 World Championships in Berlin, breaking his own previous record of 9.69 seconds from the 2008 Beijing Olympics. This world record has remained unbeaten for over 14 years.

2. **Technological Advances**: Bolt commented on the advancements in track spikes (shoes) and believed that, with today's technology, he could have run even faster. Puma, his sponsor, predicted he might have run 9.42 seconds in today's shoes.

3. **Jamaican Sprinting**: Despite Bolt's retirement in 2017, Jamaican men haven't won a global sprint title since his victory at the 2016 Rio Olympics. However, rising talents like Kishane Thompson and Oblique Seville have shown promise. Thompson ran 9.75 seconds at the Jamaican championships, making him the sixth-fastest of all time.

## Trying ChromaDB features

In [108]:
sample_store = client.get_or_create_collection(name="sample_collection")

sample_store.add(
    documents=["An apple a day keeps the doctor away.",
               "Apple launched its latest product",
               "Apple is looking at buying a UK startup for $1 billion"
               ,"Apple is a fruit that is red or green in color."],
    ids=["sample1", "sample2", "sample3", "sample4"]
)


/Users/pmunab753@apac.comcast.com/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:53<00:00, 1.56MiB/s]


In [111]:
results = sample_store.query(query_texts=["how many calories are there in an apple?"], n_results=2)
print(results["documents"])


[['Apple is a fruit that is red or green in color.', 'An apple a day keeps the doctor away.']]


In [113]:
all_connections = client.list_collections()
print(all_connections)

[Collection(name=sample_collection), Collection(name=sky_news_sample_collection)]


In [114]:
sample_store.count()

4

In [120]:
result = model.invoke("Who is trump?")
print(result.content)

Donald Trump is an American businessman, media personality, and politician who served as the 45th President of the United States from January 20, 2017, to January 20, 2021. He was born on June 14, 1946, in Queens, New York City.

Before entering politics, Trump was primarily known for his career as a real estate developer, where he gained fame for building skyscrapers, hotels, casinos, and golf courses under the Trump Organization. He also became a prominent television personality as the host of the reality TV show *The Apprentice*. 

Trump entered the political arena as a Republican candidate, winning the presidency in the 2016 election against Hillary Clinton. His presidency was marked by controversial policies, such as tax reform, immigration policy changes, and trade disputes, as well as significant political polarization. Key moments of his presidency included appointing three Supreme Court Justices, withdrawing the U.S. from international agreements like the Paris Climate Accord,

In [121]:
# Fastapi -> api
# wiki integration
# search engine
# AWS

In [125]:
response = model.invoke("Who is the current priminister of Nepal?")
print(response.content)

As of my knowledge cutoff in October 2023, the current Prime Minister of Nepal is **Pushpa Kamal Dahal**, also known as **Prachanda**. He assumed office on **December 26, 2022**, after forming an alliance with several political parties. This is his third term as Prime Minister. For the most up-to-date information, please verify with current news sources.


In [131]:
from ddgs import DDGS

def duckduckgo_search_tool(query: str, max_results: int = 5):
    """A search tool that uses DuckDuckGo to search the web."""
    with DDGS() as ddgs:
        results = [r for r in ddgs.text(query, max_results=max_results)]
    return results

In [132]:
import wikipedia

def wikipedia_search_tool(query: str, sentences: int = 3):
    """A search tool that uses Wikipedia to search for relevant information."""
    try:
        summary = wikipedia.summary(query, sentences=sentences)
        return summary
    except Exception as e:
        return f"Error: {str(e)}"

In [135]:
from langchain.tools import Tool
from langchain.agents import create_react_agent, AgentExecutor
from langchain import hub


duckduckgo_search_tool = Tool(
    name="DuckDuckGo Search",
    func=duckduckgo_search_tool,
    description="Useful for answering questions about current events or the web"
)

wiki_tool = Tool(
    name="Wikipedia",
    func=wikipedia_search_tool,
    description="Useful for answering factual questions from Wikipedia"
)

prompt = hub.pull("hwchase17/react")

agent = create_react_agent(
    llm=model,
    tools=[duckduckgo_search_tool, wiki_tool],
    prompt=prompt
)

agent_executor = AgentExecutor(
    agent=agent,
    tools=[duckduckgo_search_tool, wiki_tool],
    verbose=True
)

response = agent_executor.invoke({"input": "who is the current prime minister of Nepal in 2025 and how did the people choose that?"})
print(response)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe question is about the prime minister of Nepal in 2025, which refers to specific future information. I need to first use DuckDuckGo to find out who the current prime minister of Nepal is in 2025, and then determine how the people chose them.

Action: DuckDuckGo Search
Action Input: "current prime minister of Nepal 2025 and how they were chosen"[0m[36;1m[1;3m[{'title': 'Nepal General Elections 2017: Oli Likely To be the Next PM', 'href': 'https://www.nepalisansar.com/news/nepal-general-elections-2017-oli-likely-next-pm/', 'body': '... Nepali Parliament, the Communist Alliance is projecting the erstwhile Prime Minister and the communist party leader Khadga Prasad Oli as its Prime ...'}, {'title': 'Nepal Politics 2018 | Cabinet Expansion | Presidential Election', 'href': 'https://www.nepalisansar.com/news/nepal-politics-2018-presidential-election-cabinet-expansion-constitutional-amendment-make-headlines/', 'body': 'The new

In [1]:
response = model.invoke("trump")
print(response.content)

NameError: name 'model' is not defined