In [18]:
import elasticsearch_dsl as dsl
import spacy
from elasticsearch_dsl import Document, Text, Keyword, Date, Boolean, Float, Nested, DenseVector
from typing import List, Dict, Any

class Article(Document):
    """RSS feed article stored in the ``rss_feeds`` index.

    Alongside the raw feed fields, each article carries NLP-derived data
    (entities, sentiment, embedding) that ``clean`` computes from ``summary``.
    """

    # Raw feed fields.
    title: str = Text()
    link: str = Keyword()
    published: str = Date()
    summary: str = Text()
    source: str = Keyword()

    # NLP-derived fields, filled in by ``clean``.
    embedding: List[float] = DenseVector()  # spaCy document vector of the summary
    nlp_processed: bool = Boolean()  # True once ``clean`` has run the pipeline
    entities: List[Dict[str, Any]] = Nested()  # [{"text": ..., "label": ...}, ...]
    sentiment: float = Float()  # polarity read from ``doc._.blob``

    class Index:
        name = "rss_feeds"

    def clean(self):
        """Derive entities, sentiment and embedding from ``summary``.

        Returns without touching the document when an embedding is already
        present, or when there is no summary text to analyse.

        Relies on a module-level spaCy pipeline named ``nlp`` that must exist
        before this method runs. The sentiment lookup reads ``doc._.blob``, so
        that pipeline presumably needs the spacytextblob component registered
        -- TODO confirm where it is added.
        """
        if self.embedding:
            # Already processed; do not recompute.
            return

        text = self.summary
        if not text or not text.strip():
            # An unset summary is not a string, which spaCy refuses to process,
            # and blank text has nothing to extract. Leave the NLP fields
            # (including ``nlp_processed``) unset so the gap stays visible.
            return

        doc = nlp(text)
        self.entities = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
        # Using spacytextblob for sentiment analysis
        self.sentiment = doc._.blob.sentiment.polarity
        self.nlp_processed = True
        self.embedding = doc.vector.tolist()

# Register the local Elasticsearch node under the default connection alias.
# Kept as the last expression of the cell so the client repr is displayed.
dsl.connections.create_connection(alias="default", hosts=["http://localhost:9200"])



<Elasticsearch(['http://localhost:9200'])>

In [39]:
# Load the NLP model
nlp = spacy.load("en_core_web_md")

query = "Yulo"
vector = nlp(query).vector
print(len(vector))  # dimensionality of the query embedding

if not vector.any():
    # An all-zero vector means spaCy had no word vector for any token in the
    # query. A kNN search on it cannot rank by similarity, so report the
    # problem instead of printing unrelated hits.
    print(f"No word vector available for {query!r}; skipping kNN search.")
else:
    # Approximate nearest-neighbour search on the stored summary embeddings.
    s = Article.search(index="rss_feeds")
    s = s.knn(field="embedding", k=5, num_candidates=10, query_vector=vector.tolist())
    response = s.execute()
    for hit in response:
        print(hit.title)
        print(hit.summary)
        print("\n")

300
[[34m2024-08-31T09:28:56.164+0800[0m] {[34m_transport.py:[0m349} INFO[0m - POST http://localhost:9200/rss_feeds/_search [status:200 duration:0.005s][0m
Kadiri: uod sa utak atbp. mga parasito
NAHIHIBANG umano si Robert F. Kennedy Jr. sa pagkan­didatong presidente.


Jenna Ortega shuts down past Johnny Depp dating rumor
Actress Jenna Ortega laughed off the rumor that she dated Johnny Depp.


Aubrey, pitong buwan niligawan ni Troy
Ikinuwento nina Aubrey Miles at Troy Montero ang una nilang pagkikita. Ayon sa aktres, inakala niya noong una na walang gusto sa kaniya ang aktor.


Glaiza De Castro at David Rainey, plano na kayang magkaanak?
Matapos na ikasal, plano na kaya agad nina Glaiza De Castro at David Rainey na bumuo ng sarili nilang pamilya?


Partido ni Pangulong Marcos pinanumpa pambatong alkalde sa Pasig
Tiyak na mapapalaban umano si Mayor Vico Sotto sa darating na 2025 midterm election matapos manumpa na bilang miyembro ng Partido Federal ng Pilipinas ang tinawag nitong 

In [36]:
# Full-text search: match the query text against the ``summary`` field.
query = "Carlos Yulo"

s = Article.search(index="rss_feeds").query(dsl.query.Match(summary=query))
response = s.execute()
for hit in response:
    # Title, then two blank lines separating it from the next result.
    print(hit.title, end="\n\n\n")

[[34m2024-08-31T09:27:18.016+0800[0m] {[34m_transport.py:[0m349} INFO[0m - POST http://localhost:9200/rss_feeds/_search [status:200 duration:0.016s][0m
‘Laruang de baterya’: Carlos Yulo amuses with ‘Maybe This Time’ dance trend take


‘Fake’: Nueva Ecija governor disowns viral congratulatory post for Carlos Yulo


Yulo gets house and lot from Century Properties


Castañeda receives recognition in Cebu


Francis Libiran and Carlos Yulo 'collaborate' for fashion design inspiration


Carlos Yulo gets Land Cruiser Prado reward from Toyota


Chery Auto gifts Carlos Yulo a Tiggo 7, lifetime free oil change


Carlos Yulo also had to do mental gymnastics --psychologist Anna Tuazon


The shortness of Carlos Yulo


Young Pinay gymnast in UAE inspired by Carlos Yulo's double gold win


