In [1]:
import warnings

warnings.filterwarnings("ignore")

In [2]:
import os
import pandas as pd

from sqlalchemy import create_engine

In [3]:
import sys

sys.path.append("..")
from recommender import NewsRecommendationSystem

In [4]:
# Load database URL from environment variables
DATABASE_URL = os.getenv("DATABASE_URL")

# Establish a connection to the database
engine = create_engine(DATABASE_URL)

In [5]:
QUERY = """
SELECT 
    "Post".id,
    "Post".title AS heading,
    "Post"."createdAt" AS date,
    "Post"."visitCount",
    "Category".name AS category_name,
    "Sentiment".name AS sentiment_name
FROM 
    "Post"
LEFT JOIN "Category" ON "Post"."categoryId" = "Category".id
LEFT JOIN "Sentiment" ON "Post"."sentimentId" = "Sentiment".id
WHERE
    "Sentiment".name != 'NEGATIVE'
"""

In [6]:
df = pd.read_sql_query(QUERY, engine)

In [7]:
# Convert DataFrame to a list of dictionaries for easier processing
articles = df.to_dict(orient="records")

In [8]:
# Display the first few records
print("Sample article records:")
print(articles[:5])

Sample article records:
[{'id': 'cm6cglxr40001gjp8jmi8adbz', 'heading': 'नेपालमा शिक्षा प्रणाली सुधार', 'date': Timestamp('2025-01-25 17:23:15.616000'), 'visitCount': 0, 'category_name': 'economy', 'sentiment_name': 'POSITIVE'}, {'id': 'cm6cglyjo0002gjp8n9x8e2om', 'heading': 'नेपालमा महिलाको सशक्तिकरण', 'date': Timestamp('2025-01-25 17:23:16.645000'), 'visitCount': 0, 'category_name': 'economy', 'sentiment_name': 'NEUTRAL'}, {'id': 'cm6cglzcu0003gjp8cl64f1im', 'heading': 'नेपालमा आगामी चुनावको तयारी', 'date': Timestamp('2025-01-25 17:23:17.695000'), 'visitCount': 0, 'category_name': 'economy', 'sentiment_name': 'POSITIVE'}, {'id': 'cm6cgm06d0004gjp8rdu7icct', 'heading': 'नेपालमा महिलाको सशक्तिकरण', 'date': Timestamp('2025-01-25 17:23:18.758000'), 'visitCount': 0, 'category_name': 'economy', 'sentiment_name': 'POSITIVE'}, {'id': 'cm6cgm10u0005gjp8t3x1g2a0', 'heading': 'नेपालका प्रमुख पर्यटकीय गन्तव्यहरू', 'date': Timestamp('2025-01-25 17:23:19.855000'), 'visitCount': 0, 'category_name':

In [9]:
MODEL_DIR = os.path.join(os.getcwd(), "..", "models")
if not os.path.exists(MODEL_DIR):
    print(f"Creating directory: {MODEL_DIR}")
    os.makedirs(MODEL_DIR)

MODEL_PATH = os.path.join(MODEL_DIR, "news_recommendation_model.pkl")

In [10]:
sample_article_id = articles[0]["id"]

In [11]:
recommender_saved = NewsRecommendationSystem.load_model(MODEL_PATH)

In [12]:
op = recommender_saved.recommend(sample_article_id)

In [13]:
op

[('cm6cglzcu0003gjp8cl64f1im', 1.261624932178372),
 ('cm6dw2oy20004gjx8aqtorl2q', 1.2652088311816925),
 ('cm6cgm06d0004gjp8rdu7icct', 1.2883576606946117),
 ('cm6cgmwvj0016gjp8skudadsf', 1.4142135623730951),
 ('cm6dw2mk70001gjx8a1tkndwj', 1.4174116963698524)]

In [14]:
def get_recommendation_data(recommended_id: str):
    """
    Fetch and display details of a recommended article.

    Args:
        recommended_id: The ID of the article to fetch.
    """
    article = next(a for a in articles if a["id"] == recommended_id)
    print(f"ID: {article['id']}")
    print(f"Category: {article['category_name']}")
    print(f"Heading: {article['heading']}")
    print(f"Sentiment: {article['sentiment_name']}")
    print(f"Date: {article['date']}")

In [15]:
new_test_article_id = articles[34]["id"]
print("Testing with article:", articles[3])

recommendations = recommender_saved.recommend(new_test_article_id, limit=5)
print("Recommended articles:")
for rec_id, distances in recommendations:
    get_recommendation_data(rec_id)
    print("------------------------")

Testing with article: {'id': 'cm6cgm06d0004gjp8rdu7icct', 'heading': 'नेपालमा महिलाको सशक्तिकरण', 'date': Timestamp('2025-01-25 17:23:18.758000'), 'visitCount': 0, 'category_name': 'economy', 'sentiment_name': 'POSITIVE'}
Recommended articles:
ID: cm6cgm3km0008gjp8wg6dsn5x
Category: opinion
Heading: नेपालको नयाँ विकास योजनाहरू
Sentiment: POSITIVE
Date: 2025-01-25 17:23:23.159000
------------------------
ID: cm6cgm10u0005gjp8t3x1g2a0
Category: opinion
Heading: नेपालका प्रमुख पर्यटकीय गन्तव्यहरू
Sentiment: POSITIVE
Date: 2025-01-25 17:23:19.855000
------------------------
ID: cm6dw3lj00014gjx81p020bju
Category: health
Heading: नेपालको अर्थव्यवस्था र भविष्य
Sentiment: POSITIVE
Date: 2025-01-26 17:24:39.997000
------------------------
ID: cm6dw3a5s000rgjx8j1xbds3d
Category: diaspora
Heading: नेपालको अर्थव्यवस्था र भविष्य
Sentiment: POSITIVE
Date: 2025-01-26 17:24:25.265000
------------------------
ID: cm6cgmbdv000hgjp8hzsg3cgr
Category: sports
Heading: नेपालको अर्थव्यवस्था र भविष्य
Sentime