# News Recommenation System using KNN

In [1]:
import warnings 
warnings.filterwarnings("ignore")

In [2]:
import os
import pandas as pd
from sqlalchemy import create_engine

In [3]:
DATABASE_URI = os.getenv("DATABASE_URI")
engine = create_engine(DATABASE_URI)

NoSuchModuleError: Can't load plugin: sqlalchemy.dialects:prisma.postgres

In [4]:
import sys
sys.path.append('..')
from recommender import NewsRecommendationSystem

In [5]:
QUERY = """
SELECT 
    posts.id,
    posts.title AS heading,
    posts.createdAt AS date,
    posts.visitCount,
    categories.name AS category,
    sentiments.label AS sentiment
FROM 
    posts
LEFT JOIN categories ON posts.categoryId = categories.id
LEFT JOIN sentiments ON posts.sentimentId = sentiments.id
WHERE 
    posts.status = 'published'; -- Fetch only published posts

"""

In [6]:
df = pd.read_sql_query(QUERY, engine)
df.head()

ProgrammingError: (psycopg2.errors.UndefinedTable) relation "posts" does not exist
LINE 10:     posts
             ^

[SQL: 
SELECT 
    posts.id,
    posts.title AS heading,
    posts.createdAt AS date,
    posts.visitCount,
    categories.name AS category,
    sentiments.label AS sentiment
FROM 
    posts
LEFT JOIN categories ON posts.categoryId = categories.id
LEFT JOIN sentiments ON posts.sentimentId = sentiments.id
WHERE 
    posts.status = 'published'; -- Fetch only published posts

]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [3]:
articles = df.to_dict(orient="records")
articles[:5]

[{'category': 'Market',
  'heading': 'नेपाल फ्रेड फर्वार्ड्स एसोसिएसनमा अधिकारी',
  'id': 1,
  'date': '2024-10-10'},
 {'category': 'Business',
  'heading': 'एप्पल इभेन्टको घोषणा, जुन ६ बाट शुरु हुने, आइफोन प्रयोगकर्तालाई ठूलो उपहार हुन सक्ने',
  'id': 2,
  'date': '2024-11-06'},
 {'category': 'Health',
  'heading': 'अत्यावश्यक परेकालाई मात्र गाउँ फर्काइँदै',
  'id': 3,
  'date': '2024-11-12'},
 {'category': 'Technology',
  'heading': 'रिचार्जर एपको योजना',
  'id': 4,
  'date': '2024-09-14'},
 {'category': 'Literature',
  'heading': 'सारंगीको धुन पछ्याउँदै साकिराको देशदेखि बाटुलेचौरसम्म',
  'id': 5,
  'date': '2024-10-10'}]

In [4]:
print(f"Total articles: {len(articles)}")
print(df.shape[0])

Total articles: 58928
58928


In [5]:
# Initialize and train model
recommender = NewsRecommendationSystem(
        k=5, 
        metric="euclidean", 
        time_decay_factor=0.1
        )
recommender.fit(articles)

In [10]:
def get_recommendation_data(recommended_id):
    recommended_id = recommended_id - 1
    print(f"Id: {articles[recommended_id]['id']}")
    print(f"Category: {articles[recommended_id]['category']}")
    print(f"Heading: {articles[recommended_id]['heading']}")
    print(f"Date: {articles[recommended_id]['date']}")

In [7]:
# Test recommendation
test_article_id = articles[8522]["id"]

# print test article details from articles
print(f"Article details:\n{articles[8522]}")

Article details:
{'category': 'Economy', 'heading': 'उत्पादनशील क्षेत्र र व्यावसायिक क्षेत्रको ब्याजदर फरक पारिने', 'id': 8523, 'date': '2024-09-25'}


In [8]:
recommendations = recommender.recommend(test_article_id, limit=5)
print(recommendations)

[3015, 27259, 52480, 44483, 19524]


In [12]:
print(f"Recommendations for article {test_article_id}:\n")
for recommendation in recommendations:
    get_recommendation_data(recommended_id=recommendation)
    print("------------------------")

Recommendations for article 8523:

Id: 3015
Category: Economy
Heading: चुलिँदै चुनौती
Date: 2024-09-06
------------------------
Id: 27259
Category: Economy
Heading: सुस्तायो विद्युतीय कारोबार
Date: 2024-10-29
------------------------
Id: 52480
Category: Economy
Heading: पुँजी बजारको विकास लागि उत्पादनशील क्षेत्रमा लगानीकर्ताको आकर्षण बढाइने
Date: 2024-10-09
------------------------
Id: 44483
Category: Economy
Heading: अब मुद्दती र अन्य निक्षेपको ब्याजदर पाँच प्रतिशतभन्दा बढी फरक पार्न नपाइने
Date: 2024-09-13
------------------------
Id: 19524
Category: Economy
Heading: वाणिज्य बैंकले घटाए कर्जाको ब्याजदर
Date: 2024-09-21
------------------------
