In [1]:
import pandas as pd
import pickle

In [2]:
class ArticleRecommendationFacade:
    """Single entry point for looking up articles and their recommendations.

    Articles are read from a CSV file into a DataFrame and matched on its
    ``uuid`` column. Recommendations are unpickled and iterated as a sequence
    of dicts, each identifying its article through the ``'article id'`` key.
    Successful lookups are memoised in per-instance dictionaries.
    """

    # Keys under which a recommendation entry may hold its list, in order of
    # preference. 'recommendations' is the key this class has always read;
    # 'recommendations results' is the key seen when inspecting an entry of
    # the pickled data, so it is accepted as a fallback.
    _RECOMMENDATION_KEYS = ('recommendations', 'recommendations results')

    def __init__(self, articles_path: str, recommendations_path: str):
        """Load both data sources and create empty lookup caches.

        Args:
            articles_path: Path of the CSV file holding the articles.
            recommendations_path: Path of the pickle file holding the
                recommendation entries.
        """
        # Load articles data
        self.articles_df = pd.read_csv(articles_path)
        # Load recommendations data.
        # SECURITY: pickle.load can execute arbitrary code contained in the
        # file; only point this at pickle files from a trusted source.
        with open(recommendations_path, 'rb') as f:
            self.recommendations = pickle.load(f)

        # Memoisation of successful lookups, keyed by article ID
        self.article_cache = {}
        self.recommendation_cache = {}

    def get_article(self, article_id: str) -> dict:
        """Retrieve article details by ID.

        Args:
            article_id: Value to match against the ``uuid`` column.

        Returns:
            The first matching row as a ``{column: value}`` dict, or an empty
            dict when no row matches. The returned dict is the cached object
            itself, so callers should treat it as read-only.
        """
        # Check cache first
        if article_id in self.article_cache:
            return self.article_cache[article_id]

        # Search for article in the DataFrame
        article_row = self.articles_df[self.articles_df['uuid'] == article_id]
        if article_row.empty:
            return {}  # Misses are not cached

        article_data = article_row.iloc[0].to_dict()
        self.article_cache[article_id] = article_data  # Cache for future access
        return article_data

    def get_recommendations(self, article_id: str) -> list:
        """Retrieve recommendations for a given article ID.

        Args:
            article_id: Value to match against each entry's ``'article id'``.

        Returns:
            The value stored under the first key of ``_RECOMMENDATION_KEYS``
            present in the matching entry; an empty list when the entry has
            none of those keys or when no entry matches. The returned object
            is the cached one, so callers should treat it as read-only.
        """
        # Check cache first
        if article_id in self.recommendation_cache:
            return self.recommendation_cache[article_id]

        recommendation_dict = self._find_recommendation_dict(article_id)
        if not recommendation_dict:
            return []  # Misses are not cached

        # Take the first known key that is actually present, so entries that
        # only carry 'recommendations results' no longer yield an empty list.
        recommended_articles = []
        for key in self._RECOMMENDATION_KEYS:
            if key in recommendation_dict:
                recommended_articles = recommendation_dict[key]
                break

        self.recommendation_cache[article_id] = recommended_articles  # Cache
        return recommended_articles

    def get_article_with_recommendations(self, article_id: str) -> dict:
        """Retrieve both article details and recommendations for the article ID.

        Returns:
            ``{"article": <get_article result>,
            "recommendations": <get_recommendations result>}``.
        """
        article_data = self.get_article(article_id)
        recommendations = self.get_recommendations(article_id)

        # Combine data into a single dictionary
        return {
            "article": article_data,
            "recommendations": recommendations
        }

    def _find_recommendation_dict(self, article_id: str) -> dict:
        """Find the recommendation dictionary for a specific article ID.

        Scans ``self.recommendations`` in order and returns the first entry
        whose ``'article id'`` equals ``article_id``, or an empty dict when
        there is none.
        """
        for recommendation in self.recommendations:
            if recommendation.get('article id') == article_id:
                return recommendation
        return {}

In [3]:

# Build the facade from the test-set articles CSV and the recommendations pickle
facade = ArticleRecommendationFacade(
    articles_path='data/test_set_articles.csv',
    recommendations_path='data/recommendations.pkl',
)

# Smoke test: look up the first article in the table together with its recommendations
sample_article_id = facade.articles_df['uuid'].iat[0]
sample_result = facade.get_article_with_recommendations(sample_article_id)
sample_result


{'article': {'uuid': 'TV2-15394074',
  'byline': "['Jack Theetha', 'Hege Skorpen Olsen', 'Sophie Amundsen Dregelid', 'Agnes Alstad Mogstad']",
  'title': '– Hun er vår tids største kvinnesakskvinne',
  'lead_text': 'GOD MORGEN NORGE (TV 2): Nå blir avdøde Shabana Rehman hyllet i minoritetsmiljøer.',
  'creation_date': '2023-01-02 10:35:26+00:00',
  'last_modified': '2023-01-02 23:26:33+00:00',
  'tags': "['shabana rehman', 'mullaløft', 'bjørn egil halvorsen', 'shazia majid', 'mulla krekar', 'minoritetsjenter', 'kvinnekamp', 'innenriks', 'nyheter', 'god morgen norge']",
  'url': 'https://www.tv2.no/nyheter/innenriks/hun-er-var-tids-storste-kvinnesakskvinne/15394074/',
  'body_text': 'Torsdag ettermiddag kom den rystende beskjeden. Komiker, samfunnsdebattant og skribent Shabana Rehman (46) hadde dødd av bukspyttkjertelkreften – som hun hadde fått påvist i januar.Det er ingen hemmelighet at hennes ettermæle fortsatt lever, og hennes langvarige kamp har satt store fotavtrykk.Nå kommer det 

In [6]:
# Display the first entry of the loaded recommendations to inspect its structure
facade.recommendations[0]

{'recommendations results': [(10,
   'TV2-15394074',
   'TV2-15237542',
   0.70869493,
   '2022-11-02 19:58:19+00:00',
   "Main article discusses Shabana Rehman's life, her activism, and her battle with cancer. Potential article focuses on her death and legacy, providing essential context about her contributions and struggles. Both articles are deeply interconnected, highlighting her impact and the public's response."),
  (8,
   'TV2-15394074',
   'TV2-15186618',
   0.6936177,
   '2022-10-14 05:57:53+00:00',
   "Main article discusses Shabana Rehman's impact on women's rights and societal norms. Potential article highlights a politician's protest against hijab oppression, emphasizing women's choice. Both articles focus on women's empowerment and the struggle against patriarchal structures, making them closely related."),
  (8,
   'TV2-15394074',
   'TV2-14485687',
   0.66861564,
   '2022-01-11 18:19:52+00:00',
   "Main article discusses Shabana Rehman's impact on societal norms and wom

In [8]:
# Show the rows of the articles table whose uuid is 'TV2-15237542'
facade.articles_df.loc[facade.articles_df['uuid'].eq('TV2-15237542')]



Unnamed: 0,uuid,byline,title,lead_text,creation_date,last_modified,tags,url,body_text,related_articles,section,related_media_links,related_articles_counts,cleaned_related_articles,creation_time,number_cleaned_related_articles,all_text,full_text_embeddings


In [5]:
import pandas as pd
import pickle

# Read the article table from CSV
articles_df = pd.read_csv('data/test_set_articles.csv')

# Unpickle the recommendation entries
with open('data/recommendations.pkl', 'rb') as f:
    recommendations = pickle.load(f)

# Tabulate the recommendation entries and expose 'article id' as 'uuid'
# so the join column has the same name as in articles_df
recommendations_df = pd.DataFrame(recommendations).rename(columns={'article id': 'uuid'})

# Left join on 'uuid': every article row is kept, whether or not it has a match
combined_df = articles_df.merge(recommendations_df, how='left', on='uuid')

# Write the joined table to a new CSV file, leaving out the index
combined_df.to_csv('data/combined_articles_recommendations.csv', index=False)