In [1]:
import numpy as np
import random

In [2]:
# Human-readable label for each integer reward level (0 = least engaged, 4 = most).
REWARD_MAP = {
    0: "Didn’t view",
    1: "Opened",
    2: "Read most",
    3: "Commented",
    4: "Shared",
}


In [9]:
class NewsRecommendationSystem:
    """Epsilon-greedy bandit that chooses which news article to recommend.

    Each article is an arm. The system keeps a running estimate of the
    average reward per article (``q_values``) and the number of times each
    article has been recommended (``article_counts``).
    """

    # Reward levels a simulated interaction can produce (the keys of REWARD_MAP).
    REWARD_LEVELS = (0, 1, 2, 3, 4)

    def __init__(self, num_articles, epsilon=0.1, initial_value=0.5, seed=None):
        """Create a recommender over ``num_articles`` articles.

        Args:
            num_articles: Number of articles (arms); must be at least 1.
            epsilon: Probability of exploring (recommending a uniformly random
                article) on each step; must lie in [0, 1].
            initial_value: Starting reward estimate for every article.
            seed: Optional seed for a private random generator. When ``None``
                (the default) the global ``random`` module is used, so
                seeding it externally with ``random.seed`` still works.

        Raises:
            ValueError: If ``num_articles`` < 1 or ``epsilon`` is outside [0, 1].
        """
        if num_articles < 1:
            raise ValueError(f"num_articles must be at least 1, got {num_articles}")
        if not 0.0 <= epsilon <= 1.0:
            raise ValueError(f"epsilon must be in [0, 1], got {epsilon}")

        self.num_articles = num_articles
        self.epsilon = epsilon
        self.q_values = np.full(num_articles, initial_value)
        self.article_counts = np.zeros(num_articles)
        # Either a dedicated, seeded generator or the `random` module itself;
        # both expose random(), randint() and choice().
        self._rng = random.Random(seed) if seed is not None else random

    def recommend_article(self):
        """Return the 0-based index of the article to recommend next.

        With probability ``epsilon`` a uniformly random article is returned
        (explore); otherwise the article with the highest current estimate is
        returned (exploit). Ties are resolved in favour of the lowest index,
        which is how ``np.argmax`` behaves.

        Returns:
            int: Always a plain Python ``int``, on both branches.
        """
        if self._rng.random() < self.epsilon:
            # Explore: recommend a random article.
            return self._rng.randint(0, self.num_articles - 1)
        # Exploit: recommend the article with the highest estimated reward.
        # np.argmax yields a NumPy integer; convert so both branches agree.
        return int(np.argmax(self.q_values))

    def update_estimates(self, article_index, reward):
        """Fold one observed ``reward`` into the estimate for ``article_index``.

        Uses the incremental sample-mean update ``Q += (r - Q) / n``. Because
        ``n`` is 1 on the first observation, the first reward replaces
        ``initial_value`` entirely.
        """
        self.article_counts[article_index] += 1
        step = (reward - self.q_values[article_index]) / self.article_counts[article_index]
        self.q_values[article_index] += step

    def simulate_user_interaction(self, article_index):
        """Return a simulated reward for recommending ``article_index``.

        The reward is drawn uniformly from ``REWARD_LEVELS``; ``article_index``
        is accepted but does not influence the draw.
        """
        return self._rng.choice(self.REWARD_LEVELS)  # Random reward

    def run_simulation(self, num_recommendations, verbose=True, report_every=100):
        """Run ``num_recommendations`` recommend / observe / update rounds.

        Args:
            num_recommendations: Number of rounds to simulate.
            verbose: If true (default), print one line per recommendation.
            report_every: Print the Q-values and article counts after every
                this many rounds (default 100). Pass ``None`` or ``0`` to
                disable the periodic report.
        """
        for i in range(num_recommendations):
            article_index = self.recommend_article()
            reward = self.simulate_user_interaction(article_index)
            if verbose:
                # Articles are shown 1-based to the reader.
                print(f"Recommendation {i + 1}: Article {article_index + 1}, Reward: {REWARD_MAP[reward]}")
            self.update_estimates(article_index, reward)
            if report_every and (i + 1) % report_every == 0:
                print(f"After {i + 1} recommendations, Q-values: {self.q_values}")
                print(f"Article Counts: {self.article_counts}")

In [10]:
# Experiment configuration: 10 candidate articles, 15% exploration.
SEED = 42
random.seed(SEED)  # the simulation draws from the global `random` module; seed it so reruns match
num_articles = 10
epsilon = 0.15  # probability of recommending a random article instead of the current best
news_recommendation_system = NewsRecommendationSystem(num_articles, epsilon)


In [11]:

# Run 1000 recommendation rounds on the system configured above.
news_recommendation_system.run_simulation(1000)


Recommendation 1: Article 1, Reward: Shared
Recommendation 2: Article 1, Reward: Shared
Recommendation 3: Article 10, Reward: Shared
Recommendation 4: Article 6, Reward: Commented
Recommendation 5: Article 1, Reward: Commented
Recommendation 6: Article 10, Reward: Shared
Recommendation 7: Article 10, Reward: Opened
Recommendation 8: Article 4, Reward: Opened
Recommendation 9: Article 1, Reward: Opened
Recommendation 10: Article 1, Reward: Didn’t view
Recommendation 11: Article 6, Reward: Opened
Recommendation 12: Article 10, Reward: Didn’t view
Recommendation 13: Article 1, Reward: Read most
Recommendation 14: Article 9, Reward: Shared
Recommendation 15: Article 9, Reward: Didn’t view
Recommendation 16: Article 1, Reward: Shared
Recommendation 17: Article 1, Reward: Opened
Recommendation 18: Article 1, Reward: Commented
Recommendation 19: Article 7, Reward: Commented
Recommendation 20: Article 1, Reward: Shared
Recommendation 21: Article 7, Reward: Read most
Recommendation 22: Article 