**CIA1**

DATASET AND REWARD MECHANISM

In [1]:
import numpy as np
import pandas as pd
import random

# Generate a simulated dataset of articles
np.random.seed(42)

def create_dataset(num_articles=100):
    articles = []
    for i in range(num_articles):
        article = {
            'id': i,
            'title': f"Article {i}",
            'views': np.random.randint(50, 1000),
            'alignment': random.choice(['aligned', 'neutral', 'unaligned']),  # Representing political or commercial alignment
        }
        articles.append(article)
    return pd.DataFrame(articles)

articles_df = create_dataset()

# Define the reward mechanism based on alignment and boosted views
def calculate_reward(article, views_boosted):
    if article['alignment'] == 'aligned':
        reward = views_boosted * 1.5  # Higher reward for aligned articles
    elif article['alignment'] == 'neutral':
        reward = views_boosted * 1.0
    else:
        reward = views_boosted * 0.5  # Lower reward for unaligned articles
    return reward


K ARM BANDIT APPROACH

In [2]:
class NewsValueMaximizerBandit:
    def __init__(self, articles_df, epsilon=0.1):
        self.articles_df = articles_df
        self.epsilon = epsilon  # Exploration rate
        self.article_rewards = {article_id: 0 for article_id in articles_df['id']}  # Store total reward per article
        self.article_counts = {article_id: 0 for article_id in articles_df['id']}  # Track how often each article is selected

    def select_article(self):
        # Epsilon-greedy strategy: explore or exploit
        if random.uniform(0, 1) < self.epsilon:
            return random.choice(self.articles_df['id'].tolist())
        else:
            # Exploit: select the article with the highest average reward
            avg_rewards = {article_id: (self.article_rewards[article_id] / self.article_counts[article_id]
                                        if self.article_counts[article_id] > 0 else 0)
                           for article_id in self.article_rewards}
            return max(avg_rewards, key=avg_rewards.get)

    def update_rewards(self, article_id, reward):
        # Update reward and count for the selected article
        self.article_rewards[article_id] += reward
        self.article_counts[article_id] += 1


In [3]:
num_rounds = 1000  # Number of rounds to simulate

agent = NewsValueMaximizerBandit(articles_df, epsilon=0.1)

for round in range(num_rounds):
    # Select an article based on epsilon-greedy policy
    article_id = agent.select_article()
    article = articles_df[articles_df['id'] == article_id].iloc[0]

    # Simulate boosting views for the article
    views_boosted = np.random.randint(50, 200)  # Example boosted views
    reward = calculate_reward(article, views_boosted)  # Calculate reward

    # Update the agent with the received reward
    agent.update_rewards(article_id, reward)

    if round % 100 == 0:
        print(f"Round {round}: Promoted article {article_id} ({article['alignment']}) - Reward: {reward:.2f}")

# Final results
print("\nFinal Average Rewards for Top Articles:")
top_articles = sorted(agent.article_rewards.keys(), key=lambda x: agent.article_rewards[x] / max(agent.article_counts[x], 1), reverse=True)
for article_id in top_articles[:10]:  # Display top 10 articles by average reward
    avg_reward = agent.article_rewards[article_id] / max(agent.article_counts[article_id], 1)
    alignment = articles_df[articles_df['id'] == article_id].iloc[0]['alignment']
    print(f"Article {article_id} ({alignment}) - Average Reward: {avg_reward:.2f}")


Round 0: Promoted article 1 (unaligned) - Reward: 48.50
Round 100: Promoted article 9 (aligned) - Reward: 268.50
Round 200: Promoted article 9 (aligned) - Reward: 126.00
Round 300: Promoted article 9 (aligned) - Reward: 160.50
Round 400: Promoted article 9 (aligned) - Reward: 87.00
Round 500: Promoted article 9 (aligned) - Reward: 135.00
Round 600: Promoted article 9 (aligned) - Reward: 216.00
Round 700: Promoted article 9 (aligned) - Reward: 241.50
Round 800: Promoted article 9 (aligned) - Reward: 132.00
Round 900: Promoted article 71 (neutral) - Reward: 166.00

Final Average Rewards for Top Articles:
Article 9 (aligned) - Average Reward: 189.98
Article 39 (aligned) - Average Reward: 189.58
Article 92 (aligned) - Average Reward: 189.46
Article 26 (aligned) - Average Reward: 189.06
Article 3 (aligned) - Average Reward: 187.08
Article 47 (aligned) - Average Reward: 185.50
Article 25 (aligned) - Average Reward: 185.25
Article 21 (aligned) - Average Reward: 183.00
Article 55 (aligned) - A