In [2]:
import numpy as np
import random
import math

# Genres the agent can recommend; every genre starts with a Q-value of 0
movies = ["Action", "Comedy", "Drama", "Horror", "Romance"]
q_table = dict.fromkeys(movies, 0)
preferred_movies = ["Action", "Comedy"]  # Genres the simulated user prefers

print(q_table)

# Learning parameters
alpha = 0.1    # Learning rate: step size of each Q-value update
gamma = 0.9    # Discount factor applied to the current maximum Q-value
epsilon = 0.3  # Probability of recommending a random genre instead of the greedy one

# Helper function to calculate recommendation metrics
def calculate_metrics(preferred_movies):
    """Print summary metrics for the module-level ``q_table``.

    Four values are printed; nothing is returned:

    * Recommendation accuracy: the number of ``q_table`` entries that are in
      ``preferred_movies`` and have a strictly positive Q-value, divided by
      ``len(preferred_movies)``.
    * Entropy (natural log) of the distribution obtained by normalising the
      positive Q-values so that they sum to 1.
    * Mean Q-value of the entries named in ``preferred_movies``.
    * Mean Q-value of the whole table.

    Args:
        preferred_movies: Keys of ``q_table`` that the user prefers. May be
            empty, in which case the preferred-movie metrics are reported
            as 0.

    Raises:
        KeyError: If ``preferred_movies`` names a key missing from
            ``q_table``.
    """
    q_values = list(q_table.values())

    # Recommendation Accuracy (aligned recommendations)
    preferred_hits = sum(
        1
        for movie, q_value in q_table.items()
        if movie in preferred_movies and q_value > 0
    )
    # Guard against an empty preference list instead of dividing by zero.
    accuracy = preferred_hits / len(preferred_movies) if preferred_movies else 0.0
    print(f"\nRecommendation Accuracy: {accuracy:.2f}")

    # Entropy of Q-Values (consistency of preferences).
    # Normalise over the positive values only: dividing by the sum of *all*
    # values yields invalid probabilities (or a zero/negative divisor) as
    # soon as the table holds a negative entry.
    positive_q = [q for q in q_values if q > 0]
    total_positive = sum(positive_q)
    probabilities = [q / total_positive for q in positive_q]
    # Entropy is non-negative; max() also turns a -0.0 result into 0.0 so a
    # one-hot table prints "0.00" rather than "-0.00".
    entropy = max(0.0, -sum(p * math.log(p) for p in probabilities))
    print(f"Entropy of Q-Values: {entropy:.2f}")

    # Average Q-Value for Preferred Movies (np.mean of an empty list is nan)
    preferred_q = [q_table[movie] for movie in preferred_movies]
    avg_preferred_q = np.mean(preferred_q) if preferred_q else 0.0
    print(f"Average Q-Value for Preferred Movies: {avg_preferred_q:.2f}")

    # Average Q-Value Overall
    avg_q_value = np.mean(q_values) if q_values else 0.0
    print(f"Average Q-Value (Overall): {avg_q_value:.2f}")

# Simulate user interaction with movie recommendations
def user_interaction(movie):
    """Return the simulated user's reward for a recommended genre.

    The reward is deterministic: 1 for "Action" or "Comedy", 0 for every
    other value (including genres that are not known at all).
    """
    liked_genres = {"Action", "Comedy"}
    return int(movie in liked_genres)

# Agent learns User Preferences
def train_agent(preferred_movies, iterations=100):
    """Train the agent with an epsilon-greedy loop over the global ``q_table``.

    On every iteration a genre is chosen (at random with probability
    ``epsilon``, otherwise the genre with the highest current Q-value), its
    reward is read from ``user_interaction`` and the Q-value of that genre is
    updated in place. The table is printed on every 10th iteration; once the
    loop ends the metrics and the final table are printed.

    Args:
        preferred_movies: Passed on to ``calculate_metrics`` after training.
        iterations: Number of update steps to perform.
    """
    global q_table
    print("\n=== Training Agent with User Preferences ===")

    for step in range(iterations):
        # Explore with probability epsilon, otherwise exploit the best genre.
        if random.random() < epsilon:
            movie = random.choice(movies)
        else:
            movie = max(q_table, key=q_table.get)
        reward = user_interaction(movie)

        # The Q-value is updated on every iteration ...
        current_q = q_table[movie]
        target = reward + gamma * max(q_table.values())
        q_table[movie] = current_q + alpha * (target - current_q)

        # ... but the table is only reported every 10 iterations.
        if not step % 10:
            print(f"Iteration {step} - Q-Table: {q_table}")

    calculate_metrics(preferred_movies)
    print("\nQ-Table after initial training:", q_table)

# Run the training loop for 500 iterations, then print the metrics and final Q-table
train_agent(preferred_movies, iterations=500)


{'Action': 0, 'Comedy': 0, 'Drama': 0, 'Horror': 0, 'Romance': 0}

=== Training Agent with User Preferences ===
Iteration 0 - Q-Table: {'Action': 0.1, 'Comedy': 0, 'Drama': 0, 'Horror': 0, 'Romance': 0}
Iteration 10 - Q-Table: {'Action': 0.8648275251635912, 'Comedy': 0.29083924646470904, 'Drama': 0, 'Horror': 0, 'Romance': 0}
Iteration 20 - Q-Table: {'Action': 1.3994164535871152, 'Comedy': 0.29083924646470904, 'Drama': 0.1716460904100236, 'Horror': 0.11023107930092886, 'Romance': 0.10225361545548368}
Iteration 30 - Q-Table: {'Action': 2.143218591927811, 'Comedy': 0.29083924646470904, 'Drama': 0.1716460904100236, 'Horror': 0.11023107930092886, 'Romance': 0.27056072332479403}
Iteration 40 - Q-Table: {'Action': 2.7501966404214637, 'Comedy': 0.29083924646470904, 'Drama': 0.38202659647058457, 'Horror': 0.11023107930092886, 'Romance': 0.47777431494286227}
Iteration 50 - Q-Table: {'Action': 3.2427095093971667, 'Comedy': 0.6535991776639831, 'Drama': 0.5913416344614578, 'Horror': 0.110231079300