In [20]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [21]:
class MovieRecommender:
    """Content-based movie recommender with UCB-style bookkeeping.

    Each movie is described by the text of its ``Genre``, ``Director`` and
    ``Star1`` columns, vectorised with TF-IDF. Recommendations for a title are
    the other titles whose feature vectors have the highest cosine similarity
    to it.

    Every successful call to :meth:`recommend` also updates per-title
    pull/reward counters. Those counters are bookkeeping only: the ranking
    returned by :meth:`recommend` is driven by similarity alone, and no
    external feedback signal is consumed when rewards are assigned.
    """

    # Upper bound on how many nearest neighbours are scanned per query.
    _MAX_CANDIDATES = 100

    def __init__(self, csv_path):
        """Load the catalogue and precompute the pairwise similarity matrix.

        Args:
            csv_path: Path (or anything ``pandas.read_csv`` accepts) of a CSV
                containing at least the columns ``Series_Title``, ``Genre``,
                ``Director`` and ``Star1``.
        """
        self.df = pd.read_csv(csv_path)

        # A single missing value would turn the whole concatenated string into
        # NaN, so missing cells are treated as empty text instead.
        self.df['features'] = (
            self.df['Genre'].fillna('') + ' '
            + self.df['Director'].fillna('') + ' '
            + self.df['Star1'].fillna('')
        )

        self.vectorizer = TfidfVectorizer(stop_words='english')
        self.feature_matrix = self.vectorizer.fit_transform(self.df['features'])

        # Dense (n_movies x n_movies) matrix; row i holds similarities to movie i.
        self.similarity_matrix = cosine_similarity(self.feature_matrix)

        # Title -> positional row index. If a title occurs more than once,
        # the last occurrence wins.
        self.movie_indices = {title: idx for idx, title in enumerate(self.df['Series_Title'])}

        # Bandit bookkeeping (keyed by movie title).
        self.arms = {}  # not read or written anywhere else in this class
        self.total_pulls = 0  # number of recommend() calls that updated the stats
        self.arm_pulls = {}  # title -> times the title was recommended
        self.arm_rewards = {}  # title -> accumulated reward

    def _ucb_score(self, arm):
        """Return the UCB1 score (mean reward + exploration bonus) of ``arm``.

        Args:
            arm: Movie title identifying the arm.

        Returns:
            ``float('inf')`` for an arm that has never been pulled, otherwise
            ``mean_reward + sqrt(2 * ln(total_pulls) / pulls)``.
        """
        if arm not in self.arm_pulls or self.arm_pulls[arm] == 0:
            return float('inf')

        average_reward = self.arm_rewards[arm] / self.arm_pulls[arm]
        # Clamp to 1 so that log(0) -> -inf -> sqrt(negative) -> NaN cannot
        # occur if the score is queried before any request was recorded.
        rounds = max(self.total_pulls, 1)
        exploration_bonus = np.sqrt(2 * np.log(rounds) / self.arm_pulls[arm])

        return average_reward + exploration_bonus

    def recommend(self, movie_title, n_recommendations=5):
        """Return up to ``n_recommendations`` titles most similar to ``movie_title``.

        Args:
            movie_title: Title to look up; must be a key of ``movie_indices``.
            n_recommendations: Maximum number of titles to return. Values
                below 1 yield an empty list and leave the bandit counters
                untouched.

        Returns:
            A list of distinct titles ordered by decreasing similarity. Only
            the ``_MAX_CANDIDATES`` nearest neighbours are considered, so the
            list can be shorter than requested.

        Raises:
            ValueError: If ``movie_title`` is not in the catalogue.
        """
        if movie_title not in self.movie_indices:
            raise ValueError(f"Movie '{movie_title}' not found in database")

        # Without this guard the "== n_recommendations" stop condition below
        # would never fire and every candidate would be returned.
        if n_recommendations < 1:
            return []

        movie_index = self.movie_indices[movie_title]
        ranked = self.similarity_matrix[movie_index].argsort()[::-1]
        # Drop the query movie by index rather than by position: when another
        # movie has an identical feature vector, the query is not guaranteed
        # to be the first element of the ranking.
        ranked = ranked[ranked != movie_index][:self._MAX_CANDIDATES]

        recommendations = []
        for title in self.df['Series_Title'].iloc[ranked]:
            # The title check also filters other rows that share the query's
            # title; the membership check de-duplicates repeated titles.
            if title != movie_title and title not in recommendations:
                recommendations.append(title)
                if len(recommendations) == n_recommendations:
                    break

        self._update_bandit_stats(recommendations)
        return recommendations

    def _update_bandit_stats(self, recommended_movies):
        """Record one recommendation round in the bandit counters.

        Increments ``total_pulls`` once, then for each recommended title
        increments its pull count and adds a reward of 1 to whichever known
        arm currently has the highest UCB score.

        Args:
            recommended_movies: Iterable of titles returned to the caller.
        """
        self.total_pulls += 1

        for movie in recommended_movies:
            if movie not in self.arm_pulls:
                self.arm_pulls[movie] = 0
                self.arm_rewards[movie] = 0

            self.arm_pulls[movie] += 1

            # Scores are recomputed on every iteration because the pull count
            # changed just above and rewards change at the end of each pass.
            ucb_scores = {arm: self._ucb_score(arm) for arm in self.arm_pulls}
            top_arm = max(ucb_scores, key=ucb_scores.get)

            self.arm_rewards[top_arm] += 1

In [23]:
# Build the recommender from the CSV file (path is relative to the working directory).
recommender = MovieRecommender('./imdb_top_1000.csv')

# Ask for the default number of titles similar to 'Shrek' and show them.
recommendations = recommender.recommend(movie_title='Shrek')
print("Recommendations:", recommendations)

Recommendations: ['Finding Nemo', 'WALL·E', 'Uri: The Surgical Strike', 'Vampire Hunter D: Bloodlust', 'The Fugitive']
