In [None]:
import pandas as pd
import numpy as np
import re
import tkinter as tk
from tkinter import ttk, messagebox, filedialog
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import gc
from functools import lru_cache
import os

class MemoryOptimizedRecommender:
    """Movie recommender combining collaborative filtering with genre TF-IDF.

    Data is read from two CSV files (movies and ratings), merged on
    ``movieId`` and indexed with two TF-IDF vectorizers: one over cleaned
    titles (used for search) and one over genres (used to boost or dampen
    recommendation scores).

    The rows of ``tfidf_title`` and ``tfidf_genres`` are positionally aligned
    with the rows of ``movies_data``; lookups therefore use ``iloc``.
    """

    def __init__(self):
        self.movies_data = None
        self.ratings_data = None
        self.combined_data = None
        self.vectorizer_title = None
        self.tfidf_title = None
        self.vectorizer_genres = None
        self.tfidf_genres = None

    def load_data_in_chunks(self, filepath, chunksize=100000):
        """Read a CSV in chunks, shrinking column dtypes as it goes.

        Object columns become ``category``, float columns are downcast, and
        integer columns are stored as ``int32`` when every value in the chunk
        fits. Integer columns holding larger values keep their wider dtype
        instead of being truncated.

        Args:
            filepath: Path of the CSV file to read.
            chunksize: Number of rows per chunk handed to ``pd.read_csv``.

        Returns:
            A single DataFrame with all chunks concatenated and re-indexed.
        """
        chunks = []
        for chunk in pd.read_csv(filepath, chunksize=chunksize):
            for col in chunk.columns:
                if chunk[col].dtype == 'object':
                    chunk[col] = chunk[col].astype('category')
                elif pd.api.types.is_float_dtype(chunk[col]):
                    chunk[col] = pd.to_numeric(chunk[col], downcast='float')
                elif pd.api.types.is_integer_dtype(chunk[col]):
                    narrowed = pd.to_numeric(chunk[col], downcast='integer')
                    # Normalise small ints to int32 so chunks agree on a
                    # dtype, but never squeeze values that need 64 bits.
                    if narrowed.dtype.itemsize <= 4:
                        narrowed = narrowed.astype('int32')
                    chunk[col] = narrowed
            chunks.append(chunk)
        return pd.concat(chunks, ignore_index=True)

    def clean_title(self, title):
        """Strip every character except ASCII letters, digits and spaces.

        Non-string input (e.g. NaN) yields an empty string.
        """
        if not isinstance(title, str):
            return ""
        return re.sub(r"[^a-zA-Z0-9 ]", "", title)

    def load_and_preprocess(self, movies_path, ratings_path):
        """Load both CSV files, merge them and fit the TF-IDF vectorizers.

        Movies whose genre list contains ``'(no genres listed)'`` are dropped.
        The ``timestamp`` column of the ratings file is discarded when present.

        If anything fails, the previously loaded state is restored so the
        data frames and the fitted TF-IDF matrices never get out of step.

        Args:
            movies_path: CSV with ``movieId``, ``title`` and ``genres`` columns.
            ratings_path: CSV with ``userId``, ``movieId`` and ``rating`` columns.

        Returns:
            True on success, False if loading or preprocessing failed.
        """
        previous_state = (
            self.movies_data,
            self.ratings_data,
            self.combined_data,
            self.vectorizer_title,
            self.tfidf_title,
            self.vectorizer_genres,
            self.tfidf_genres,
        )
        try:
            print("Loading movies data...")
            movies = self.load_data_in_chunks(movies_path)
            movies['genres'] = movies['genres'].str.split('|')
            movies['title'] = movies['title'].apply(self.clean_title)
            movies = movies[['movieId', 'title', 'genres']]
            lacks_genres = movies['genres'].apply(
                lambda x: '(no genres listed)' in x if isinstance(x, list) else False
            )
            # .copy() makes the filtered frame independent, so the column
            # assignment below does not write into a slice of the original.
            movies = movies[~lacks_genres].copy()
            movies['movieId'] = movies['movieId'].astype('int64')

            print("Loading ratings data...")
            ratings = self.load_data_in_chunks(ratings_path)
            ratings = ratings.drop(columns=['timestamp'], errors='ignore')
            ratings['movieId'] = ratings['movieId'].astype('int64')
            ratings['userId'] = ratings['userId'].astype('int64')
            ratings['rating'] = pd.to_numeric(
                ratings['rating'], downcast='float'
            ).astype('float32')

            print("Merging datasets...")
            self.movies_data = movies
            self.combined_data = pd.merge(
                ratings,
                movies,
                on='movieId',
                how='inner'
            )

            # The raw ratings are only needed for the merge; release them.
            del ratings
            self.ratings_data = None
            gc.collect()
            self.setup_vectorizers()
            return True
        except Exception as e:
            (
                self.movies_data,
                self.ratings_data,
                self.combined_data,
                self.vectorizer_title,
                self.tfidf_title,
                self.vectorizer_genres,
                self.tfidf_genres,
            ) = previous_state
            print(f"Error loading data: {str(e)}")
            return False

    def setup_vectorizers(self):
        """Fit the title and genre TF-IDF models on ``self.movies_data``."""
        max_features = 5000
        print("Setting up title vectorizer...")
        self.vectorizer_title = TfidfVectorizer(ngram_range=(1, 2), max_features=max_features, dtype=np.float32)
        self.tfidf_title = self.vectorizer_title.fit_transform(self.movies_data['title'])

        print("Setting up genres vectorizer...")
        self.vectorizer_genres = TfidfVectorizer(ngram_range=(1, 2), max_features=max_features, dtype=np.float32)
        # Built as a standalone Series so movies_data itself is not mutated.
        genres_text = self.movies_data['genres'].apply(
            lambda x: ' '.join(x) if isinstance(x, list) else ''
        )
        self.tfidf_genres = self.vectorizer_genres.fit_transform(genres_text)
        del genres_text
        gc.collect()

    @staticmethod
    def _top_k_indices(scores, k):
        """Return positions of the ``k`` largest scores, best first.

        Works for arrays shorter than ``k`` (returns all positions) and for
        empty arrays (returns an empty index array).
        """
        k = min(k, scores.size)
        if k <= 0:
            return np.array([], dtype=np.intp)
        # argpartition only guarantees membership of the top-k set, not its
        # order, so the selected slice is sorted explicitly afterwards.
        top = np.argpartition(scores, -k)[-k:]
        return top[np.argsort(scores[top], kind='stable')[::-1]]

    def search_by_title(self, title):
        """Return up to five movies whose titles best match ``title``.

        Results are ordered by decreasing cosine similarity between the
        TF-IDF vector of the cleaned query and each cleaned movie title.

        The method is deliberately not memoised: a cache keyed on the query
        would keep returning rows of the old catalogue after new data is
        loaded.

        Returns:
            A DataFrame slice of ``movies_data``; an empty DataFrame on error
            (for example when no data has been loaded yet).
        """
        try:
            title = self.clean_title(title)
            query_vec = self.vectorizer_title.transform([title])
            similarity = cosine_similarity(query_vec, self.tfidf_title).flatten()
            indices = self._top_k_indices(similarity, 5)
            return self.movies_data.iloc[indices]
        except Exception as e:
            print(f"Error in search_by_title: {str(e)}")
            return pd.DataFrame()

    def get_recommendations(self, movie_id, top_n=10):
        """Recommend movies liked by users who also liked ``movie_id``.

        Steps:
          1. Find users who rated ``movie_id`` 4 or higher.
          2. Compute, among those users, the share of >=4 ratings per movie
             ("similar") and the same share over all users ("all").
          3. Multiply "similar" by 1.5 and "all" by 0.9 for the (up to) ten
             movies closest in genre, which raises their final ratio.
          4. Rank by ``similar / all`` and keep the ``top_n`` best.

        Args:
            movie_id: Identifier of the reference movie.
            top_n: Maximum number of recommendations to return.

        Returns:
            DataFrame with ``title``, ``score`` and ``genres`` columns; an
            empty DataFrame on error (for example when the movie has no
            ratings).
        """
        try:
            # Plain int avoids overflow for ids that do not fit in 32 bits.
            movie_id = int(movie_id)
            liked = self.combined_data['rating'] >= 4

            similar_users = self.combined_data[
                (self.combined_data['movieId'] == movie_id) & liked
            ]['userId'].unique()

            similar_user_recs = self.combined_data[
                (self.combined_data['userId'].isin(similar_users)) & liked
            ]['movieId'].value_counts(normalize=True)

            all_user_recs = self.combined_data[
                (self.combined_data['movieId'].isin(similar_user_recs.index)) & liked
            ]['movieId'].value_counts(normalize=True)

            selected_genres = self.combined_data[
                self.combined_data['movieId'] == movie_id
            ]['genres'].iloc[0]

            if isinstance(selected_genres, list):
                selected_genres = " ".join(selected_genres)

            query_vec = self.vectorizer_genres.transform([selected_genres])
            similarity = cosine_similarity(query_vec, self.tfidf_genres).flatten()
            similar_genre_indices = self._top_k_indices(similarity, 10)
            similar_genre_ids = self.movies_data.iloc[similar_genre_indices]['movieId'].values

            similar_user_recs.loc[similar_user_recs.index.isin(similar_genre_ids)] *= 1.5
            all_user_recs.loc[all_user_recs.index.isin(similar_genre_ids)] *= 0.9

            scores = pd.concat([
                similar_user_recs.rename('similar'),
                all_user_recs.rename('all')
            ], axis=1).fillna(0)

            scores['score'] = np.where(
                scores['all'] > 0,
                scores['similar'] / scores['all'],
                0
            )

            top_scores = scores.sort_values('score', ascending=False).head(top_n)
            recommendations = top_scores.merge(
                self.movies_data,
                left_index=True,
                right_on='movieId'
            )[['title', 'score', 'genres']]
            return recommendations
        except Exception as e:
            print(f"Error in get_recommendations: {str(e)}")
            return pd.DataFrame()

class MovieRecommendationApp:
    """Tkinter front end for ``MemoryOptimizedRecommender``.

    The window has a title search box, a tree of search results and a tree
    of recommendations for the currently selected search result.
    """

    def __init__(self, root):
        """Build the window on ``root`` and load the default CSV files."""
        self.root = root
        self.root.title("🎀 Movie Recommender (Pretty Edition) 🎀")
        self.root.geometry("1000x700")
        self.root.configure(bg="#ffe6f0")  # Soft pink background
        self.style = ttk.Style()
        self.configure_styles()
        self.recommender = MemoryOptimizedRecommender()
        # Maps each row id of the search-results tree to its movieId. The id
        # is kept here rather than re-derived from the displayed title,
        # because titles are not unique and Tk may hand numeric-looking
        # titles back as integers.
        self._result_movie_ids = {}
        # movieId whose recommendations are currently displayed; used to
        # ignore the <<TreeviewSelect>> event that follows a programmatic
        # selection which has already been handled.
        self._shown_movie_id = None
        self.setup_gui()
        self.load_data("movies.csv", "ratings.csv")

    def configure_styles(self):
        """Apply the pink colour scheme and fonts to the ttk widgets."""
        self.style.theme_use("clam")
        self.style.configure("TFrame", background="#ffe6f0")
        self.style.configure("TLabel", background="#ffe6f0", font=("Segoe UI", 10))
        self.style.configure("TLabelFrame", background="#ffe6f0", font=("Segoe UI", 11, "bold"), foreground="#cc66a2")
        self.style.configure("TEntry", padding=5)
        self.style.configure("TButton", font=("Segoe UI", 10, "bold"), background="#ffb3d9", foreground="#4b004b")
        self.style.map("TButton", background=[("active", "#ff99cc")])
        self.style.configure("Treeview", font=("Segoe UI", 10), rowheight=25)
        self.style.configure("Treeview.Heading", font=("Segoe UI", 10, "bold"), background="#ffccf2")
        self.style.map("Treeview", background=[("selected", "#ffcce6")])
        self.style.configure("Status.TLabel", background="#ffd9ec", font=("Segoe UI", 9), relief="sunken")

    def setup_gui(self):
        """Create the menu, search box, both result trees and the status bar."""
        main_frame = ttk.Frame(self.root)
        main_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

        menubar = tk.Menu(self.root)
        self.root.config(menu=menubar)
        file_menu = tk.Menu(menubar, tearoff=0)
        file_menu.add_command(label="Load Data", command=self.prompt_load_data)
        file_menu.add_separator()
        file_menu.add_command(label="Exit", command=self.root.quit)
        menubar.add_cascade(label="File", menu=file_menu)

        input_frame = ttk.LabelFrame(main_frame, text="Enter Your Favorite Movie")
        input_frame.pack(fill=tk.X, pady=5)

        ttk.Label(input_frame, text="Movie Title:").pack(side=tk.LEFT, padx=5)
        self.movie_entry = ttk.Entry(input_frame, width=50)
        self.movie_entry.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True)
        self.movie_entry.bind("<Return>", lambda e: self.search_movies())
        ttk.Button(input_frame, text="Search", command=self.search_movies).pack(side=tk.LEFT, padx=5)

        results_frame = ttk.LabelFrame(main_frame, text="Search Results - Select a Movie")
        results_frame.pack(fill=tk.BOTH, expand=True, pady=5)

        self.results_tree = ttk.Treeview(results_frame, columns=('title', 'genres'), show='headings')
        self.results_tree.heading('title', text='Title')
        self.results_tree.heading('genres', text='Genres')
        self.results_tree.column('title', width=400)
        self.results_tree.column('genres', width=300)
        scrollbar = ttk.Scrollbar(results_frame, orient="vertical", command=self.results_tree.yview)
        self.results_tree.configure(yscrollcommand=scrollbar.set)
        self.results_tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

        rec_frame = ttk.LabelFrame(main_frame, text="Recommended Movies")
        rec_frame.pack(fill=tk.BOTH, expand=True, pady=5)

        self.rec_tree = ttk.Treeview(rec_frame, columns=('title', 'score', 'genres'), show='headings')
        self.rec_tree.heading('title', text='Title')
        self.rec_tree.heading('score', text='Match Score')
        self.rec_tree.heading('genres', text='Genres')
        self.rec_tree.column('title', width=350)
        self.rec_tree.column('score', width=100)
        self.rec_tree.column('genres', width=250)
        rec_scrollbar = ttk.Scrollbar(rec_frame, orient="vertical", command=self.rec_tree.yview)
        self.rec_tree.configure(yscrollcommand=rec_scrollbar.set)
        self.rec_tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        rec_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

        self.status_var = tk.StringVar()
        self.status_var.set("Ready")
        status_bar = ttk.Label(main_frame, textvariable=self.status_var, style="Status.TLabel")
        status_bar.pack(fill=tk.X, pady=5)

        self.results_tree.bind('<<TreeviewSelect>>', self.show_recommendations)

    @staticmethod
    def _clear_tree(tree):
        """Remove every row from ``tree``."""
        for item in tree.get_children():
            tree.delete(item)

    @staticmethod
    def _format_genres(genres):
        """Render a genre list as comma-separated text; other values via str()."""
        return ', '.join(genres) if isinstance(genres, list) else str(genres)

    def prompt_load_data(self):
        """Ask for the movies and ratings CSV files, then load them.

        Cancelling either dialog aborts without changing the loaded data.
        """
        movies_path = filedialog.askopenfilename(title="Select Movies CSV File", filetypes=[("CSV files", "*.csv")])
        if not movies_path:
            return
        ratings_path = filedialog.askopenfilename(title="Select Ratings CSV File", filetypes=[("CSV files", "*.csv")])
        if not ratings_path:
            return
        self.load_data(movies_path, ratings_path)

    def load_data(self, movies_path, ratings_path):
        """Load the given CSV files into the recommender and report status.

        On success, rows shown for the previous dataset are cleared because
        their movie ids may not exist in the new one.
        """
        self.status_var.set("Loading data...")
        self.root.update()
        if self.recommender.load_and_preprocess(movies_path, ratings_path):
            self._clear_tree(self.results_tree)
            self._clear_tree(self.rec_tree)
            self._result_movie_ids.clear()
            self._shown_movie_id = None
            self.status_var.set(f"Loaded {len(self.recommender.movies_data)} movies")
        else:
            self.status_var.set("Failed to load data")
            messagebox.showerror("Error", "Could not load data files")

    def search_movies(self):
        """Search for the entered title and show recommendations for the top hit."""
        query = self.movie_entry.get().strip()
        if not query:
            messagebox.showwarning("Warning", "Please enter a movie title")
            return
        self.status_var.set(f"Searching for: {query}...")
        # Repaint the status bar without dispatching other pending events,
        # which could re-enter this handler.
        self.root.update_idletasks()
        try:
            results = self.recommender.search_by_title(query)
            self._clear_tree(self.results_tree)
            self._result_movie_ids.clear()
            self._shown_movie_id = None
            for _, row in results.iterrows():
                item_id = self.results_tree.insert(
                    '', tk.END, values=(row['title'], self._format_genres(row['genres']))
                )
                self._result_movie_ids[item_id] = row['movieId']
            items = self.results_tree.get_children()
            if items:
                self.results_tree.selection_set(items[0])
                self.show_recommendations()
                self.status_var.set(f"Found {len(results)} matches for: {query}")
            else:
                self.status_var.set("No matches found")
                messagebox.showinfo("Info", "No movies found matching your search")
        except Exception as e:
            self.status_var.set("Search error")
            messagebox.showerror("Error", f"Failed to search movies: {str(e)}")

    def show_recommendations(self, event=None):
        """Fill the recommendation tree for the selected search result.

        Called both directly and as the ``<<TreeviewSelect>>`` handler. A
        call for the movie that is already displayed is ignored, so a
        programmatic selection followed by its own select event does the
        work only once.
        """
        selected_item = self.results_tree.selection()
        if not selected_item:
            return
        item_id = selected_item[0]
        movie_id = self._result_movie_ids.get(item_id)
        if movie_id is None or movie_id == self._shown_movie_id:
            return
        self._shown_movie_id = movie_id
        try:
            selected_title = self.results_tree.item(item_id)['values'][0]
            self.status_var.set(f"Getting recommendations for: {selected_title}...")
            self.root.update_idletasks()
            recs = self.recommender.get_recommendations(movie_id)
            # Clear first so an empty result does not leave the previous
            # movie's recommendations on screen.
            self._clear_tree(self.rec_tree)
            if recs.empty:
                self.status_var.set("No recommendations found")
                messagebox.showinfo("Info", "No recommendations found for this movie")
                return
            for _, row in recs.iterrows():
                score = f"{row['score']:.4f}"
                self.rec_tree.insert(
                    '', tk.END, values=(row['title'], score, self._format_genres(row['genres']))
                )
            self.status_var.set(f"Showing {len(recs)} recommendations for: {selected_title}")
        except Exception as e:
            self._shown_movie_id = None
            self.status_var.set("Recommendation error")
            messagebox.showerror("Error", f"Failed to get recommendations: {str(e)}")

# Script entry point: build the main window and run the Tk event loop.
if __name__ == "__main__":
    try:
        root = tk.Tk()
        app = MovieRecommendationApp(root)
        root.mainloop()
    except Exception as e:
        import traceback

        # Always leave a traceback on stderr; the dialog below only shows
        # the message and may not be available at all.
        traceback.print_exc()
        try:
            messagebox.showerror("Fatal Error", f"Application crashed: {str(e)}")
        except tk.TclError:
            # Tk itself could not start (e.g. no display), so no dialog can
            # be shown; the traceback printed above is the only report.
            pass


Loading movies data...
Loading ratings data...
Merging datasets...
Setting up title vectorizer...
Setting up genres vectorizer...
