In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from math import sqrt
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate
import tkinter as tk
from tkinter import ttk
from PIL import Image, ImageTk
import requests
import io
import re

In [29]:
# 數據加載
def load_data(data_dir='ml-latest-small'):
    """Load the MovieLens ratings and movies tables and derive model inputs.

    Args:
        data_dir: Directory containing ``ratings.csv`` and ``movies.csv``.
            Defaults to ``'ml-latest-small'`` (the previously hard-coded
            location), so existing zero-argument calls behave as before.

    Returns:
        A 4-tuple ``(data, R, genres, movie_id_mapping)``:

        * ``data`` - ratings merged with movies on ``movieId``; the ``userId``
          and ``movieId`` columns are replaced by contiguous 0-based indices.
        * ``R`` - dense user x movie rating array built from ``data``, with
          missing ratings filled with 0.
        * ``genres`` - one-hot genre frame with one row per row of ``data``
          (i.e. per rating), split on ``'|'``.
        * ``movie_id_mapping`` - dict from original ``movieId`` to its 0-based
          index.
    """
    from pathlib import Path  # local import: keeps the block self-contained

    base = Path(data_dir)

    # Ratings table and movie metadata table.
    ratings = pd.read_csv(base / 'ratings.csv')
    movies = pd.read_csv(base / 'movies.csv')

    # One row per rating, enriched with the rated movie's title and genres.
    data = pd.merge(ratings, movies, on='movieId')

    # Re-index raw IDs to 0..n-1 in order of first appearance so they can be
    # used directly as embedding indices.
    user_id_mapping = {
        raw_id: index for index, raw_id in enumerate(data['userId'].unique())
    }
    movie_id_mapping = {
        raw_id: index for index, raw_id in enumerate(data['movieId'].unique())
    }
    data['userId'] = data['userId'].map(user_id_mapping)
    data['movieId'] = data['movieId'].map(movie_id_mapping)

    # User-item rating matrix; unrated cells become 0.
    R = data.pivot(index='userId', columns='movieId', values='rating').fillna(0).values

    # One-hot genre indicators, row-aligned with `data`.
    genres = data['genres'].str.get_dummies('|')

    return data, R, genres, movie_id_mapping

# 確定電影名稱
def clean_movie_title(title):
    """Reduce a catalogue-style title to plain words suitable for a search query.

    Parenthesised groups (e.g. a release year or alternate title) are removed,
    punctuation and underscores are dropped, and runs of whitespace are
    collapsed to a single space.

    Letters and digits from any script are kept: the previous ASCII-only
    character class silently deleted accented letters (``"Léon"`` became
    ``"Lon"``), and removing a parenthetical in the middle of a title left a
    doubled space behind.

    Args:
        title: Raw title string, e.g. ``"Toy Story (1995)"``.

    Returns:
        The cleaned title, e.g. ``"Toy Story"``.
    """
    # Drop "(...)" groups together with their content.
    title = re.sub(r'\([^)]*\)', '', title)
    # Drop anything that is not a word character or whitespace; `\w` is
    # Unicode-aware for str patterns. Underscore is part of `\w`, so remove it
    # explicitly to keep the original "no special characters" behaviour.
    title = re.sub(r'[^\w\s]|_', '', title)
    # Trim and collapse internal whitespace runs.
    return ' '.join(title.split())

# 使用 TMDB API 獲得電影圖片和簡介
def get_movie_details(movie_title, api_key, language='zh', timeout=10):
    """Fetch the poster image and overview of the top TMDB search hit for a title.

    Args:
        movie_title: Title text to search for.
        api_key: TMDB API key sent with the search request.
        language: Value of the ``language`` query parameter (default ``'zh'``).
        timeout: Seconds to wait on each HTTP request before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        ``(poster_image, overview)`` where ``poster_image`` is a PIL image
        opened from the downloaded poster bytes, or ``(None, None)`` when the
        search fails, returns no results, the hit has no poster, or the poster
        cannot be downloaded or decoded. A message is printed in that case.
    """
    try:
        # Pass the query via `params` so the title is URL-encoded; titles
        # containing '&', '#', '?' etc. would otherwise corrupt the query string.
        response = requests.get(
            "https://api.themoviedb.org/3/search/movie",
            params={"api_key": api_key, "query": movie_title, "language": language},
            timeout=timeout,
        )
        if response.status_code == 200:
            results = response.json().get('results') or []
            if results:
                top_hit = results[0]
                poster_path = top_hit.get('poster_path')
                overview = top_hit.get('overview')
                # A hit without a poster has poster_path None/empty; skip the
                # download instead of requesting a bogus ".../w500None" URL.
                if poster_path:
                    poster_url = f"https://image.tmdb.org/t/p/w500{poster_path}"
                    poster_response = requests.get(poster_url, timeout=timeout)
                    if poster_response.status_code == 200:
                        poster_image = Image.open(io.BytesIO(poster_response.content))
                        return poster_image, overview
    except (requests.RequestException, ValueError, OSError) as exc:
        # RequestException: connection/timeout problems; ValueError: body was
        # not valid JSON; OSError: poster bytes could not be opened as an image.
        print(f"Movie '{movie_title}' lookup failed: {exc}")
        return None, None
    print(f"Movie '{movie_title}' not found in TMDB.")
    return None, None

# 構建電影推薦模型
def build_model(num_users, num_items, num_genres, K):
    """Assemble and compile the rating-prediction network.

    The network takes three inputs, in this order: a user index, an item
    index and a genre vector of width ``num_genres``. The two indices are
    embedded into ``K`` dimensions each, flattened, concatenated with the
    genre vector and passed through two ReLU layers (128 and 64 units) to a
    single linear output.

    Args:
        num_users: Size of the user embedding table.
        num_items: Size of the item embedding table.
        num_genres: Width of the genre input vector.
        K: Embedding dimension for both users and items.

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer and
        mean-squared-error loss.
    """
    # Index inputs: one integer per sample.
    uid_in = Input(shape=(1,))
    iid_in = Input(shape=(1,))

    # Embedding lookups, then drop the length-1 sequence axis.
    uid_embedded = Embedding(num_users, K)(uid_in)
    iid_embedded = Embedding(num_items, K)(iid_in)
    uid_latent = Flatten()(uid_embedded)
    iid_latent = Flatten()(iid_embedded)

    # Genre vector input.
    genre_in = Input(shape=(num_genres,))

    # Join the latent factors with the genre features.
    features = Concatenate()([uid_latent, iid_latent, genre_in])

    # Two hidden ReLU layers followed by a single linear output unit.
    for width in (128, 64):
        features = Dense(width, activation='relu')(features)
    rating = Dense(1)(features)

    network = Model(inputs=[uid_in, iid_in, genre_in], outputs=rating)
    network.compile(optimizer='adam', loss='mean_squared_error')
    return network

# 評估模型
def evaluate_model(R, R_pred):
    """Return the RMSE between ``R`` and ``R_pred`` over the rated entries only.

    Entries of ``R`` that are not strictly positive are treated as unrated and
    excluded from the error.
    """
    rated = R > 0
    mse = mean_squared_error(R[rated], R_pred[rated])
    return sqrt(mse)


In [35]:
# GUI 設計
class RecommenderApp:
    """Tkinter window that recommends movies for the genres the user ticks.

    Pressing "Recommend" scores every item with the rating model, keeps the
    highest-scoring movies that belong to at least one selected genre and for
    which a TMDB poster could be fetched, and shows them in a three-column
    grid. Clicking a poster opens its overview in a separate window.

    Args:
        root: The Tk root (or toplevel) window to build the interface in.
        genres: Iterable of genre names; one checkbox is created per name, and
            their order defines the layout of the genre vector fed to the model.
        movies: DataFrame with ``movieId``, ``title`` and ``genres`` columns.
            ``movieId`` is assumed to already be the 0-based index used to
            train the model.
        movie_id_mapping: Mapping from original to 0-based movie IDs (stored,
            not otherwise used by this class).
        api_key: TMDB API key forwarded to ``get_movie_details``.
        model: Trained model exposing ``predict([users, items, genres])``.
            When omitted, a module-level ``model`` is looked up at
            recommendation time for backward compatibility.
        num_items: Number of item indices to score (``0..num_items-1``). When
            omitted it is derived from the distinct ``movieId`` values in
            ``movies``.
        user_id: 0-based user index predictions are made for (default 1, the
            example user previously hard-coded).
        max_results: Maximum number of movies to display (default 9).
    """

    GRID_COLUMNS = 3  # movies per row in the results grid

    def __init__(self, root, genres, movies, movie_id_mapping, api_key,
                 model=None, num_items=None, user_id=1, max_results=9):
        self.root = root
        self.root.title("Movie Recommendation System")

        self.movies = movies
        self.movie_id_mapping = movie_id_mapping
        self.api_key = api_key
        self.model = model
        self.num_items = num_items
        self.user_id = user_id
        self.max_results = max_results

        # movieId -> (title, genres), taken from the first row of each movie.
        # `movies` may hold one row per rating, so filtering it for every
        # candidate would rescan the whole frame on each lookup.
        unique_movies = movies.drop_duplicates(subset='movieId')
        self._movie_info = {
            int(movie_id): (title, genre_string)
            for movie_id, title, genre_string in zip(
                unique_movies['movieId'], unique_movies['title'], unique_movies['genres']
            )
        }

        # Main layout.
        self.main_frame = tk.Frame(root, padx=10, pady=10)
        self.main_frame.pack(fill=tk.BOTH, expand=True)

        self.label = ttk.Label(self.main_frame, text="Movie Recommendation System", font=("Helvetica", 16))
        self.label.pack(pady=10)

        self.genre_label = ttk.Label(self.main_frame, text="Choose your favorite genres:", font=("Helvetica", 12))
        self.genre_label.pack(pady=5)

        self.genre_frame = tk.Frame(self.main_frame)
        self.genre_frame.pack(pady=5)

        # One IntVar per genre; insertion order is the genre-vector order.
        self.genre_vars = {genre: tk.IntVar() for genre in genres}

        # Lay the checkboxes out five per row.
        for index, (genre, var) in enumerate(self.genre_vars.items()):
            cb = ttk.Checkbutton(self.genre_frame, text=genre, variable=var)
            cb.grid(row=index // 5, column=index % 5, padx=5, pady=2)

        self.run_button = ttk.Button(self.main_frame, text="Recommend", command=self.run_recommendation)
        self.run_button.pack(pady=10)

        # Scrollable results area: a frame embedded in a canvas.
        self.canvas = tk.Canvas(self.main_frame)
        self.scroll_y = tk.Scrollbar(self.main_frame, orient="vertical", command=self.canvas.yview)
        self.scroll_frame = tk.Frame(self.canvas)
        # Keep the scrollable region in sync with the frame's size.
        self.scroll_frame.bind("<Configure>", lambda e: self.canvas.configure(scrollregion=self.canvas.bbox("all")))
        self.canvas.create_window((0, 0), window=self.scroll_frame, anchor="nw")
        self.canvas.configure(yscrollcommand=self.scroll_y.set)

        self.canvas.pack(side="left", fill="both", expand=True)
        self.scroll_y.pack(side="right", fill="y")

        for i in range(self.GRID_COLUMNS):
            self.scroll_frame.grid_columnconfigure(i, weight=1)

    def _resolve_model(self):
        """Return the model to predict with, or None if none is available."""
        if self.model is not None:
            return self.model
        # Backward compatibility: earlier callers relied on a notebook-level
        # `model` variable instead of passing one in.
        return globals().get('model')

    def _resolve_num_items(self):
        """Return how many item indices to score."""
        if self.num_items is not None:
            return self.num_items
        # Assumes movie IDs are contiguous 0..n-1, as produced by load_data.
        return int(self.movies['movieId'].nunique())

    def _show_message(self, text):
        """Display a one-line notice in the results area."""
        notice = tk.Label(self.scroll_frame, text=text, font=("Helvetica", 10))
        notice.grid(row=0, column=0, columnspan=self.GRID_COLUMNS, padx=5, pady=5)

    def run_recommendation(self):
        """Score all items for the selected genres and render the top matches."""
        # Clear previous results.
        for widget in self.scroll_frame.winfo_children():
            widget.destroy()

        selected_genres = {genre for genre, var in self.genre_vars.items() if var.get() == 1}
        if not selected_genres:
            # Nothing can match an empty selection; avoid scoring and scanning
            # the whole catalogue just to show an empty grid.
            self._show_message("Please select at least one genre.")
            return

        model = self._resolve_model()
        if model is None:
            self._show_message("No trained model is available.")
            return
        num_items = self._resolve_num_items()

        # 0/1 vector in checkbox order, repeated for every candidate item.
        genre_vector = np.array(
            [1.0 if var.get() == 1 else 0.0 for var in self.genre_vars.values()]
        )
        item_ids = np.arange(num_items)
        user_ids = np.full(num_items, self.user_id)
        genre_batch = np.tile(genre_vector, (num_items, 1))

        predictions = model.predict([user_ids, item_ids, genre_batch])
        scores = np.asarray(predictions).flatten()

        # Highest predicted rating first.
        ranked = sorted(zip(item_ids, scores), key=lambda pair: pair[1], reverse=True)

        recommended_movies = []
        for movie_id, score in ranked:
            if len(recommended_movies) >= self.max_results:
                break
            info = self._movie_info.get(int(movie_id))
            if info is None:
                continue
            movie_title, movie_genres = info
            # Match on whole genre names rather than substrings of the raw
            # "A|B|C" string.
            if selected_genres.isdisjoint(str(movie_genres).split('|')):
                continue
            try:
                # Search with the cleaned title: catalogue titles carry a
                # "(year)" suffix and punctuation that are not part of the name.
                poster_image, overview = get_movie_details(clean_movie_title(movie_title), self.api_key)
            except requests.RequestException:
                # A network failure for one movie should not abort the whole
                # recommendation run.
                continue
            if poster_image:
                recommended_movies.append((movie_title, score, poster_image, overview))

        if not recommended_movies:
            self._show_message("No recommendations found.")
            return

        for index, (title, score, poster_image, overview) in enumerate(recommended_movies):
            # Each movie occupies two grid rows: poster, then caption.
            row = 2 * (index // self.GRID_COLUMNS)
            col = index % self.GRID_COLUMNS

            poster_image = poster_image.resize((100, 150), Image.Resampling.LANCZOS)
            poster_photo = ImageTk.PhotoImage(poster_image)
            img_label = tk.Label(self.scroll_frame, image=poster_photo)
            # Keep a reference so the image is not garbage-collected.
            img_label.image = poster_photo
            img_label.grid(row=row, column=col, padx=5, pady=5, sticky='n')

            title_label = tk.Label(self.scroll_frame, text=f"{title} ({score:.2f})", font=("Helvetica", 10))
            title_label.grid(row=row + 1, column=col, padx=5, pady=5, sticky='n')

            # Bind the overview as a default argument so each poster shows its
            # own text rather than the last loop value.
            img_label.bind("<Button-1>", lambda e, overview=overview: self.show_overview(overview))

    def show_overview(self, overview):
        """Open a small window showing a movie's overview text."""
        top = tk.Toplevel(self.root)
        top.title("Movie Overview")
        # The overview may be empty or missing; show a placeholder instead of
        # a blank window.
        msg = tk.Message(top, text=overview or "No overview available.", padx=10, pady=10)
        msg.pack()

if __name__ == "__main__":
    import os  # local import: only this entry point needs it

    # Read the TMDB API key from the environment rather than embedding a
    # credential in the notebook. Checked first so a missing key fails before
    # the slow training step. NOTE(review): the key that was previously
    # committed here should be treated as leaked and rotated.
    api_key = os.environ.get("TMDB_API_KEY")
    if not api_key:
        raise RuntimeError("Set the TMDB_API_KEY environment variable to your TMDB API key.")

    # Load data.
    data, R, genres, movie_id_mapping = load_data()

    # Report the largest mapped user and movie indices.
    max_user_id = data['userId'].max()
    max_movie_id = data['movieId'].max()
    print(f"Max user ID: {max_user_id}, Max movie ID: {max_movie_id}")

    # Build the model.
    num_users = data['userId'].nunique()
    num_items = data['movieId'].nunique()
    num_genres = genres.shape[1]
    K = 50  # latent feature dimension

    model = build_model(num_users, num_items, num_genres, K)

    # Train the model. The mapped IDs already start at 0.
    users, items = data['userId'].values, data['movieId'].values
    ratings = data['rating'].values
    # `genres` has one row per rating, aligned with `data`, so it is used as
    # is. Indexing it with `users` (as before) picked the genre row whose
    # position equals the user ID, i.e. the genres of an unrelated rating.
    genres_input = genres.values

    model.fit([users, items, genres_input], ratings, epochs=10, batch_size=64)

    # Launch the GUI.
    root = tk.Tk()
    app = RecommenderApp(root, genres.columns, data, movie_id_mapping, api_key)
    root.mainloop()

Max user ID: 609, Max movie ID: 9723
Epoch 1/10
[1m1576/1576[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - loss: 1.5789
Epoch 2/10
[1m1576/1576[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.6960
Epoch 3/10
[1m1576/1576[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.5940
Epoch 4/10
[1m1576/1576[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.5132
Epoch 5/10
[1m1576/1576[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.4437
Epoch 6/10
[1m1576/1576[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - loss: 0.3865
Epoch 7/10
[1m1576/1576[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.3467
Epoch 8/10
[1m1576/1576[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.3086
Epoch 9/10
[1m1576/1576[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - loss: 0.2799
Epoch 10/10
[1m1576/1576[0m [32m