In [None]:
from recommender import Recommender
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

import ipywidgets as widgets
from IPython.display import display, clear_output


In [None]:
# Fail fast, with an actionable message, when the kernel is not running the
# pandas release this notebook was written for.
# NOTE(review): the pin is presumably needed to read the pickled frame loaded
# later in the notebook — confirm before relaxing it.
assert pd.__version__ == '1.5.3', (
    f"This notebook expects pandas 1.5.3 but found pandas {pd.__version__}"
)

In [None]:
def _title_positions(titles, df_original):
    """Return the positional row number of the first row matching each title.

    Raises:
        IndexError: if a title does not occur in df_original['Series_Title'].
    """
    catalogue = df_original['Series_Title'].to_numpy()
    positions = []
    for title in titles:
        hits = np.flatnonzero(catalogue == title)
        if hits.size == 0:
            raise IndexError(f"movie title not found in df_original: {title!r}")
        positions.append(int(hits[0]))
    return positions


def _scatter_labelled(ax, coords, positions, titles, color):
    """Draw one coloured, labelled marker per (position, title) pair."""
    for position, title in zip(positions, titles):
        x, y = coords[position]
        ax.scatter(x, y, color=color)
        ax.text(x, y, title, fontsize=8)


def pca_visualize(watched_movies, recommended_movies, df_original, df_processed):
    """Plot all movies in 2-D PCA space and highlight watched/recommended ones.

    A two-component PCA is fitted on ``df_processed`` and every row is
    projected. All rows are drawn as a faint background cloud; watched movies
    are drawn in red, recommended movies in green, and the mean of the watched
    movies in yellow, each with a text label.

    Args:
        watched_movies: titles of the movies already watched.
        recommended_movies: titles of the movies being recommended.
        df_original: frame with a 'Series_Title' column used to look titles up.
        df_processed: feature frame handed to PCA. Rows are addressed by
            position, so it is assumed to be in the same row order as
            ``df_original`` — TODO confirm against the preprocessing step.

    Raises:
        IndexError: if a title is not present in ``df_original``.
    """
    # Positional lookups: the rows of the projected array are addressed by
    # position, so an index *label* would only be right for a default index.
    watched_positions = _title_positions(watched_movies, df_original)
    recommended_positions = _title_positions(recommended_movies, df_original)

    # Fit PCA on the whole feature matrix and project every row once.
    pca = PCA(n_components=2)
    pca.fit(df_processed)
    coords = np.asarray(pca.transform(df_processed))

    # One explicit figure per call; no pyplot state is cleared or reused, so
    # no stray empty figure is created alongside the real one.
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.scatter(coords[:, 0], coords[:, 1], alpha=0.2)

    # The highlighted movies are rows of df_processed, so their projections
    # are already in `coords`; no per-row densifying/re-transforming is needed.
    _scatter_labelled(ax, coords, watched_positions, watched_movies, 'red')
    _scatter_labelled(ax, coords, recommended_positions, recommended_movies, 'green')

    # Average of the watched movies in yellow. The PCA projection is affine,
    # so the mean of the projected points equals the projection of the mean
    # feature vector. Skipped when nothing has been watched (mean undefined).
    if watched_positions:
        avg_x, avg_y = coords[watched_positions].mean(axis=0)
        ax.scatter(avg_x, avg_y, color='yellow')
        ax.text(avg_x, avg_y, 'avg', fontsize=8)

    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')
    plt.show()


In [None]:
# Pre-computed feature frame, restored from its pickle.
# NOTE: unpickling executes code stored in the file — only load pickles you trust.
df_processed = pd.read_pickle(filepath_or_buffer='./data/df_processed_idf.pkl')
# Original movie table, read from CSV.
df_original = pd.read_csv(filepath_or_buffer='./data/data.csv')

In [None]:
# Build the recommender from the processed feature frame and the original
# movie table, requesting the 'cosine' metric.
# NOTE(review): the exact meaning of each argument is defined in the
# project-local `recommender` module — confirm there.
recommender = Recommender(df_processed, df_original, metric='cosine')

In [None]:
# Titles selected so far; nothing has been watched when this cell runs
watched_movies = []

# Every distinct title in the original table, in order of first appearance
all_movies = df_original['Series_Title'].unique().tolist()

# Titles still offered for selection (all of them until something is watched)
unwatched_movies = list(filter(lambda title: title not in watched_movies, all_movies))

# Multi-select list showing 16 rows at once, 800 pixels wide
movie_list = widgets.SelectMultiple(
    options=unwatched_movies,
    value=watched_movies,
    description='Recommended movies',
    disabled=False,
    rows=16,
    layout=widgets.Layout(width='800px'),
)

# Button that triggers the recalculation
button = widgets.Button(description="Recalculate")

# Output areas: the first for the text summary, the second for the PCA plot
output, pca_output = widgets.Output(), widgets.Output()

def on_button_click(b):
    """Handle a click on the "Recalculate" button.

    Adds the titles currently selected in ``movie_list`` to the global
    ``watched_movies`` list and to the recommender, asks the recommender for
    new recommendations, reorders the selectable options so that recommended
    titles come first, and refreshes the text summary and the PCA plot.

    Args:
        b: the button instance passed in by ipywidgets (unused).
    """
    global watched_movies
    newly_watched_movies = list(movie_list.value)
    watched_movies += newly_watched_movies
    for movie in newly_watched_movies:
        recommender.add_movie(movie)

    # With no watched movie at all the plot cannot be drawn (the mean of zero
    # watched points is undefined), so ask for a selection instead of failing.
    if not watched_movies:
        with output:
            clear_output(wait=True)
            print("Select at least one movie before recalculating.")
        return

    recommended = recommender.recommend(use_avg=False, n_recommendations_for_each_movie=8)
    recommended_movies = recommended['movie'].tolist()

    # Sets give constant-time membership tests for the filters below.
    watched_lookup = set(watched_movies)
    recommended_lookup = set(recommended_movies)

    # Recommended titles go first. dict.fromkeys drops repeats while keeping
    # order, so a title recommended more than once (presumably once per
    # watched movie — verify against Recommender) is listed a single time.
    recommended_movies_first = [
        movie for movie in dict.fromkeys(recommended_movies)
        if movie not in watched_lookup
    ]
    remaining_movies = [
        movie for movie in all_movies
        if movie not in watched_lookup and movie not in recommended_lookup
    ]

    # Update options of movie_list widget
    movie_list.options = recommended_movies_first + remaining_movies

    with output:
        # Replace the previous summary
        clear_output(wait=True)
        print("Watched movies:", watched_movies)
        print("Recommended Movies:", len(recommended_movies))

    with pca_output:
        # Replace the previous plot
        clear_output(wait=True)
        pca_visualize(watched_movies, recommended_movies, df_original, df_processed)


# Register the click handler on the button
button.on_click(on_button_click)

# Render the selection list, the button, the text summary and the plot area, in that order
display(movie_list, button, output, pca_output)