In [1]:
import os
import warnings

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.manifold import TSNE

from helpers import load_data, get_embedding

# Silence every warning for the rest of the session so cell output stays readable.
warnings.filterwarnings("ignore")

# Root directory of the dataset. The default is the original author's local
# checkout; set the TADA_DATA_PATH environment variable to point the notebook
# at another location without editing this cell.
# NOTE(review): the default ends with a path separator; load_data may rely on
# that when building file names, so keep it in any override - confirm in helpers.
DATA_PATH = os.environ.get(
    'TADA_DATA_PATH',
    '/Users/tanguydeclety/Documents/GitHub/ada-2023-project-tada/data/tada/',
)

# Load the data
loaded_data = load_data(DATA_PATH)

# Pull the individual datasets out of the mapping returned by load_data so the
# following cells can refer to them by name.
character_metadata = loaded_data['character_metadata']
movie_metadata = loaded_data['movie_metadata']
plot_summaries = loaded_data['plot_summaries']
embeddings = loaded_data['embeddings']
combined_plot_summaries = loaded_data['combined_plot_summaries']

In [None]:
# Reduce the embeddings to two dimensions for plotting. The fixed
# random_state makes the projection repeatable between runs.
tsne = TSNE(
    random_state=0,
    n_components=2,
)
tsne_obj = tsne.fit_transform(X=embeddings)

In [None]:
# Build the ID -> name lookup once instead of scanning movie_metadata on every
# call. Rows without an ID are dropped (a missing ID never matched under the
# previous equality scan either) and, for repeated IDs, the first row wins,
# which matches the previous `names.values[0]` behaviour.
# The lookup is a snapshot: re-run this cell if movie_metadata is reloaded.
_known_movies = (
    movie_metadata
    .dropna(subset=['Wikipedia movie ID'])
    .drop_duplicates(subset='Wikipedia movie ID', keep='first')
)
_MOVIE_NAME_BY_ID = dict(
    zip(_known_movies['Wikipedia movie ID'], _known_movies['Movie name'])
)


def movie_id_to_name(wikipedia_movie_id):
    """Return the movie name recorded for a Wikipedia movie ID.

    Parameters
    ----------
    wikipedia_movie_id
        Value to look up in the 'Wikipedia movie ID' column of
        ``movie_metadata``. May be ``None``.

    Returns
    -------
    ``None`` when ``wikipedia_movie_id`` is ``None``; the string ``"Unknown"``
    when no row of ``movie_metadata`` has that ID; otherwise the
    'Movie name' of the first matching row.
    """
    if wikipedia_movie_id is None:
        return None

    # Constant-time dictionary lookup; IDs that are absent fall back to "Unknown".
    return _MOVIE_NAME_BY_ID.get(wikipedia_movie_id, "Unknown")


# Name for each plot summary, in the same row order as combined_plot_summaries.
movie_names = combined_plot_summaries['Wikipedia movie ID'].apply(movie_id_to_name)

In [None]:
search_terms = ["War", "Pink", "Detective", "Indian", "Love"]

# Calculate cosine similarities for each search term.
# A plain dot product equals the cosine similarity only when every vector has
# unit length, so the score is normalised explicitly here. For vectors that
# are already unit length the result is the same up to floating-point rounding.
embedding_matrix = np.asarray(embeddings, dtype=float)
# One L2 norm per row; computed once because it does not depend on the term.
embedding_norms = np.linalg.norm(embedding_matrix, axis=1)

cosine_similarities_dict = {}
for term in search_terms:
    search_embedding = np.asarray(get_embedding(term), dtype=float)
    dot_products = embedding_matrix @ search_embedding
    norm_products = embedding_norms * np.linalg.norm(search_embedding)
    # A zero-length vector has no direction; report similarity 0 for it
    # instead of dividing by zero.
    cosine_similarities_dict[term] = np.divide(
        dot_products,
        norm_products,
        out=np.zeros_like(dot_products),
        where=norm_products > 0,
    )


In [None]:

# The figure opens on the first search term; the dropdown below switches
# to the others.
initial_term = search_terms[0]

# One row per point: t-SNE coordinates, hover label, and the score used to
# color the marker for the initial term.
tsne_df = pd.DataFrame({
    'X': tsne_obj[:, 0],
    'Y': tsne_obj[:, 1],
    'Movie': movie_names,
    'Color': cosine_similarities_dict[initial_term],
})

# Single scatter trace; only the movie name is shown on hover.
fig = go.Figure(data=go.Scatter(
    x=tsne_df['X'],
    y=tsne_df['Y'],
    mode='markers',
    marker=dict(
        color=tsne_df['Color'],  # Per-point score for the initial term
        colorscale='RdBu',
        colorbar=dict(title='Cosine Similarity'),
    ),
    text=tsne_df['Movie'],
    hoverinfo='text',
))

fig.update_layout(title=f'T-SNE plot of movie embeddings for search of {initial_term}')

# Dropdown menu with one button per search term. Each button uses the "update"
# method: the first dict restyles the trace (the color array is wrapped in a
# list because restyle values are given per trace), the second updates the
# layout title.
fig.update_layout(
    updatemenus=[
        dict(
            active=0,  # Matches initial_term, which is search_terms[0]
            buttons=[
                dict(
                    label=term,
                    method="update",
                    args=[
                        {"marker.color": [cosine_similarities_dict[term]]},
                        {"title": f'T-SNE plot of movie embeddings for search of {term}'},
                    ],
                )
                for term in search_terms
            ],
        )
    ]
)

fig.show()

# Save as html file. The target directory is created first so the export also
# works on a fresh checkout where "plots/" does not exist yet.
output_path = "plots/1-tsne.html"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
fig.write_html(output_path, include_plotlyjs="cdn")
