In [None]:
import os
import warnings

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import tqdm
from sklearn.manifold import TSNE

from helpers import load_data, get_embedding

# Silence every warning for the rest of the notebook so cell outputs stay readable.
warnings.filterwarnings("ignore")

# Location handed to `load_data` (imported from the project's `helpers` module).
DATA_PATH = 'data/'

# `load_data` returns a mapping; only the entries unpacked below are used here.
loaded_data = load_data(DATA_PATH)

(
    character_metadata,
    movie_metadata,
    plot_summaries,
    embeddings,
    combined_plot_summaries,
) = (
    loaded_data[key]
    for key in (
        'character_metadata',
        'movie_metadata',
        'plot_summaries',
        'embeddings',
        'combined_plot_summaries',
    )
)

In [None]:
# City names used as free-text queries throughout the notebook: each name is
# embedded with `get_embedding`, matched against the movie plot embeddings, and
# later geocoded for the map. The list is grouped by continent and is mostly,
# but not only, capital cities (e.g. Zurich, Istanbul, Lagos and Dubai are not
# capitals). Names must be unique because they are used as dictionary keys.
cities = [
    # Europe
    'Paris', 'Berlin', 'Bucharest', 'Kiev', 'Amsterdam', 'Zurich', 'London', 
    'Madrid', 'Lisbon', 'Rome', 'Athens', 'Oslo', 'Stockholm', 'Helsinki',
    'Copenhagen', 'Dublin', 'Brussels', 'Warsaw', 'Prague', 'Vienna', 'Budapest',
    'Belgrade', 'Sofia', 'Tirana', 'Reykjavik', 'Luxembourg', 'Monaco', 'Vaduz',
    'San Marino', 'Andorra la Vella', 'Moscow', 'Saint Petersburg', 'Istanbul', 'Ankara',
    'Edinburgh', 'Cardiff', 'Belfast',

    # Americas
    'Washington D.C.', 'Ottawa', 'Mexico City', 'Buenos Aires', 'Brasília', 'Santiago',
    'Lima', 'Bogotá', 'Caracas', 'Quito', 'Montevideo', 'Havana', 'Kingston',
    'San José', 'Panama City', 'Tegucigalpa', 'San Salvador', 'Guatemala City',
    'Managua', 'Port-au-Prince', 'Santo Domingo',

    # Asia
    'Beijing', 'Tokyo', 'New Delhi', 'Seoul', 'Jakarta', 'Bangkok', 'Manila',
    'Kuala Lumpur', 'Singapore', 'Islamabad', 'Dhaka', 'Astana', 'Ulaanbaatar',
    'Riyadh', 'Tehran', 'Baghdad', 'Jerusalem', 'Doha', 'Dubai', 'Abu Dhabi',
    'Kabul', 'Yerevan', 'Baku', 'Tbilisi',

    # Africa
    'Cairo', 'Nairobi', 'Pretoria', 'Algiers', 'Rabat', 'Lagos', 'Addis Ababa',
    'Accra', 'Dakar', 'Tunis', 'Tripoli', 'Khartoum', 'Luanda', 'Harare', 'Kigali',
    'Kampala', 'Mogadishu', 'Bamako', 'Niamey'
]



# One embedding per city, in the same order as `cities`; tqdm shows progress
# because `get_embedding` is called once per name.
embedded_cities = [get_embedding(city_name) for city_name in tqdm.tqdm(cities)]
    

In [None]:
# How many standard deviations above the mean similarity a plot must score to
# be counted as related to a city.
SIMILARITY_STD_THRESHOLD = 3

# Maps each city name to the Wikipedia movie IDs of the plots most similar to it.
# Every city starts with an empty list so the key exists even if the loop below
# does not reach it.
movies_in_cities_and_countries = {city_country: [] for city_country in cities}

# `zip` has no length, so the total is passed explicitly; without it tqdm can
# only show a running count with no percentage or ETA.
for city, city_embedding in tqdm.tqdm(zip(cities, embedded_cities), total=len(cities)):
    # NOTE(review): a plain dot product equals cosine similarity only when the
    # embeddings are unit-length — confirm this holds for `get_embedding` output.
    cosine_similarities = np.dot(embeddings, city_embedding)

    # Keep only the plots whose similarity is an outlier for this city.
    similarity_mean = np.mean(cosine_similarities)
    similarity_std = np.std(cosine_similarities)
    similarity_cutoff = similarity_mean + SIMILARITY_STD_THRESHOLD * similarity_std

    similar_indices = np.where(cosine_similarities > similarity_cutoff)[0]

    # Positional lookup: this relies on row i of `embeddings` describing row i
    # of `combined_plot_summaries`.
    wikipedia_movie_ids = combined_plot_summaries.iloc[similar_indices]['Wikipedia movie ID'].values
    movies_in_cities_and_countries[city] = wikipedia_movie_ids


In [None]:
# For every city, gather the plot embeddings of the movies selected for it.
# The ID column is looked up once and reused for each membership test.
summary_movie_ids = combined_plot_summaries['Wikipedia movie ID']

embeddings_of_movies_in_cities_and_countries = {
    city_country: np.array(
        combined_plot_summaries.loc[
            summary_movie_ids.isin(movies_in_cities_and_countries[city_country]),
            'embedding',
        ].values.tolist()
    )
    for city_country in cities
}

In [None]:
# Themes every city's movies are compared against.
general_terms = ['Drugs', 'Love', 'War', 'Poverty', 'Comedy', 'Happiness', 'Sadness', 'Gang', 'Hippies', 'Guns']
embeddings_of_general_terms = { general_term: get_embedding(general_term) for general_term in general_terms }

# similarity_movie_to_term[city][term] -> mean similarity between the city's
# movies and the term; initialised to 0.0 and overwritten below.
similarity_movie_to_term = { city : { general_term: 0.0 for general_term in general_terms } for city in cities }

embedded_cities_and_countries = np.array(embedded_cities)

# Take the embedding width from the term embeddings instead of hard-coding it,
# so the cell keeps working if the embedding model changes.
embedding_dim = len(embeddings_of_general_terms[general_terms[0]])

for city in cities:
    # The reshape turns the empty array of a city with no selected movies into
    # a (0, embedding_dim) matrix so the dot product below stays well-defined.
    # It does not depend on the term, so it is done once per city.
    city_movie_embeddings = np.asarray(
        embeddings_of_movies_in_cities_and_countries[city]
    ).reshape(-1, embedding_dim)

    for term, term_embedding in embeddings_of_general_terms.items():
        cosine_similarities = np.dot(city_movie_embeddings, term_embedding)
        # For a city with no movies this is the mean of an empty array, i.e. NaN.
        similarity_movie_to_term[city][term] = np.mean(cosine_similarities)

In [None]:
# Per-term plotting inputs: cities ordered by ascending similarity, the matching
# scores, and the hover labels shown for each point.
precomputed_data = {}

for term in general_terms:
    # `sorted` is stable, so cities with equal scores keep their original order.
    ranked_cities = sorted(cities, key=lambda name: similarity_movie_to_term[name][term])
    ranked_scores = [similarity_movie_to_term[name][term] for name in ranked_cities]

    precomputed_data[term] = {
        "sorted_cities": ranked_cities,
        "similarity_scores": ranked_scores,
        "hover_text": [
            f"{city}: {score:.2f}" for city, score in zip(ranked_cities, ranked_scores)
        ],
    }


In [None]:

# Term whose data is drawn when the figure first renders.
initial_term = general_terms[0]
initial_view = precomputed_data[initial_term]

# One dropdown entry per term. Each entry restyles the single trace (x and hover
# text) and relayouts the title and the x-axis ticks. The title is addressed as
# 'title.text' because newer plotly.js releases no longer accept a bare string
# for the `title` attribute.
term_buttons = [
    dict(
        label=term,
        method='update',
        args=[
            {
                'x': [precomputed_data[term]["similarity_scores"]],
                'text': [precomputed_data[term]["hover_text"]],
            },
            {
                'title.text': f'Similarity to the Word "{term}" for Cities and Countries',
                'xaxis.tickvals': precomputed_data[term]["similarity_scores"],
                'xaxis.ticktext': precomputed_data[term]["sorted_cities"],
            },
        ],
    )
    for term in general_terms
]

# Create the figure: every city is a marker on one horizontal line (y is
# constant), positioned along x by its similarity score.
fig = go.Figure(data=[go.Scatter(
    x=initial_view["similarity_scores"],
    y=[1] * len(initial_view["similarity_scores"]),
    mode='markers',
    text=initial_view["hover_text"],
    hoverinfo='text'
)])

# Update layout and add dropdown
fig.update_layout(
    title_text=f'Similarity to the Word "{initial_term}" for Cities and Countries',
    xaxis=dict(
        title='Similarity Score',
        # Place one tick at every city's score and label it with the city name.
        tickvals=initial_view["similarity_scores"],
        ticktext=initial_view["sorted_cities"]
    ),
    yaxis=dict(
        showgrid=False,
        zeroline=False,
        showticklabels=False,
    ),
    hovermode='closest',
    showlegend=False,
    updatemenus=[dict(
        buttons=term_buttons,
        direction='down',
        showactive=True,
    )],
    hoverlabel=dict(
        bgcolor="white",
        font_size=16,
        font_family="Rockwell"
    ),
)

fig.update_xaxes(rangeslider=dict(visible=True))

fig.show()

In [None]:
import googlemaps
import pandas as pd
import os

# The API key is read from the environment so it never appears in the notebook.
gmaps = googlemaps.Client(key=os.environ['GOOGLE_MAPS_API_KEY'])

# City name -> (latitude, longitude); (None, None) when geocoding finds nothing.
city_coordinates = {}

for city in cities:
    matches = gmaps.geocode(city)
    if not matches:
        city_coordinates[city] = (None, None)
        continue

    # Only the first geocoding match is used.
    location = matches[0]["geometry"]["location"]
    city_coordinates[city] = (location["lat"], location["lng"])


In [None]:
from sklearn.preprocessing import MinMaxScaler

# Term -> DataFrame with one row per city: name, coordinates, raw similarity and
# the similarity rescaled by MinMaxScaler.
term_data = {}

scaler = MinMaxScaler()

# Coordinates do not depend on the term, so they are collected once.
latitudes = [city_coordinates[city][0] for city in cities]
longitudes = [city_coordinates[city][1] for city in cities]

for term in general_terms:
    term_frame = pd.DataFrame({
        'City': cities,
        'Latitude': latitudes,
        'Longitude': longitudes,
        'Similarity': [similarity_movie_to_term[city][term] for city in cities],
    })

    # The scaler is refitted for every term, so scaled values are relative to
    # that term's own minimum and maximum.
    term_frame['Similarity_Scaled'] = scaler.fit_transform(term_frame[['Similarity']])
    term_data[term] = term_frame


In [None]:
import plotly.graph_objects as go

def _map_hover_text(frame):
    """Return the '<city>: <similarity>' hover label for every row of `frame`."""
    return frame['City'] + ": " + frame['Similarity'].astype(str)


# Initial term for the map
initial_term = general_terms[0]
df = term_data[initial_term]

# One dropdown entry per term. Each entry swaps the trace's coordinates, hover
# text and marker colours, and updates the title. The title is addressed as
# 'title.text' because newer plotly.js releases no longer accept a bare string
# for the `title` attribute.
map_buttons = [
    dict(
        label=term,
        method='update',  # 'update' changes both trace data and layout
        args=[
            {
                'lat': [term_data[term]['Latitude']],
                'lon': [term_data[term]['Longitude']],
                'text': [_map_hover_text(term_data[term])],
                'marker.color': [term_data[term]['Similarity_Scaled']],
            },
            {'title.text': f"Map for Similarity to the Word '{term}' for Cities and Countries"},
        ],
    )
    for term in general_terms
]

# Creating the initial map: one marker per city, coloured by scaled similarity.
fig = go.Figure(go.Scattergeo(
    lat=df['Latitude'],
    lon=df['Longitude'],
    text=_map_hover_text(df),
    marker=dict(
        color=df['Similarity_Scaled'],
        line_color='rgb(40,40,40)',
        line_width=0.5,
        sizemode='diameter'
    ),
    hoverinfo='text'
))

fig.update_layout(
    title_text=f"Map for Similarity to the Word '{initial_term}' for Cities and Countries",
    geo=dict(
        showland=True,
        landcolor='rgb(217, 217, 217)',
        projection_type='natural earth'
    ),
    updatemenus=[dict(
        buttons=map_buttons,
        direction='down',
        showactive=True
    )]
)

fig.show()

# Save the interactive map to plots/2-map.html (relative to the working
# directory), creating the folder first so the write cannot fail on a fresh
# checkout.
os.makedirs("plots", exist_ok=True)
fig.write_html("plots/2-map.html")