# Purpose

### 2022-03-08 post-snoosweek
Visualize the embeddings for the i18n project that includes:
- primary language
- manually curated cluster names
    - high level: ~15 groups
    - sub level: ~50 groups(?)
- Add rating (short and name)

Stretch:
- relevant country
    - this can be trickier because a sub can be relevant to multiple countries


# Imports & notebook setup

In [1]:
# autoreload mode 2: reload all imported modules before running each cell
#  NOTE(review): presumably so that edits to the `subclu` library are picked up
#  without restarting the kernel -- confirm
%load_ext autoreload
%autoreload 2

# Register bigquery magic (only needed for laptop/local, not colab)
# %load_ext google.cloud.bigquery

In [2]:
# # colab auth for BigQuery
# from google.colab import auth, files, drive
# auth.authenticate_user()
# print('Authenticated')

In [3]:
# colab notebook installs (not needed in local/laptop)
# !pip install plotly fsspec gcsfs

## Install custom library [not needed for local]

### Append google drive path so we can install library from there

In [4]:
# # Attach google drive & import my python utility functions
# # if drive.mount() fails, you can also:
# #   MANUALLY CLICK ON "Mount Drive"
# import sys


# g_drive_root = '/content/drive'

# try:
#     drive.mount(g_drive_root, force_remount=True)
#     print('   Authenticated & mounted Google Drive')
    
# except Exception as e:
#     try:
#         drive._mount(g_drive_root, force_remount=True)
#         print('   Authenticated & mounted Google Drive')
#     except Exception as e:
#         print(e)
#         raise Exception('You might need to manually mount google drive to colab')

# l_paths_to_append = [
#     f'{g_drive_root}/MyDrive/Colab Notebooks',

#     # need to append the path to subclu so that colab can import things properly
#     f'{g_drive_root}/MyDrive/Colab Notebooks/subreddit_clustering_i18n'
# ]
# for path_ in l_paths_to_append:
#     if path_ in sys.path:
#         sys.path.remove(path_)
#     print(f" Appending path: {path_}")
#     sys.path.append(path_)

### Install library

In [5]:
# # install subclu & libraries needed to read parquet files from GCS & spreadsheets
# #  make sure to use the [colab] `extra` because it includes colab-specific libraries
# module_path = f"{g_drive_root}/MyDrive/Colab Notebooks/subreddit_clustering_i18n/[colab]"

# !pip install -e $"$module_path" --quiet

## Regular Imports

In [6]:
import os
from datetime import datetime

from google.cloud import bigquery

import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib_venn import venn2_unweighted, venn3_unweighted
from tqdm import tqdm

# increase cell/notebook display width
# Import from the public `IPython.display` module: importing `display` from
#  `IPython.core.display` is deprecated (since IPython 7.14)
from IPython.display import display, HTML
display(HTML("<style>.container { width:98% !important; }</style>"))

# auth for google sheets
import gspread
from oauth2client.client import GoogleCredentials


# Authorize the gspread client with the application-default Google credentials.
# NOTE(review): the name `gc` is also the name of python's stdlib `gc` module. It's kept as-is
#  here because other cells may already refer to the client by this name.
gc = gspread.authorize(GoogleCredentials.get_application_default())

# NOTE(review): presumably this is the default GCP project picked up by the
#  google-cloud clients (e.g., bigquery) -- confirm
# os.environ['GOOGLE_CLOUD_PROJECT'] = 'data-science-prod-218515'
os.environ['GOOGLE_CLOUD_PROJECT'] = 'data-prod-165221'

## Custom imports

In [7]:
# subclu imports
import subclu
from subclu.utils.eda import (
    setup_logging, counts_describe, value_counts_and_pcts,
    notebook_display_config, print_lib_versions,
    style_df_numeric, reorder_array,
)
from subclu.models.clustering_utils import (
    create_dynamic_clusters,
    convert_distance_or_ab_to_list_for_fpr,
    reshape_df_to_get_1_cluster_per_row,
    get_primary_topic_mix_cols,
    create_dynamic_clusters_clean,
)

from subclu.models.reshape_clusters_v041 import (
    keep_only_target_labels,
    get_table_for_optimal_dynamic_cluster_params,
    get_dynamic_cluster_summary,
    flag_mature_clusters_to_exclude_from_qa,
)


# NOTE(review): `setup_logging` & `notebook_display_config` are subclu helpers defined outside
#  this notebook; presumably they configure logging & display defaults -- confirm
setup_logging()
notebook_display_config()
# print the python version & the version of each library passed in (see cell output)
print_lib_versions([gspread, pd, np])

python		v 3.7.11
===
gspread		v: 5.0.0
pandas		v: 1.2.4
numpy		v: 1.19.5


# Test interactive example - Dropdowns

Example from:
- https://plotly.com/python/dropdowns/

In [8]:
# %%time

# # load dataset
# df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/volcano.csv")

# # Create figure
# fig = go.Figure()

# # Add surface trace
# fig.add_trace(go.Heatmap(z=df.values.tolist(), colorscale="Viridis"))

# # Update plot sizing
# fig.update_layout(
#     width=800,
#     height=900,
#     autosize=False,
#     margin=dict(t=100, b=0, l=0, r=0),
# )

# # Update 3D scene options
# fig.update_scenes(
#     aspectratio=dict(x=1, y=1, z=0.7),
#     aspectmode="manual"
# )

# # Add dropdowns
# button_layer_1_height = 1.08
# fig.update_layout(
#     updatemenus=[
#         dict(
#             buttons=list([
#                 dict(
#                     args=["colorscale", "Viridis"],
#                     label="Viridis",
#                     method="restyle"
#                 ),
#                 dict(
#                     args=["colorscale", "Cividis"],
#                     label="Cividis",
#                     method="restyle"
#                 ),
#                 dict(
#                     args=["colorscale", "Blues"],
#                     label="Blues",
#                     method="restyle"
#                 ),
#                 dict(
#                     args=["colorscale", "Greens"],
#                     label="Greens",
#                     method="restyle"
#                 ),
#             ]),
#             direction="down",
#             pad={"r": 10, "t": 10},
#             showactive=True,
#             x=0.1,
#             xanchor="left",
#             y=button_layer_1_height,
#             yanchor="top"
#         ),
#         dict(
#             buttons=list([
#                 dict(
#                     args=["reversescale", False],
#                     label="False",
#                     method="restyle"
#                 ),
#                 dict(
#                     args=["reversescale", True],
#                     label="True",
#                     method="restyle"
#                 )
#             ]),
#             direction="down",
#             pad={"r": 10, "t": 10},
#             showactive=True,
#             x=0.37,
#             xanchor="left",
#             y=button_layer_1_height,
#             yanchor="top"
#         ),
#         dict(
#             buttons=list([
#                 dict(
#                     args=[{"contours.showlines": False, "type": "contour"}],
#                     label="Hide lines",
#                     method="restyle"
#                 ),
#                 dict(
#                     args=[{"contours.showlines": True, "type": "contour"}],
#                     label="Show lines",
#                     method="restyle"
#                 ),
#             ]),
#             direction="down",
#             pad={"r": 10, "t": 10},
#             showactive=True,
#             x=0.58,
#             xanchor="left",
#             y=button_layer_1_height,
#             yanchor="top"
#         ),
#     ]
# )

# fig.update_layout(
#     annotations=[
#         dict(text="colorscale", x=0, xref="paper", y=1.06, yref="paper",
#                              align="left", showarrow=False),
#         dict(text="Reverse<br>Colorscale", x=0.25, xref="paper", y=1.07,
#                              yref="paper", showarrow=False),
#         dict(text="Lines", x=0.54, xref="paper", y=1.06, yref="paper",
#                              showarrow=False)
#     ])

# fig.show()

# Load & transform projections from GCS

## Load

In [9]:
%%time

# Load the parquet file with the 2D projection columns (`tsne_*`) + subreddit metadata from GCS.
#  Name the path first so that the read call itself stays short.
path_df_tsne_and_meta_ = (
    'gs://i18n-subreddit-clustering/data/models/clustering/manual_v041_2022-03-02_21_09/manual_v041_2022-03-02_21_09/'
    'df_emb_svd2_meta-49558_by_186.parquet'
)
df_tsne_and_meta = pd.read_parquet(path_df_tsne_and_meta_)
del path_df_tsne_and_meta_

# sanity check: the shape should match the `49558_by_186` suffix in the file name
print(df_tsne_and_meta.shape)

(49558, 186)
CPU times: user 1.38 s, sys: 535 ms, total: 1.92 s
Wall time: 7.79 s


In [10]:
# Preview: first 5 rows, first 8 columns (IDs, t-SNE coordinates & geo-relevance columns)
df_tsne_and_meta.head(5).iloc[:, :8]

Unnamed: 0,subreddit_name,subreddit_id,tsne_0,tsne_1,pt_date,geo_relevant_subreddit_all,geo_relevant_countries_all,ambassador_or_default_any
0,0hthaatsjaay,t5_46wt4h,14.94499,2.25901,2021-12-14,,,
1,0nlyfantastic0,t5_4byrct,14.796744,0.838811,2021-12-14,True,Spain,False
2,0nlyleaks,t5_36f9u6,16.551882,3.293167,2021-12-14,True,France,False
3,0sanitymemes,t5_2qlzfy,0.434304,1.306796,2021-12-14,,,
4,0xpolygon,t5_2qgijx,-18.975726,-0.046309,2021-12-14,,,


## Create multiple cluster seed columns
Create one seed-name column per cluster granularity (`k`). This way we can change how many subreddit names are displayed in a plot just by changing which column is used for the text labels.

In [74]:
# Show the names of the last 34 cluster-label columns (column names that end with `_label`)
df_tsne_and_meta.columns[
    df_tsne_and_meta.columns.str.endswith('_label')
].to_list()[-34:]

['k_0320_label',
 'k_0400_label',
 'k_0500_label',
 'k_0600_label',
 'k_0657_label',
 'k_0700_label',
 'k_0800_label',
 'k_0900_label',
 'k_0958_label',
 'k_1000_label',
 'k_1065_label',
 'k_1250_label',
 'k_1500_label',
 'k_1560_label',
 'k_1750_label',
 'k_1840_label',
 'k_2000_label',
 'k_2207_label',
 'k_2250_label',
 'k_2351_label',
 'k_2500_label',
 'k_2750_label',
 'k_2830_label',
 'k_3000_label',
 'k_3145_label',
 'k_3200_label',
 'k_3400_label',
 'k_3411_label',
 'k_3600_label',
 'k_3706_label',
 'k_3800_label',
 'k_3864_label',
 'k_3927_label',
 'k_4000_label']

In [13]:
# Subreddits whose names we always want to show, even if they're not the largest in their cluster
mask_ukraine_or_russia_ = (
    (df_tsne_and_meta['subreddit_name'] == 'russia') |
    df_tsne_and_meta['subreddit_name'].str.contains('ukraine')  # |
    # df_tsne_and_meta['subreddit_name'].str.contains('eldenrin')
)

# One cluster-label column per `k` (number of clusters)
l_cols_k_labels_ = [c for c in df_tsne_and_meta.columns if c.endswith('_label')]

# Sort ONCE, outside the loop: the order by users doesn't depend on `k`, so re-sorting
#  the whole dataframe for every label column is wasted work.
#  Only the sort key & the label columns are needed to flag the top sub per cluster.
df_labels_by_users_ = (
    df_tsne_and_meta[['users_l28'] + l_cols_k_labels_]
    .sort_values(by=['users_l28'], ascending=False)
)

for k_label in l_cols_k_labels_:
    col_new_seed_names_ = f"subreddit_name_seeds_{k_label.replace('_label', '')}"

    # True only for the first row of each cluster in the sorted frame,
    #  i.e., the subreddit with the most users (L28) in that cluster.
    # NOTE: this mask is in sorted-row order, so always apply it with `.loc`, which aligns
    #  it on the index. Plain `df[mask]` triggers a pandas "will be reindexed" warning.
    mask_top_sub_names_ = ~df_labels_by_users_.duplicated(subset=[k_label], keep='first')

    # Default to an empty string so that only the seed subreddits get text in plots.
    # we might get a name for nulls (subs that were not in the final output b/c of too few posts)
    #  because rows with a null label are also treated as one group by `.duplicated()`
    df_tsne_and_meta[col_new_seed_names_] = ''
    df_tsne_and_meta.loc[
        mask_top_sub_names_, col_new_seed_names_
    ] = df_tsne_and_meta.loc[mask_top_sub_names_, 'subreddit_name']

    # fill ukraine
    df_tsne_and_meta.loc[
        mask_ukraine_or_russia_, col_new_seed_names_
    ] = df_tsne_and_meta.loc[mask_ukraine_or_russia_, 'subreddit_name']


# clean up temporary names so they don't leak into later cells
del mask_top_sub_names_, col_new_seed_names_
del l_cols_k_labels_, df_labels_by_users_



In [15]:
# (
#     df_tsne_and_meta
#     .sort_values(by=['users_l28'], ascending=True)
#     .duplicated(subset=['k_0023_label'], keep='last')
# )

In [16]:
# (
#     df_tsne_and_meta[
#         ~(
#             df_tsne_and_meta
#             .sort_values(by=['users_l28'], ascending=False)
#             .duplicated(subset=['k_0023_label'], keep='first')
#         )
#     ]
#     [['subreddit_name', 'tsne_0', 'tsne_1', 'posts_l28', 'users_l28']]
# )

In [17]:
# df_tsne_and_meta[
#     df_tsne_and_meta['subreddit_name_seeds_k_0013'] != ''
# ][['subreddit_name', 'tsne_0', 'tsne_1', 'posts_l28', 'users_l28']]

## Create columns to highlight DE & India subreddits
We want to color the plots by this column to show whether a subreddit is relevant to these countries.

In [20]:
# df_tsne_and_meta.head()

In [21]:
# Flag subreddits where India is one of the geo-relevant countries.
#  `geo_relevant_countries_all` is a `, `-delimited list of country names (see output below),
#  so match `India` only as a complete entry in that list. A plain substring search
#  would also flag any country whose name merely contains "India"
#  (e.g., "British Indian Ocean Territory").
#  To flag more countries, extend the alternation, e.g.: (?:Germany|India)
mask_india_ = (
    df_tsne_and_meta['geo_relevant_countries_all']
    .fillna('')  # nulls = subreddit is not geo-relevant to any country
    .str.contains(r'(?:^|, )India(?:, |$)')
)
# Count each combination of countries for the India-relevant subreddits
df_tsne_and_meta.loc[mask_india_, 'geo_relevant_countries_all'].value_counts()

India                                         1712
Canada, India                                   22
India, United Kingdom                           14
Australia, India                                 8
Australia, India, United Kingdom                 5
Germany, India                                   4
India, Mexico                                    2
Canada, India, Spain                             1
India, Trinidad and Tobago, United Kingdom       1
India, Spain                                     1
Canada, India, United Kingdom                    1
Brazil, Germany, India                           1
Australia, India, Peru                           1
France, India                                    1
Name: geo_relevant_countries_all, dtype: int64

In [49]:
# col_country_india_relevant_ = 'geo_relevant_india'
# val_not_india = 'None/Other'
# df_tsne_and_meta[col_country_india_relevant_] = np.where(
#     mask_india_,
#     df_tsne_and_meta['geo_relevant_countries_all'],
#     val_not_india,
# )
# df_tsne_and_meta[col_country_india_relevant_].value_counts()

### Create seeds that also include india relevant subs

In [23]:

# One cluster-label column per `k` (number of clusters)
l_cols_k_labels_ = [c for c in df_tsne_and_meta.columns if c.endswith('_label')]
l_cols_to_sort_ = ['users_l28', 'subreddit_name'] + l_cols_k_labels_

# Sort ONCE, outside the loop: the order by users doesn't depend on `k`, so re-sorting
#  both dataframes for every label column is wasted work
df_all_by_users_ = (
    df_tsne_and_meta[l_cols_to_sort_]
    .sort_values(by=['users_l28'], ascending=False)
)
df_india_by_users_ = (
    df_tsne_and_meta.loc[mask_india_, l_cols_to_sort_]
    .sort_values(by=['users_l28'], ascending=False)
)

for k_label in l_cols_k_labels_:
    col_new_seed_names_ = f"subreddit_name_seeds_india_{k_label.replace('_label', '')}"

    # True only for the subreddit with the most users (L28) in each cluster.
    #  This mask is in sorted-row order; pandas aligns it on the index when it's used below
    mask_top_sub_names_ = ~df_all_by_users_.duplicated(subset=[k_label], keep='first')

    # Name of the India-relevant subreddit with the most users (L28) in each cluster
    sub_names_top_subs_india = (
        df_india_by_users_
        .drop_duplicates(subset=[k_label], keep='first')
        ['subreddit_name']
    )

    # Seeds = top sub overall OR top India-relevant sub in each cluster.
    #  Compute the combined mask once & reuse it on both sides of the assignment
    mask_seed_names_ = (
        mask_top_sub_names_ |
        df_tsne_and_meta['subreddit_name'].isin(sub_names_top_subs_india)
    )

    # Default to an empty string so that only the seed subreddits get text in plots.
    # we might get a name for nulls (subs that were not in the final output b/c of too few posts)
    df_tsne_and_meta[col_new_seed_names_] = ''
    df_tsne_and_meta.loc[
        mask_seed_names_, col_new_seed_names_
    ] = df_tsne_and_meta.loc[mask_seed_names_, 'subreddit_name']


# clean up temporary names so they don't leak into later cells
del mask_top_sub_names_, col_new_seed_names_
del mask_seed_names_, l_cols_k_labels_, l_cols_to_sort_
del df_all_by_users_, df_india_by_users_

In [45]:
# [c for c in df_tsne_and_meta.columns if 'india' in c][-25:]

In [25]:
# df_tsne_and_meta['subreddit_name_seeds_india_k_0050'].value_counts()

In [26]:
# df_tsne_and_meta['subreddit_name_seeds_k_0050'].value_counts()

# Static plots

## high level plot

In [71]:
# k_clusters_to_plot_ = 'k_0030_label'
# col_sub_names_cluster_seeds = 'subreddit_name_seeds_k_0090'

# df_plot_ = (
#     df_tsne_and_meta
#     .sort_values(by=k_clusters_to_plot_, ascending=True)
# )

# l_custom_text_ = [
#     'subreddit_name',
#     k_clusters_to_plot_,
# ]
# fig = px.scatter(
#     data_frame=df_plot_,
#     x='tsne_0',
#     y='tsne_1',
#     color=df_plot_[k_clusters_to_plot_].map(lambda x: f"{x:02,.0f}"),
#     opacity=0.6,
#     size_max=11,
#     size=(df_plot_['users_l28'] / 1e6) + np.log(10 + df_plot_['users_l28']),
#     # hover_name='subreddit_name',
#     # hover_data=[k_clusters_to_plot_],
#     # text='subreddit_name',  # using full text is useless because you can't read any of them
#     text=col_sub_names_cluster_seeds,
#     custom_data=l_custom_text_,
# )
# sub_hovertemplate = "<br>".join(
#     [
#         "<b>%{customdata[0]}</b>",
#         "cluster ID:%{customdata[1]}"
#     ]
# )
# fig.update_layout(
#     width=1200,
#     height=650,
#     autosize=False,
#     yaxis=dict(showgrid=False, zeroline=False,),
#     xaxis=dict(showgrid=False, zeroline=False),
#     plot_bgcolor='#040404',  # dark-gray: '#1a1a1a' '#fcfcfc'
# )
# fig.update_traces(
#     hovertemplate=sub_hovertemplate,
#     marker=dict(
#         # size=(df_tsne_and_meta['users_l28'] / 1e6) + np.log(10 + df_tsne_and_meta['users_l28']),  # np.log(1000 + df_tsne_and_meta['users_l28']),
#         # sizeref is weird, can't get it to behave as expected, so log is good enough for now
#         sizemin=.5,
#         line=dict(
#             width=0,
#         ),
#     ),
#     textfont=dict(
#         color='#fcfcfc',  # '#fcfcfc' '#1a1a1a'
#         size=14,
#         # bgcolor='#ababab',
#         # opacity=0.2,
#     ),
# )

# fig.show(
#     # renderer='png',
#     width=1100,
#     height=650,
# )

## Plot with more subreddit seed names

In [80]:
# k_clusters_to_plot_ = 'k_0090_label'
# col_sub_names_cluster_seeds = 'subreddit_name_seeds_k_0500'  # subreddit_name_seeds_india_k_0500

# df_plot_ = (
#     df_tsne_and_meta
#     .sort_values(by=k_clusters_to_plot_, ascending=True)
# )

# l_custom_text_ = [
#     'subreddit_name',
#     k_clusters_to_plot_,
# ]
# fig = px.scatter(
#     data_frame=df_plot_,
#     x='tsne_0',
#     y='tsne_1',
#     color=df_plot_[k_clusters_to_plot_].map(lambda x: f"{x:02,.0f}"),
#     opacity=0.6,
#     size_max=11,
#     size=(df_plot_['users_l28'] / 1e6) + np.log(10 + df_plot_['users_l28']),
#     # hover_name='subreddit_name',
#     # hover_data=[k_clusters_to_plot_],
#     # text='subreddit_name',  # using full text is useless because you can't read any of them
#     text=col_sub_names_cluster_seeds,
#     custom_data=l_custom_text_,
# )
# fig.update_layout(
#     width=1200,
#     height=650,
#     autosize=False,
#     yaxis=dict(showgrid=False, zeroline=False,),
#     xaxis=dict(showgrid=False, zeroline=False),
#     plot_bgcolor='#040404',  # dark-gray: '#1a1a1a' '#fcfcfc'
# )
# fig.update_traces(
#     hovertemplate=sub_hovertemplate,
#     marker=dict(
#         size=(df_plot_['users_l28'] / 1e6) + np.log(10 + df_plot_['users_l28']),  # np.log(1000 + df_tsne_and_meta['users_l28']),
#         # sizeref is weird, can't get it to behave as expected, so log is good enough for now
#         sizemin=.5,
#         # sizeref=(2 * max(np.log(df_tsne_and_meta['users_l28'])) / (5 ** 2)),
#         line=dict(
#             width=0,
#         ),
#     ),
#     textfont=dict(
#         color='#fcfcfc',  # '#fcfcfc' '#1a1a1a'
#         size=14,
#         # bgcolor='#ababab',
#         # opacity=0.2,
#     ),
# )

# fig.show(
#     # renderer='png',
#     width=1200,
#     height=650,
# )

## Plot India subreddits

Try color & shape

### India high level

In [29]:

# col_sub_names_cluster_seeds = 'subreddit_name_seeds_india_k_0050'

# l_custom_text_ = [
#     'subreddit_name',
#     'users_l28',
# ]
# sub_hovertemplate = "<br>".join(
#     [
#         "<b>%{customdata[0]}</b>",
#         "Users L28:%{customdata[1]}"
#     ]
# )
# fig = px.scatter(
#     data_frame=df_tsne_and_meta,
#     x='tsne_0',
#     y='tsne_1',
#     color=col_country_india_relevant_,
#     opacity=0.6,
#     size_max=11,
#     size=(df_tsne_and_meta['users_l28'] / 1e6) + np.log(10 + df_tsne_and_meta['users_l28']),
#     # hover_name='subreddit_name',
#     # hover_data=[k_clusters_to_plot_],
#     # text='subreddit_name',  # using full text is useless because you can't read any of them
#     text=col_sub_names_cluster_seeds,
#     custom_data=l_custom_text_,
# )

# fig.update_layout(
#     width=1600,
#     height=850,
#     autosize=False,
#     yaxis=dict(showgrid=False, zeroline=False,),
#     xaxis=dict(showgrid=False, zeroline=False),
#     plot_bgcolor='#040404',  # dark-gray: '#1a1a1a' '#fcfcfc'
# )
# fig.update_traces(
#     hovertemplate=sub_hovertemplate,
#     marker=dict(
#         # size=(df_tsne_and_meta['users_l28'] / 1e6) + np.log(10 + df_tsne_and_meta['users_l28']),  # np.log(1000 + df_tsne_and_meta['users_l28']),
#         # sizeref is weird, can't get it to behave as expected, so log is good enough for now
#         sizemin=.5,
#         line=dict(
#             width=0,
#         ),
#     ),
#     textfont=dict(
#         color='#fcfcfc',  # '#fcfcfc' '#1a1a1a'
#         size=14,
#         # bgcolor='#ababab',
#         # opacity=0.2,
#     ),
# )

# fig.show(
#     # renderer='png',
# )

### India detailed

In [37]:
# k_clusters_to_plot_ = 'k_0050_label'
# col_sub_names_cluster_seeds = 'subreddit_name_seeds_india_k_0500'

# l_custom_text_ = [
#     'subreddit_name',
#     'users_l28',
# ]
# sub_hovertemplate = "<br>".join(
#     [
#         "<b>%{customdata[0]}</b>",
#         "Users L28: %{customdata[1]:,.0f}"
#     ]
# )
# fig = px.scatter(
#     data_frame=df_tsne_and_meta,
#     x='tsne_0',
#     y='tsne_1',
#     color=col_country_india_relevant_,
#     opacity=0.6,
#     size_max=11,
#     size=(df_tsne_and_meta['users_l28'] / 1e6) + np.log(10 + df_tsne_and_meta['users_l28']),
#     # hover_name='subreddit_name',
#     # hover_data=[k_clusters_to_plot_],
#     # text='subreddit_name',  # using full text is useless because you can't read any of them
#     text=col_sub_names_cluster_seeds,
#     custom_data=l_custom_text_,
# )

# fig.update_layout(
#     width=1600,
#     height=850,
#     autosize=False,
#     yaxis=dict(showgrid=False, zeroline=False,),
#     xaxis=dict(showgrid=False, zeroline=False),
#     plot_bgcolor='#040404',  # dark-gray: '#1a1a1a' '#fcfcfc'
# )
# fig.update_traces(
#     hovertemplate=sub_hovertemplate,
#     marker=dict(
#         # size=(df_tsne_and_meta['users_l28'] / 1e6) + np.log(10 + df_tsne_and_meta['users_l28']),  # np.log(1000 + df_tsne_and_meta['users_l28']),
#         # sizeref is weird, can't get it to behave as expected, so log is good enough for now
#         sizemin=.5,
#         line=dict(
#             width=0,
#         ),
#     ),
#     textfont=dict(
#         color='#fcfcfc',  # '#fcfcfc' '#1a1a1a'
#         size=16,
#         # bgcolor='#ababab',
#         # opacity=0.2,
#     ),
# )

# fig.show(
#     # renderer='png',
# #     width=1200,
# #     height=750,
# )

# Interactive plot

It might not actually work unless I use dash?
- https://community.plotly.com/t/restyle-color-in-plotly-express/47835/7

In [31]:
# # define base columns to plot
# k_clusters_to_plot_ = 'k_0050_label'
# col_sub_names_cluster_seeds = 'subreddit_name_seeds_k_0050'
# df_plot_ = df_tsne_and_meta.sample(n=1000)

# # Create figure
# # fig = go.Figure()


# # Add scatter trace
# l_custom_text_ = [
#     'subreddit_name',
#     k_clusters_to_plot_,
# ]
# fig = px.scatter(
#     data_frame=df_plot_,
#     x='tsne_0',
#     y='tsne_1',
#     # color=k_clusters_to_plot_,
#     opacity=0.6,
#     size_max=11,
#     size=(df_plot_['users_l28'] / 1e6) + np.log(10 + df_plot_['users_l28']),
#     # hover_name='subreddit_name',
#     # hover_data=[k_clusters_to_plot_],
#     # text='subreddit_name',  # using full text is useless because you can't read any of them
#     text=col_sub_names_cluster_seeds,
#     custom_data=l_custom_text_,
# )

# sub_hovertemplate = "<br>".join(
#     [
#         "<b>%{customdata[0]}</b>",
#         "cluster ID:%{customdata[1]}"
#     ]
# )
# # fig.update_layout(
# #     width=1200,
# #     height=650,
# #     autosize=False,
# #     yaxis=dict(showgrid=False, zeroline=False,),
# #     xaxis=dict(showgrid=False, zeroline=False),
# #     plot_bgcolor='#040404',  # dark-gray: '#1a1a1a' '#fcfcfc'
# # )
# fig.update_traces(
#     hovertemplate=sub_hovertemplate,
#     marker=dict(
#         # size=(df_tsne_and_meta['users_l28'] / 1e6) + np.log(10 + df_tsne_and_meta['users_l28']),  # np.log(1000 + df_tsne_and_meta['users_l28']),
#         # sizeref is weird, can't get it to behave as expected, so log is good enough for now
#         sizemin=.5,
#         line=dict(
#             width=0,
#         ),
#     ),
#     textfont=dict(
#         color='#fcfcfc',  # '#fcfcfc' '#1a1a1a'
#         size=14,
#         # bgcolor='#ababab',
#         # opacity=0.2,
#     ),
# )


# # Add dropdowns
# button_layer_1_height = 1.08
# fig.update_layout(
#     updatemenus=[
#         dict(
#             buttons=list([
#                 dict(
#                     args=[{"color": "k_0010_label"}],
#                     label="10 clusters",
#                     method="restyle"
#                 ),
#                 dict(
#                     args=[{"color": df_tsne_and_meta[col_country_india_relevant_].to_list()}],
#                     label="India Relevant",
#                     method="restyle"
#                 ),
#                 dict(
#                     args=[{"color": "k_0060_label"}],
#                     label="60 clusters",
#                     method="relayout"
#                 ),
#                 dict(
#                     args=[{"color": "k_0100_label"}],
#                     label="100 clusters",
#                     method="restyle"
#                 ),
#             ]),
#             direction="down",
#             pad={"r": 10, "t": 10},
#             showactive=True,
#             x=0.1,
#             xanchor="left",
#             y=button_layer_1_height,
#             yanchor="top"
#         ),
# #         dict(
# #             buttons=list([
# #                 dict(
# #                     args=[{"contours.showlines": False, "type": "contour"}],
# #                     label="Hide lines",
# #                     method="restyle"
# #                 ),
# #                 dict(
# #                     args=[{"contours.showlines": True, "type": "contour"}],
# #                     label="Show lines",
# #                     method="restyle"
# #                 ),
# #             ]),
# #             direction="down",
# #             pad={"r": 10, "t": 10},
# #             showactive=True,
# #             x=0.58,
# #             xanchor="left",
# #             y=button_layer_1_height,
# #             yanchor="top"
# #         ),
#     ]
# )
# fig.update_layout(
#     annotations=[
#         dict(text="Color by:", x=0, xref="paper", y=1.06, yref="paper",
#                              align="left", showarrow=False),
# #         dict(text="Reverse<br>Colorscale", x=0.25, xref="paper", y=1.07,
# #                              yref="paper", showarrow=False),
# #         dict(text="Lines", x=0.54, xref="paper", y=1.06, yref="paper",
# #                              showarrow=False)
#     ]
# )

# fig.show(
#     # renderer='png',
#     width=1100,
#     height=650,
# )