In [18]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import matplotlib.gridspec as gridspec
from scipy.io import arff
from sklearn.preprocessing import LabelEncoder

In [19]:
artists_path = os.path.join('D:\\', 'artists')
original_datasets_path = os.path.abspath(artists_path)

# Load data
def load_data(artists_datasets_path: str):
    artist_data = pd.read_csv(artists_datasets_path, delimiter=",")  # Intenta también con "\t"
    return artist_data


# Load data 
artists_datasets_path = os.path.join(artists_path, 'map_df_filtered.csv')
map_df_filtered = load_data(artists_datasets_path)

# Show first records and DataFrame information
display(map_df_filtered.head())


Unnamed: 0,artist_name,e.country,e.country_3,num_exhibitions
0,1 unknown,BE,BEL,2
1,2 unknown,FR,FRA,1
2,A L Millich,GB,GBR,2
3,A Molitor,BE,BEL,2
4,A. A. Baryshnikov,RU,RUS,1


In [20]:
import plotly.express as px
import numpy as np
import plotly.graph_objects as go

# Count the number of records per artist
artist_counts = map_df_filtered['artist_name'].value_counts().reset_index()
artist_counts.columns = ['artist_name', 'count']

# Select the top 5 artists with the most records
top_5_artists = artist_counts.head(5)
print("Top 5 artists with the most records:")
print(top_5_artists)

# Select the bottom 5 artists with the fewest records
bottom_5_artists = artist_counts.tail(5)
print("\nBottom 5 artists with the fewest records:")
print(bottom_5_artists)

# Combine the top 5 and bottom 5 artists into a single list
selected_artists = pd.concat([top_5_artists, bottom_5_artists])

# Filter the original DataFrame to include only the selected artists
filtered_df = map_df_filtered[map_df_filtered['artist_name'].isin(selected_artists['artist_name'])]

# Display the filtered DataFrame
print("\nFiltered DataFrame with Top 5 and Bottom 5 artists:")
display(filtered_df)



Top 5 artists with the most records:
             artist_name  count
0          Maurice Denis     14
1         Pierre Bonnard     14
2            Paul Signac     14
3       Edouard Vuillard     14
4  Henri Charles Manguin     14

Bottom 5 artists with the fewest records:
                       artist_name  count
13261    Émile Norman Saint-Marcel      1
13262      Émile Pierre Metzmacher      1
13263  Émile Pierre de La Montagne      1
13264                Émile Pinchon      1
13265                 Émile Raulin      1

Filtered DataFrame with Top 5 and Bottom 5 artists:


Unnamed: 0,artist_name,e.country,e.country_3,num_exhibitions
4553,Edouard Vuillard,AT,AUT,9
4554,Edouard Vuillard,BE,BEL,43
4555,Edouard Vuillard,CH,CHE,6
4556,Edouard Vuillard,CZ,CZE,10
4557,Edouard Vuillard,DE,DEU,93
...,...,...,...,...
20227,Émile Norman Saint-Marcel,FR,FRA,8
20228,Émile Pierre Metzmacher,FR,FRA,1
20229,Émile Pierre de La Montagne,BE,BEL,7
20230,Émile Pinchon,FR,FRA,15


In [21]:

filtered_df['log_num_exhibitions'] = np.log1p(filtered_df['num_exhibitions'])

# Determine the fixed color range for all artists
log_color_min = filtered_df['log_num_exhibitions'].min()
log_color_max = filtered_df['log_num_exhibitions'].max()

# Initialize the figure with the data of the first artist
first_artist = filtered_df['artist_name'].unique()[0]
artist_data = filtered_df[filtered_df['artist_name'] == first_artist]

fig = go.Figure(data=go.Choropleth(
    locations=artist_data['e.country_3'],  # ISO-3 country codes
    z=artist_data['log_num_exhibitions'],  # Log-transformed number of exhibitions
    text=artist_data['e.country'],        # Country name
    colorscale='Plasma',
    zmin=log_color_min,
    zmax=log_color_max,
    colorbar=dict(title="Exhibitions (Log Scale)")
))

# Add buttons for the dropdown menu
buttons = []
for artist in filtered_df['artist_name'].unique():
    artist_data = filtered_df[filtered_df['artist_name'] == artist]
    buttons.append(
        dict(
            label=artist,
            method="update",
            args=[
                {"z": [artist_data['log_num_exhibitions']],
                 "locations": [artist_data['e.country_3']],
                 "text": [artist_data['e.country']]},
                {"title": f"Exhibition Distribution for {artist}"}
            ]
        )
    )

# Update layout with dropdown menu and fixed color bar
fig.update_layout(
    updatemenus=[{
        "buttons": buttons,
        "direction": "down",
        "showactive": True,
        "x": 0.1,
        "y": 1
    }],
    geo=dict(
        projection_type="natural earth",
        showcoastlines=True, coastlinecolor="Black",
        showland=True, landcolor="lightgray"
    ),
    title=f"Exhibition Distribution for {first_artist}",
    coloraxis_colorbar=dict(
        title="Exhibitions (Log Scale)"
    )
)

# Save the figure
fig.write_html("interactive_artist_fixed_color_map.html")

# Show the figure
fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

