In [24]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import matplotlib.gridspec as gridspec
from scipy.io import arff
from sklearn.preprocessing import LabelEncoder

In [25]:
artists_path = os.path.join('D:\\', 'artists')
original_datasets_path = os.path.abspath(artists_path)

# Load data
def load_data(artists_datasets_path: str):
    artist_data = pd.read_csv(artists_datasets_path, delimiter=",")  # Intenta también con "\t"
    return artist_data


# Load data 
artists_datasets_path = os.path.join(artists_path, 'year_df_filtered.csv')
year_df_filtered = load_data(artists_datasets_path)

# Show first records and DataFrame information
display(year_df_filtered.head())


Unnamed: 0,artist_name,e.country,e.country_3,e.startdate,num_exhibitions
0,1 unknown,BE,BEL,1908,2
1,2 unknown,FR,FRA,1906,1
2,A L Millich,GB,GBR,1907,2
3,A Molitor,BE,BEL,1914,2
4,A. A. Baryshnikov,RU,RUS,1914,1


In [26]:
import plotly.express as px
import numpy as np
import plotly.graph_objects as go

# Count the number of records per artist
artist_counts = year_df_filtered['artist_name'].value_counts().reset_index()
artist_counts.columns = ['artist_name', 'count']

# Select the top 5 artists with the most records
top_5_artists = artist_counts.head(5)
print("Top 5 artists with the most records:")
print(top_5_artists)

# Select the bottom 5 artists with the fewest records
bottom_5_artists = artist_counts.tail(5)
print("\nBottom 5 artists with the fewest records:")
print(bottom_5_artists)

# Combine the top 5 and bottom 5 artists into a single list
selected_artists = pd.concat([top_5_artists, bottom_5_artists])

# Filter the original DataFrame to include only the selected artists
filtered_df = year_df_filtered[year_df_filtered['artist_name'].isin(selected_artists['artist_name'])]

# Display the filtered DataFrame
print("\nFiltered DataFrame with Top 5 and Bottom 5 artists:")
display(filtered_df)



Top 5 artists with the most records:
             artist_name  count
0       Edouard Vuillard     47
1         Pierre Bonnard     44
2          Maurice Denis     43
3  Pierre-Auguste Renoir     42
4          Henri Matisse     42

Bottom 5 artists with the fewest records:
                     artist_name  count
13261  Émile Norman Saint-Marcel      1
13262    Émile Pierre Metzmacher      1
13263            Helen Whitfield      1
13264          Helena Blankstein      1
13265               Émile Raulin      1

Filtered DataFrame with Top 5 and Bottom 5 artists:


Unnamed: 0,artist_name,e.country,e.country_3,e.startdate,num_exhibitions
8350,Edouard Vuillard,AT,AUT,1909,4
8351,Edouard Vuillard,AT,AUT,1912,1
8352,Edouard Vuillard,AT,AUT,1913,2
8353,Edouard Vuillard,AT,AUT,1914,2
8354,Edouard Vuillard,BE,BEL,1908,6
...,...,...,...,...,...
30276,Pierre-Auguste Renoir,US,USA,1912,1
30277,Pierre-Auguste Renoir,US,USA,1913,11
36747,Émile Norman Saint-Marcel,FR,FRA,1907,8
36748,Émile Pierre Metzmacher,FR,FRA,1906,1


In [27]:
# Log transformation for exhibitions
filtered_df['log_num_exhibitions'] = np.log1p(filtered_df['num_exhibitions'])

# Función para actualizar los datos según artista y año
def get_artist_year_data(artist, year):
    """Filtra el DataFrame por artista y año y devuelve los datos."""
    data = filtered_df[(filtered_df['artist_name'] == artist) & (filtered_df['e.startdate'] == year)]
    return dict(
        locations=data['e.country_3'],
        z=data['log_num_exhibitions'],
        text=data['e.country'],
    )

# Obtener artistas y años únicos
artists = filtered_df['artist_name'].unique()
years = sorted(filtered_df['e.startdate'].unique())

# Inicializar figura
fig = go.Figure()

# Trazas iniciales: primer artista y primer año
initial_artist = artists[0]
initial_year = years[0]
initial_data = get_artist_year_data(initial_artist, initial_year)
fig.add_trace(go.Choropleth(
    locations=initial_data['locations'],
    z=initial_data['z'],
    text=initial_data['text'],
    colorscale="Plasma",
    zmin=0,
    zmax=filtered_df['log_num_exhibitions'].max(),
    marker_line_color='white',
    marker_line_width=0.5
))

# Dropdown para seleccionar artistas
artist_buttons = []
for artist in artists:
    artist_buttons.append(dict(
        label=artist,
        method="update",
        args=[
            {"z": [get_artist_year_data(artist, initial_year)['z']],
             "locations": [get_artist_year_data(artist, initial_year)['locations']],
             "text": [get_artist_year_data(artist, initial_year)['text']]},
            {"title": f"Exhibition Distribution for {artist}, Year {initial_year}"}
        ]
    ))

# Slider para actualizar el año
steps = []
for year in years:
    step = dict(
        method="update",
        args=[
            {"z": [get_artist_year_data(initial_artist, year)['z']],
             "locations": [get_artist_year_data(initial_artist, year)['locations']],
             "text": [get_artist_year_data(initial_artist, year)['text']]},
            {"title": f"Exhibition Distribution for {initial_artist}, Year {year}"}
        ],
        label=str(year)
    )
    steps.append(step)

# Configurar layout
fig.update_layout(
    title=f"Exhibition Distribution for {initial_artist}, Year {initial_year}",
    updatemenus=[{
        "buttons": artist_buttons,
        "direction": "down",
        "showactive": True,
        "x": 0.015,
        "xanchor": "left",
        "y": 1,
        "yanchor": "top"
    }],
    sliders=[{
        "active": 0,
        "steps": steps,
        "x": 0.015,
        "len": 0.9,
        "currentvalue": {"prefix": "Year: "}
    }],
    geo=dict(
        projection_type="natural earth",
        showland=True, landcolor="lightgray",
        showcoastlines=True, coastlinecolor="Black"
    ),
    coloraxis_colorbar=dict(title="Exhibitions (Log Scale)")
)

# Mostrar y guardar
fig.write_html("interactive_artist_year_filtered.html")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

