In [1]:
from utils import get_current_dir
import pandas as pd

inputpath = get_current_dir().parent / "data" / "artvis_cleaned.csv"
df = pd.read_csv(inputpath)

df = df.replace("null", pd.NA)
display(df.head())


df["a.id"] = df["a.id"].astype("Int64")
df = df.dropna(subset=["a.id"])

df["a.firstname"] = df["a.firstname"].astype("string")
df["a.firstname"] = df["a.firstname"].fillna("Unknown Firstname")

df["a.lastname"] = df["a.lastname"].astype("string")
df["a.lastname"] = df["a.lastname"].fillna("Unknown Lastname")

df["a.gender"] = pd.Categorical(df["a.gender"]).add_categories("Unknown Gender")
df["a.gender"] = df["a.gender"].fillna("Unknown Gender")

df["a.birthdate"] = pd.to_datetime(df["a.birthdate"], errors="coerce")

df["a.deathdate"] = pd.to_datetime(df["a.deathdate"], errors="coerce")

df["a.birthplace"] = df["a.birthplace"].astype("string")
df["a.birthplace"] = df["a.birthplace"].fillna("Unknown Birthplace")

df["a.deathplace"] = df["a.deathplace"].astype("string")
df["a.deathplace"] = df["a.deathplace"].fillna("Unknown Deathplace")

df["a.nationality"] = df["a.nationality"].astype("string")
df["a.nationality"] = df["a.nationality"].fillna("Unknown Nationality")

df["e.id"] = df["e.id"].astype("Int64")
df = df.dropna(subset=["e.id"])

df["e.title"] = df["e.title"].astype("string")
df["e.title"] = df["e.title"].fillna("Unknown Title")

df["e.venue"] = df["e.venue"].astype("string")
df["e.venue"] = df["e.venue"].fillna("Unknown Venue")

df["e.startdate"] = df["e.startdate"].astype("Int64")
df["e.startdate"] = df["e.startdate"].fillna(0)

df["e.type"] = pd.Categorical(df["e.type"]).add_categories("Unknown Type")
df["e.type"] = df["e.type"].fillna("Unknown Type")

df["e.paintings"] = df["e.paintings"].astype("Int64")
df["e.paintings"] = df["e.paintings"].fillna(0)

df["e.country"] = df["e.country"].astype("string")
df["e.country"] = df["e.country"].fillna("Unknown Country")

df["e.city"] = df["e.city"].astype("string")
df["e.city"] = df["e.city"].fillna("Unknown City")

df["e.latitude"] = df["e.latitude"].astype("float64")
df["e.latitude"] = df["e.latitude"].fillna(0)

df["e.longitude"] = df["e.longitude"].astype("float64")
df["e.longitude"] = df["e.longitude"].fillna(0)


print(df.head())
for col in df.columns:
    print(f"column: {col}, type: {df[col].dtype}, unique values: {df[col].nunique()}, null values: {df[col].isnull().sum()}")

Unnamed: 0,a.id,a.firstname,a.lastname,a.gender,a.birthdate,a.deathdate,a.birthplace,a.deathplace,a.nationality,e.id,e.title,e.venue,e.startdate,e.type,e.paintings,e.country,e.city,e.latitude,e.longitude
0,1,William Bernard,Adeney,M,1878-01-01,1966-01-01,London,London,GB,618,Exhibition of the Camden Town Group and Others,Public Art Galleries,1913,group,6,GB,Brighton,50.833333,-0.15
1,1,William Bernard,Adeney,M,1878-01-01,1966-01-01,London,London,GB,720,The Second Exhibition of Works by Members of t...,Goupil Gallery,1915,group,4,GB,London,51.514248,-0.093145
2,1,William Bernard,Adeney,M,1878-01-01,1966-01-01,London,London,GB,729,Third Exhibition of Works by Members of the Lo...,Goupil Gallery,1915,group,5,GB,London,51.514248,-0.093145
3,1,William Bernard,Adeney,M,1878-01-01,1966-01-01,London,London,GB,650,The First Exhibition of Works by Members of Th...,Goupil Gallery,1914,group,5,GB,London,51.514248,-0.093145
4,1,William Bernard,Adeney,M,1878-01-01,1966-01-01,London,London,GB,680,Twentieth Century Art. A Review of Modern Move...,Whitechapel Art Gallery,1914,group,3,GB,London,51.514248,-0.093145


   a.id      a.firstname a.lastname a.gender a.birthdate a.deathdate  \
0     1  William Bernard     Adeney        M  1878-01-01  1966-01-01   
1     1  William Bernard     Adeney        M  1878-01-01  1966-01-01   
2     1  William Bernard     Adeney        M  1878-01-01  1966-01-01   
3     1  William Bernard     Adeney        M  1878-01-01  1966-01-01   
4     1  William Bernard     Adeney        M  1878-01-01  1966-01-01   

  a.birthplace a.deathplace a.nationality  e.id  \
0       London       London            GB   618   
1       London       London            GB   720   
2       London       London            GB   729   
3       London       London            GB   650   
4       London       London            GB   680   

                                             e.title                  e.venue  \
0     Exhibition of the Camden Town Group and Others     Public Art Galleries   
1  The Second Exhibition of Works by Members of t...           Goupil Gallery   
2  Third Exhibiti

In [2]:
outputpath = get_current_dir().parent / "pages"

# Vega Altair

In [3]:
import altair as alt
import pandas as pd

alt.data_transformers.enable("vegafusion")

city_stats = df.groupby(['e.city', 'e.country', 'e.latitude', 'e.longitude']).agg({
    'e.id': 'count',
    'e.paintings': 'sum'
}).reset_index()
city_stats.columns = ['city', 'country', 'latitude', 'longitude', 'num_exhibitions', 'total_paintings']

width = 800
height = 400

click = alt.selection_point(fields=['city'])

points = alt.Chart(city_stats).mark_circle().encode(
    longitude='longitude:Q',
    latitude='latitude:Q',
    size=alt.Size('num_exhibitions:Q', 
                 scale=alt.Scale(range=[100, 1000]),
                 title='Number of Exhibitions'),
    color=alt.condition(click, 
                       alt.Color('total_paintings:Q', scale=alt.Scale(scheme='viridis')),
                       alt.value('gray')),
    tooltip=[
        alt.Tooltip('city:N', title='City'),
        alt.Tooltip('country:N', title='Country'),
        alt.Tooltip('num_exhibitions:Q', title='Number of Exhibitions'),
        alt.Tooltip('total_paintings:Q', title='Total Paintings')
    ]
).properties(
    width=width,
    height=height
).add_selection(click)

top_cities = alt.Chart(city_stats).mark_bar().encode(
    x=alt.X('num_exhibitions:Q', title='Number of Exhibitions'),
    y=alt.Y('city:N', sort='-x', title='City'),
    color=alt.condition(click, 
                       alt.Color('total_paintings:Q', scale=alt.Scale(scheme='viridis')),
                       alt.value('gray')),
    tooltip=[
        alt.Tooltip('city:N', title='City'),
        alt.Tooltip('num_exhibitions:Q', title='Number of Exhibitions'),
        alt.Tooltip('total_paintings:Q', title='Total Paintings')
    ]
).transform_window(
    rank='rank(num_exhibitions)',
    sort=[alt.SortField('num_exhibitions', order='descending')]
).transform_filter(
    alt.datum.rank <= 10
).properties(
    width=width//2,
    height=height//2
).add_selection(click)

final_vis = alt.vconcat(
    points,
    top_cities,
    title="Exhibition Locations and Statistics"
).configure_view(
    stroke=None
).configure_title(
    fontSize=20,
    anchor='middle'
)

alt.renderers.enable('jupyter')
display(alt.JupyterChart(final_vis))

  ).add_selection(click)
  ).add_selection(click)


JupyterChart()

In [5]:
import altair as alt
import pandas as pd

alt.data_transformers.enable("vegafusion")

artist_stats = df.groupby(['a.nationality', 'a.gender']).agg({
    'a.id': 'count',
    'e.paintings': 'sum'
}).reset_index()
artist_stats.columns = ['nationality', 'gender', 'num_artists', 'total_paintings']

selection = alt.selection_point(fields=['nationality'])

width = 800
height = 400

nationality_chart = alt.Chart(artist_stats).mark_bar().encode(
    x=alt.X('nationality:N', sort='-y', title='Nationality'),
    y=alt.Y('num_artists:Q', title='Number of Artists'),
    color=alt.condition(selection,
                       alt.Color('gender:N', scale=alt.Scale(scheme='category10')),
                       alt.value('lightgray')),
    tooltip=[
        alt.Tooltip('nationality:N', title='Nationality'),
        alt.Tooltip('gender:N', title='Gender'),
        alt.Tooltip('num_artists:Q', title='Number of Artists'),
        alt.Tooltip('total_paintings:Q', title='Total Paintings')
    ]
).properties(
    width=width,
    height=height
).add_selection(selection)

paintings_chart = alt.Chart(artist_stats).mark_circle(opacity=0.7).encode(
    x=alt.X('num_artists:Q', title='Number of Artists'),
    y=alt.Y('total_paintings:Q', title='Total Paintings'),
    size=alt.Size('num_artists:Q', scale=alt.Scale(range=[100, 1000])),
    color=alt.condition(selection,
                       alt.Color('gender:N', scale=alt.Scale(scheme='category10')),
                       alt.value('lightgray')),
    tooltip=[
        alt.Tooltip('nationality:N', title='Nationality'),
        alt.Tooltip('gender:N', title='Gender'),
        alt.Tooltip('num_artists:Q', title='Number of Artists'),
        alt.Tooltip('total_paintings:Q', title='Total Paintings')
    ]
).properties(
    width=width//2,
    height=height//2
).add_selection(selection)

final_visualization = alt.vconcat(
    nationality_chart,
    paintings_chart,
    title="Artist Distribution by Nationality and Gender"
).configure_view(
    stroke=None
).configure_title(
    fontSize=20,
    anchor='middle'
)

alt.renderers.enable('jupyter')
display(alt.JupyterChart(final_visualization))

  artist_stats = df.groupby(['a.nationality', 'a.gender']).agg({
  ).add_selection(selection)
  ).add_selection(selection)


JupyterChart()

In [4]:
import altair as alt
import pandas as pd


alt.data_transformers.enable("vegafusion")

exhibitions_by_year = (
    df.groupby(['e.startdate', 'e.country', 'e.city'])
    .agg({
        'e.paintings': 'sum',
        'e.id': 'count'
    })
    .reset_index()
    .rename(columns={
        'e.id': 'num_exhibitions',
        'e.paintings': 'total_paintings'
    })
)

scatter = alt.Chart(exhibitions_by_year).mark_circle(size=60).encode(
    x=alt.X('num_exhibitions:Q', title='Number of Exhibitions'),
    y=alt.Y('total_paintings:Q', title='Total Paintings'),
    tooltip=['e.city:N', 'num_exhibitions:Q', 'total_paintings:Q']
).properties(
    width=1000,
    height=400,
    title='Exhibitions vs Paintings'
)

scatter

JupyterChart()