In [9]:
import pandas as pd

df = pd.read_csv('data/artvis.csv', sep=';')
df[['e.latitude', 'e.longitude']] = df[['e.latitude', 'e.longitude']].apply(pd.to_numeric, errors='coerce')
df = df.dropna(subset=['e.latitude', 'e.longitude', 'e.startdate'])
df = df[df['e.city'] != '-']
df[['e.latitude', 'e.longitude']] = df[['e.latitude', 'e.longitude']].astype(float)
df.head()

Unnamed: 0,a.id,a.firstname,a.lastname,a.gender,a.birthdate,a.deathdate,a.birthplace,a.deathplace,a.nationality,e.id,e.title,e.venue,e.startdate,e.type,e.paintings,e.country,e.city,e.latitude,e.longitude
0,1,William Bernard,Adeney,M,1878-01-01,1966-01-01,London,London,GB,489,"Second Post-Impressionist Exhibition. British,...",Grafton Galleries,1912,group,4,GB,London,51.514248,-0.093145
1,1,William Bernard,Adeney,M,1878-01-01,1966-01-01,London,London,GB,618,Exhibition of the Camden Town Group and Others,Public Art Galleries,1913,group,6,GB,Brighton,50.833333,-0.15
2,1,William Bernard,Adeney,M,1878-01-01,1966-01-01,London,London,GB,720,The Second Exhibition of Works by Members of t...,Goupil Gallery,1915,group,4,GB,London,51.514248,-0.093145
3,1,William Bernard,Adeney,M,1878-01-01,1966-01-01,London,London,GB,729,Third Exhibition of Works by Members of the Lo...,Goupil Gallery,1915,group,5,GB,London,51.514248,-0.093145
4,1,William Bernard,Adeney,M,1878-01-01,1966-01-01,London,London,GB,650,The First Exhibition of Works by Members of Th...,Goupil Gallery,1914,group,5,GB,London,51.514248,-0.093145


In [10]:
import pandas as pd
import plotly.express as px
print(df.columns)
# df['e.latitude'] = df['e.latitude'].apply(pd.to_numeric, errors='coerce')  # Converts valid numbers, non-numeric values become NaN
# df['e.longitude'] = df['e.longitude'].apply(pd.to_numeric, errors='coerce')  # Converts valid numbers, non-numeric values become NaN
# df = df.dropna(subset=['e.latitude', 'e.longitude']) 

# df['e.latitude'] = df['e.latitude'].astype(float)
# df['e.longitude'] = df['e.longitude'].astype(float)

location_counts = df.groupby(['e.latitude', 'e.longitude']).size().reset_index(name='count')

# Plotting with plotly express
fig = px.scatter_mapbox(
    location_counts, lat="e.latitude", lon="e.longitude", 
    size="count",  # Use count for marker size
    zoom=4, height=600,
    mapbox_style="carto-positron"
)

# Show the figure
fig.show()


Index(['a.id', 'a.firstname', 'a.lastname', 'a.gender', 'a.birthdate',
       'a.deathdate', 'a.birthplace', 'a.deathplace', 'a.nationality', 'e.id',
       'e.title', 'e.venue', 'e.startdate', 'e.type', 'e.paintings',
       'e.country', 'e.city', 'e.latitude', 'e.longitude'],
      dtype='object')


The coordinates correspond to the cities. There is the same number of different lon x lat combinations as there are cities (city "-" means unknown and needs to be removed).

In [11]:
unique_combinations = df[['e.latitude', 'e.longitude']].drop_duplicates()

# Get the number of unique combinations
num_unique_combinations = unique_combinations.shape[0]
num_unique_venues = df['e.venue'].nunique()
num_unique_cities = df['e.city'].nunique()
print(f"Number of unique lon x lat combinations: {num_unique_combinations}")
print(f"Number of unique venues: {num_unique_venues}")
print(f"Number of unique venues: {num_unique_cities}")

print(df['e.city'].unique())

Number of unique lon x lat combinations: 82
Number of unique venues: 311
Number of unique venues: 82
['London' 'Brighton' 'Rotterdam' 'Amsterdam' 'Cologne' 'Saint Petersburg'
 'Moscow' 'Kiev' 'Berlin' 'Dresden' 'Weggis' 'Budapest' 'Vienna' 'Luzern'
 'Munich' 'Zurich' 'Bremen' 'Weimar' 'Odessa' 'New York' 'Riga' 'Angers'
 'Tokyo' 'Paris' 'Dusseldorf' 'Ekaterinodar' 'Chicago' 'Boston' 'Domburg'
 'Arnhem' 'Brussels' 'Prague' 'Nijmegen' 'Rome' 'Venice' 'Milan'
 'Florence' 'Naples' 'San Francisco' 'Leipzig' 'Darmstadt' 'Mannheim'
 'Pittsburgh' 'Ghent' 'Hamburg' 'Essen' 'Marseille' 'Frankfurt am Main'
 'Vilnius' 'Brandenburg an der Havel' 'Strasbourg' 'Basel' 'Stuttgart'
 'Liverpool' 'Antwerp' 'Ostend' 'Cincinnati' 'Rouen' 'Barcelona'
 'Stockholm' 'Gothenburg' 'Hague, The' 'Hodonín' 'Kladno' 'Prostějov'
 'Olomouc' 'Bielefeld' 'Breslau' 'Jena' 'Chemnitz' 'Krefeld' 'Magdeburg'
 'Hagen' 'Christiania' 'Utrecht' 'Cracow' 'Glasgow' 'Hradec Králové'
 'Kroměříž' 'Chrudim' 'Valašské Meziříčí' 'Toulou

In [12]:
df["e.startdate"].unique()

array([1912, 1913, 1915, 1914, 1916, 1910, 1911, 1909, 1907, 1906, 1905,
       1908, 1904, 1902, 1903], dtype=int64)

In [20]:
unique_artists_df = df.drop_duplicates(subset='a.id', keep='first')
moved_artists_df = unique_artists_df[unique_artists_df["a.birthplace"] != unique_artists_df["a.deathplace"]]
moved_artists_df = moved_artists_df[(moved_artists_df['a.birthplace'] != '\\N') & (moved_artists_df['a.birthplace'] != '\\N')]
display(moved_artists_df)

Unnamed: 0,a.id,a.firstname,a.lastname,a.gender,a.birthdate,a.deathdate,a.birthplace,a.deathplace,a.nationality,e.id,e.title,e.venue,e.startdate,e.type,e.paintings,e.country,e.city,e.latitude,e.longitude
7,2,Peter,Alma,M,1886-01-01,1969-01-01,Medan,Amsterdam,NL,716,"Tentoonstelling Alma, Le Fauconnier en Mondrian",Rotterdamsche Kunstkring,1915,group,6,NL,Rotterdam,51.916667,4.5000
14,3,Natan,Alt'man,M,1889-01-01,1970-01-01,Vinnitsa,Saint Petersburg,RU,1482,Мир Искусства. Выставка картин,Dom Obshestva Pooshrfenija hudozhestv,1913,group,4,RU,Saint Petersburg,59.883300,30.2500
26,4,Cuno,Amiet,M,1868-01-01,1961-01-01,Solothurn,Seeberg,CH,312,"Ausstellung der K. G. ""Brücke""",Galerie Ernst Arnold,1910,group,2,DE,Dresden,51.050000,13.7500
57,5,Vassily,Kandinsky,M,1866-01-01,1944-01-01,Moscow,Neuilly-sur-Seine,RU,311,Ausstellung des Sonderbundes Westdeutscher Kun...,Sonderbund Westdeutscher Kunstfreunde und Küns...,1910,group,3,DE,Cologne,50.933333,6.9500
154,6,Kazimir,Malevich,M,1878-01-01,1935-01-01,Kiev,Saint Petersburg,RU,732,"Последняя футуристическая выставка картин 0,10...",Художественное Бюро Добычиной,1916,group,40,RU,Saint Petersburg,59.883300,30.2500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72069,13994,Robert Van Vorst,Sewell,M,1860-01-01,1924-01-01,New York,Florence,US,296,Fourteenth Annual Exhibition,Carnegie Institute,1910,group,1,US,Pittsburgh,40.433300,-79.9833
72070,13995,Abbott Handerson,Thayer,M,1849-01-01,1921-01-01,Boston,Dublin,US,296,Fourteenth Annual Exhibition,Carnegie Institute,1910,group,1,US,Pittsburgh,40.433300,-79.9833
72071,13996,Dwight William,Tryon,M,1849-01-01,1925-01-01,"Hartford, Connecticut",South Dartmouth,US,296,Fourteenth Annual Exhibition,Carnegie Institute,1910,group,1,US,Pittsburgh,40.433300,-79.9833
72072,13997,Louis David,Vaillant,M,1875-01-01,1944-01-01,"Cleveland, Ohio",\N,US,296,Fourteenth Annual Exhibition,Carnegie Institute,1910,group,1,US,Pittsburgh,40.433300,-79.9833
