In [2]:
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
from collections import defaultdict
import requests
import geopandas as gpd

In [3]:
artist_url = "https://github.com/MuseumofModernArt/collection/blob/main/Artists.csv?raw=true"
artwork_url = "https://github.com/MuseumofModernArt/collection/blob/main/Artworks.csv?raw=true"
artist_df = pd.read_csv(artist_url)
artwork_df = pd.read_csv(artwork_url)

In [4]:
female_artists = artist_df[artist_df['Gender'] == 'female']
female_artists.dropna(subset=['ConstituentID'])

Unnamed: 0,ConstituentID,DisplayName,ArtistBio,Nationality,Gender,BeginDate,EndDate,Wiki QID,ULAN
8,10,Irene Aronson,"American, born Germany 1918",American,female,1918,0,Q19748568,500042413.0
16,21,Ruth Asawa,"American, 1926–2013",American,female,1926,2013,Q7382874,500077806.0
17,22,Isidora Aschheim,Israeli,Israeli,female,0,0,,
23,28,Geneviève Asse,"French, born 1923",French,female,1923,0,Q1978192,500025324.0
25,31,Dana Atchley,"American, 1941–2000",American,female,1941,2000,,
...,...,...,...,...,...,...,...,...,...
15612,137853,Silvia Rosi,"Italian and Togolese, born 1992",Italian,female,1992,0,,
15617,138111,Ulla Wiggen,"Swedish, born 1942",Swedish,female,1942,0,,
15618,138280,Teresita Brigitte Cochran,"American, born 1975",American,female,1975,0,,
15626,138366,Johanna Unzueta,"Chilean, born 1974 Santiago, Chile. Lives and ...",Chilean,female,1974,0,,


In [5]:
female_artists.dropna(subset=['Nationality'])

Unnamed: 0,ConstituentID,DisplayName,ArtistBio,Nationality,Gender,BeginDate,EndDate,Wiki QID,ULAN
8,10,Irene Aronson,"American, born Germany 1918",American,female,1918,0,Q19748568,500042413.0
16,21,Ruth Asawa,"American, 1926–2013",American,female,1926,2013,Q7382874,500077806.0
17,22,Isidora Aschheim,Israeli,Israeli,female,0,0,,
23,28,Geneviève Asse,"French, born 1923",French,female,1923,0,Q1978192,500025324.0
25,31,Dana Atchley,"American, 1941–2000",American,female,1941,2000,,
...,...,...,...,...,...,...,...,...,...
15606,137635,Ami Lien,"American, born 1987",American,female,1987,0,,
15612,137853,Silvia Rosi,"Italian and Togolese, born 1992",Italian,female,1992,0,,
15617,138111,Ulla Wiggen,"Swedish, born 1942",Swedish,female,1942,0,,
15618,138280,Teresita Brigitte Cochran,"American, born 1975",American,female,1975,0,,


In [6]:
female_artist_ids = female_artists['ConstituentID'].tolist()


In [7]:
artwork_df

Unnamed: 0,Title,Artist,ConstituentID,ArtistBio,Nationality,BeginDate,EndDate,Gender,Date,Medium,...,OnView,Circumference (cm),Depth (cm),Diameter (cm),Height (cm),Length (cm),Weight (kg),Width (cm),Seat Height (cm),Duration (sec.)
0,"Ferdinandsbrücke Project, Vienna, Austria (Ele...",Otto Wagner,6210,"(Austrian, 1841–1918)",(Austrian),(1841),(1918),(male),1896,Ink and cut-and-pasted painted pages on paper,...,,,,,48.60000,,,168.900000,,
1,"City of Music, National Superior Conservatory ...",Christian de Portzamparc,7470,"(French, born 1944)",(French),(1944),(0),(male),1987,Paint and colored pencil on print,...,,,,,40.64010,,,29.845100,,
2,"Villa project, outside Vienna, Austria (Elevat...",Emil Hoppe,7605,"(Austrian, 1876–1957)",(Austrian),(1876),(1957),(male),1903,"Graphite, pen, color pencil, ink, and gouache ...",...,,,,,34.30000,,,31.800000,,
3,"The Manhattan Transcripts Project, New York, N...",Bernard Tschumi,7056,"(French and Swiss, born Switzerland 1944)",(),(1944),(0),(male),1980,Photographic reproduction with colored synthet...,...,,,,,50.80000,,,50.800000,,
4,"Villa project, outside Vienna, Austria (Exteri...",Emil Hoppe,7605,"(Austrian, 1876–1957)",(Austrian),(1876),(1957),(male),1903,"Graphite, color pencil, ink, and gouache on tr...",...,,,,,38.40000,,,19.100000,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157594,Box catalogue for the exhibition JAMES LEE BYA...,James Lee Byars,902,"(American, 1932–1997)",(American),(1932),(1997),(male),1977,Cardboard box containing black tissue paper,...,,,7.5,,20.00000,,,15.700000,,
157595,Box catalogue for the exhibition GEORG ETTL at...,Georg Ettl,74696,"(German, 1940–2014)",(German),(1940),(2014),(male),1978,"Cardboard box containing a typescipt, six card...",...,,,2.7,,20.20000,,,16.000000,,
157596,Box catalogue for the exhibition JANNIS KOUNEL...,Jannis Kounellis,3230,"(Greek, 1936–2017)",(Greek),(1936),(2017),(male),1978,Cardboard box containing four cards and an object,...,,,2.8,,20.30000,,,15.800000,,
157597,Panels for the Walls of the World: Phase II,Stan VanDerBeek,32424,"(American, 1927–1984)",(American),(1927),(1984),(male),1970,"Cut-and-pasted printed papers, spray paint, an...",...,,,0.0,,194.30000,,,609.600000,,


In [8]:
artwork_df['ConstituentID'] = artwork_df['ConstituentID'].str.split(',')
artwork_df = artwork_df.explode('ConstituentID') # creates singleton rows 

# 2. remove spaces 
artwork_df['ConstituentID'] = artwork_df['ConstituentID'].str.strip()

# 3. convert to numeric
artwork_df['ConstituentID'] = pd.to_numeric(artwork_df['ConstituentID'], errors='raise')

# 4. filter for female artists
# 4.1 Int64/float64 -> int64 
artwork_df['ConstituentID'] = artwork_df['ConstituentID'].astype('Int64')  # capital 'Int64' allows for NaN values

# 5. Filter artworks for female artists and get count of works
female_artworks = artwork_df[artwork_df['ConstituentID'].isin(female_artist_ids)]


In [9]:
# female_artworks

In [10]:
female_artworks.loc[:,'Nationality'] = female_artworks['Nationality'].str.replace(r'[()]', '', regex=True)


In [11]:
female_artworks

Unnamed: 0,Title,Artist,ConstituentID,ArtistBio,Nationality,BeginDate,EndDate,Gender,Date,Medium,...,OnView,Circumference (cm),Depth (cm),Diameter (cm),Height (cm),Length (cm),Weight (kg),Width (cm),Seat Height (cm),Duration (sec.)
66,"Villa dall'Ava, Paris (Saint-Cloud), France, E...","Rem Koolhaas, Madelon Vriesendorp",6957,"(Dutch, born 1944) (Dutch, born 1945)",Dutch Dutch,(1944) (1945),(0) (0),(male) (female),1987,Synthetic polymer paint and ink on paper,...,,,,,63.500100,,,99.060200,,
151,"Slow House Project, North Haven, New York, Pla...","Elizabeth Diller, Ricardo Scofidio, Diller + S...",6951,"(American, born Poland 1954) (American, born 1...",American American American,(1954) (1935) (1981),(0) (0) (0),(female) (male) (),1989,Computer-generated print on frosted polymer sh...,...,,,3.800000,,121.000000,,,92.700000,,
152,"The Peak Project, Hong Kong, China (Exterior p...",Zaha Hadid,6953,"(British, born Iraq. 1950–2016)",British,(1950),(2016),(female),1991,Synthetic polymer on paper mounted on canvas,...,,,,,129.540300,,,182.880400,,
154,"Egg of Columbus Circle project, New York, New ...","Rem Koolhaas, Zoe Zenghelis, Elia Zenghelis, M...",7559,"(Dutch, born 1944) (British, born Greece 1937)...",Dutch British British Dutch,(1944) (1937) (1937) (1945),(0) (0) (0) (0),(male) (female) (male) (female),1975,Ink and acrylic on paper,...,,,,,113.000000,,,68.600000,,
154,"Egg of Columbus Circle project, New York, New ...","Rem Koolhaas, Zoe Zenghelis, Elia Zenghelis, M...",6957,"(Dutch, born 1944) (British, born Greece 1937)...",Dutch British British Dutch,(1944) (1937) (1937) (1945),(0) (0) (0) (0),(male) (female) (male) (female),1975,Ink and acrylic on paper,...,,,,,113.000000,,,68.600000,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
156911,Indio / Traje,Liliana Porter,4703,"(Argentine, born 1941)",American,(1941),(0),(female),2009,Photogravure,...,,,,,39.370079,,,33.655067,,
156912,Mickey / Novia,Liliana Porter,4703,"(Argentine, born 1941)",American,(1941),(0),(female),2009,Photogravure,...,,,,,39.370079,,,33.655067,,
156936,Untitled (Drawing for Střelnice [The Shooting ...,Toyen (Marie Čermínová),5916,"(French, born Bohemia. 1902–1980)",Czech,(1902),(1980),(female),1939,Ink on paper\r,...,,,0.000000,,35.500000,,,45.402591,,
157566,Box catalogue for the exhibition Industriebaut...,"Bernd Becher, Hilla Becher",8095,"(German, 1931–2007) (German, 1934–2015)",German German,(1931) (1934),(2007) (2015),(male) (female),1968,cardboard box containing folded leaflet and te...,...,,,1.984379,,21.000000,,,17.000000,,


In [12]:
female_artworks.dropna(subset=['DateAcquired'])

Unnamed: 0,Title,Artist,ConstituentID,ArtistBio,Nationality,BeginDate,EndDate,Gender,Date,Medium,...,OnView,Circumference (cm),Depth (cm),Diameter (cm),Height (cm),Length (cm),Weight (kg),Width (cm),Seat Height (cm),Duration (sec.)
66,"Villa dall'Ava, Paris (Saint-Cloud), France, E...","Rem Koolhaas, Madelon Vriesendorp",6957,"(Dutch, born 1944) (Dutch, born 1945)",Dutch Dutch,(1944) (1945),(0) (0),(male) (female),1987,Synthetic polymer paint and ink on paper,...,,,,,63.500100,,,99.060200,,
151,"Slow House Project, North Haven, New York, Pla...","Elizabeth Diller, Ricardo Scofidio, Diller + S...",6951,"(American, born Poland 1954) (American, born 1...",American American American,(1954) (1935) (1981),(0) (0) (0),(female) (male) (),1989,Computer-generated print on frosted polymer sh...,...,,,3.800000,,121.000000,,,92.700000,,
152,"The Peak Project, Hong Kong, China (Exterior p...",Zaha Hadid,6953,"(British, born Iraq. 1950–2016)",British,(1950),(2016),(female),1991,Synthetic polymer on paper mounted on canvas,...,,,,,129.540300,,,182.880400,,
154,"Egg of Columbus Circle project, New York, New ...","Rem Koolhaas, Zoe Zenghelis, Elia Zenghelis, M...",7559,"(Dutch, born 1944) (British, born Greece 1937)...",Dutch British British Dutch,(1944) (1937) (1937) (1945),(0) (0) (0) (0),(male) (female) (male) (female),1975,Ink and acrylic on paper,...,,,,,113.000000,,,68.600000,,
154,"Egg of Columbus Circle project, New York, New ...","Rem Koolhaas, Zoe Zenghelis, Elia Zenghelis, M...",6957,"(Dutch, born 1944) (British, born Greece 1937)...",Dutch British British Dutch,(1944) (1937) (1937) (1945),(0) (0) (0) (0),(male) (female) (male) (female),1975,Ink and acrylic on paper,...,,,,,113.000000,,,68.600000,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
156911,Indio / Traje,Liliana Porter,4703,"(Argentine, born 1941)",American,(1941),(0),(female),2009,Photogravure,...,,,,,39.370079,,,33.655067,,
156912,Mickey / Novia,Liliana Porter,4703,"(Argentine, born 1941)",American,(1941),(0),(female),2009,Photogravure,...,,,,,39.370079,,,33.655067,,
156936,Untitled (Drawing for Střelnice [The Shooting ...,Toyen (Marie Čermínová),5916,"(French, born Bohemia. 1902–1980)",Czech,(1902),(1980),(female),1939,Ink on paper\r,...,,,0.000000,,35.500000,,,45.402591,,
157566,Box catalogue for the exhibition Industriebaut...,"Bernd Becher, Hilla Becher",8095,"(German, 1931–2007) (German, 1934–2015)",German German,(1931) (1934),(2007) (2015),(male) (female),1968,cardboard box containing folded leaflet and te...,...,,,1.984379,,21.000000,,,17.000000,,


In [13]:
female_artworks.loc[:, 'acquisition_year'] = pd.to_numeric(
    female_artworks['DateAcquired'].astype(str).str[:4], 
    errors='coerce'
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  female_artworks.loc[:, 'acquisition_year'] = pd.to_numeric(


In [14]:
# female_artworks

In [15]:
outside_range = female_artworks.loc[~female_artworks['acquisition_year'].between(1929, 2024)]

In [16]:
print(f"Number of values not between 1929 and 2024: {len(outside_range)}")

Number of values not between 1929 and 2024: 1368


In [17]:
# print(outside_range)

In [18]:
female_artworks = female_artworks.loc[female_artworks['acquisition_year'].between(1929, 2024)]

In [19]:
# female_artworks

In [20]:
# female_artworks['Nationality'].unique()

In [21]:
# female_artworks_p1_map_animation

In [22]:
female_artworks_p1_map_animation = female_artworks[['Title','Artist','ConstituentID','Nationality','Department', 'acquisition_year']]


In [23]:
geolocator = Nominatim(user_agent="geoapiExercises")


In [24]:
def get_lat_long(country):
    try:
        location = geolocator.geocode(country)
        if location:
            return location.latitude, location.longitude
    except Exception as e:
        print(f"Error fetching coordinates for {country}: {e}")
    return None, None

In [25]:
female_artworks_p1_map_animation.dropna(subset=['Nationality'])

Unnamed: 0,Title,Artist,ConstituentID,Nationality,Department,acquisition_year
66,"Villa dall'Ava, Paris (Saint-Cloud), France, E...","Rem Koolhaas, Madelon Vriesendorp",6957,Dutch Dutch,Architecture & Design,2000.0
151,"Slow House Project, North Haven, New York, Pla...","Elizabeth Diller, Ricardo Scofidio, Diller + S...",6951,American American American,Architecture & Design,1992.0
152,"The Peak Project, Hong Kong, China (Exterior p...",Zaha Hadid,6953,British,Architecture & Design,1992.0
154,"Egg of Columbus Circle project, New York, New ...","Rem Koolhaas, Zoe Zenghelis, Elia Zenghelis, M...",7559,Dutch British British Dutch,Architecture & Design,1992.0
154,"Egg of Columbus Circle project, New York, New ...","Rem Koolhaas, Zoe Zenghelis, Elia Zenghelis, M...",6957,Dutch British British Dutch,Architecture & Design,1992.0
...,...,...,...,...,...,...
156911,Indio / Traje,Liliana Porter,4703,American,Drawings & Prints,2024.0
156912,Mickey / Novia,Liliana Porter,4703,American,Drawings & Prints,2024.0
156936,Untitled (Drawing for Střelnice [The Shooting ...,Toyen (Marie Čermínová),5916,Czech,Drawings & Prints,2024.0
157566,Box catalogue for the exhibition Industriebaut...,"Bernd Becher, Hilla Becher",8095,German German,Drawings & Prints,2022.0


In [26]:
# female_artworks_p1_map_animation['Nationality'] = female_artworks_p1_map_animation['Nationality'].apply(lambda x: x.split()[0])
female_artworks_p1_map_animation.loc[:, 'Nationality'] = (
    female_artworks_p1_map_animation['Nationality']
    .replace('', 'Unknown')
    .apply(lambda x: x.split()[0] if isinstance(x, str) and x.strip() else 'Unknown')
)


In [27]:
female_artworks_p1_map_animation

Unnamed: 0,Title,Artist,ConstituentID,Nationality,Department,acquisition_year
66,"Villa dall'Ava, Paris (Saint-Cloud), France, E...","Rem Koolhaas, Madelon Vriesendorp",6957,Dutch,Architecture & Design,2000.0
151,"Slow House Project, North Haven, New York, Pla...","Elizabeth Diller, Ricardo Scofidio, Diller + S...",6951,American,Architecture & Design,1992.0
152,"The Peak Project, Hong Kong, China (Exterior p...",Zaha Hadid,6953,British,Architecture & Design,1992.0
154,"Egg of Columbus Circle project, New York, New ...","Rem Koolhaas, Zoe Zenghelis, Elia Zenghelis, M...",7559,Dutch,Architecture & Design,1992.0
154,"Egg of Columbus Circle project, New York, New ...","Rem Koolhaas, Zoe Zenghelis, Elia Zenghelis, M...",6957,Dutch,Architecture & Design,1992.0
...,...,...,...,...,...,...
156911,Indio / Traje,Liliana Porter,4703,American,Drawings & Prints,2024.0
156912,Mickey / Novia,Liliana Porter,4703,American,Drawings & Prints,2024.0
156936,Untitled (Drawing for Střelnice [The Shooting ...,Toyen (Marie Čermínová),5916,Czech,Drawings & Prints,2024.0
157566,Box catalogue for the exhibition Industriebaut...,"Bernd Becher, Hilla Becher",8095,German,Drawings & Prints,2022.0


In [28]:
unique_nationalities = female_artworks_p1_map_animation['Nationality'].unique()


In [29]:
unique_nationalities

array(['Dutch', 'American', 'British', 'Italian', 'Finnish', 'Japanese',
       'Canadian', 'German', 'Swiss', 'Swedish', 'Danish', 'French',
       'Colombian', 'Spanish', 'Czech', 'Polish', 'Nationality',
       'Russian', 'Icelandic', 'Cuban', 'Brazilian', 'Hungarian',
       'Venezuelan', 'Ukrainian', 'Argentine', 'Unknown', 'Australian',
       'Latvian', 'Belgian', 'Chilean', 'Israeli', 'Austrian', 'Native',
       'Kuwaiti', 'South', 'Peruvian', 'Serbian', 'Mexican', 'Iranian',
       'Pakistani', 'Paraguayan', 'Slovenian', 'Bahamian', 'Turkish',
       'Korean', 'Irish', 'Kenyan', 'Scottish', 'Taiwanese', 'Norwegian',
       'Albanian', 'Malaysian', 'Croatian', 'Bosnian', 'Thai', 'Slovak',
       'Indian', 'Chinese', 'Portuguese', 'Afghan', 'Egyptian',
       'Lebanese', 'Moroccan', 'Romanian', 'Ghanaian', 'Georgian',
       'Palestinian', 'Uruguayan', 'Macedonian', 'Welsh', 'Guatemalan',
       'Syrian', 'New', 'Emirati', 'Greek', 'Sudanese', 'Cameroonian',
       'Nigerian', 

In [30]:
# female_artworks_p1_map_animation = female_artworks_p1_map_animation[female_artworks_p1_map_animation['Nationality'] != 'Unknown']
# Drop rows where Nationality is either 'Unknown' or 'Nationality'
female_artworks_p1_map_animation = female_artworks_p1_map_animation[
    ~((female_artworks_p1_map_animation['Nationality'] == 'Unknown') | (female_artworks_p1_map_animation['Nationality'] == 'Nationality'))
]


In [31]:
demonym_to_country = {
    'Dutch': 'Netherlands',
    'American': 'United States of America',
    'British': 'United Kingdom',
    'Italian': 'Italy',
    'Finnish': 'Finland',
    'Japanese': 'Japan',
    'Canadian': 'Canada',
    'German': 'Germany',
    'Swiss': 'Switzerland',
    'Swedish': 'Sweden',
    'Danish': 'Denmark',
    'French': 'France',
    'Colombian': 'Colombia',
    'Spanish': 'Spain',
    'Czech': 'Czech Republic',
    'Polish': 'Poland',
    'Russian': 'Russia',
    'Icelandic': 'Iceland',
    'Cuban': 'Cuba',
    'Brazilian': 'Brazil',
    'Hungarian': 'Hungary',
    'Venezuelan': 'Venezuela',
    'Ukrainian': 'Ukraine',
    'Argentine': 'Argentina',
    'Unknown': None,  # Placeholder for unknowns
    'Australian': 'Australia',
    'Latvian': 'Latvia',
    'Belgian': 'Belgium',
    'Chilean': 'Chile',
    'Israeli': 'Israel',
    'Austrian': 'Austria',
    'Native': 'United States of America',  
    'Kuwaiti': 'Kuwait',
    'South': 'South Africa',   
    'Peruvian': 'Peru',
    'Serbian': 'Republic of Serbia',
    'Mexican': 'Mexico',
    'Iranian': 'Iran',
    'Pakistani': 'Pakistan',
    'Paraguayan': 'Paraguay',
    'Slovenian': 'Slovenia',
    'Bahamian': 'The Bahamas',
    'Turkish': 'Turkey',
    'Korean': 'South Korea',  
    'Irish': 'Ireland',
    'Kenyan': 'Kenya',
    'Scottish': 'United Kingdom',  # Scotland is part of the UK
    'Taiwanese': 'Taiwan',
    'Norwegian': 'Norway',
    'Albanian': 'Albania',
    'Malaysian': 'Malaysia',
    'Croatian': 'Croatia',
    'Bosnian': 'Bosnia and Herzegovina',
    'Thai': 'Thailand',
    'Slovak': 'Slovakia',
    'Indian': 'India',
    'Chinese': 'China',
    'Portuguese': 'Portugal',
    'Afghan': 'Afghanistan',
    'Egyptian': 'Egypt',
    'Lebanese': 'Lebanon',
    'Moroccan': 'Morocco',
    'Romanian': 'Romania',
    'Ghanaian': 'Ghana',
    'Georgian': 'Georgia',
    'Palestinian': 'Palestine',
    'Uruguayan': 'Uruguay',
    'Macedonian': 'Macedonia',
    'Welsh': 'United Kingdom',  # Wales is part of the UK
    'Guatemalan': 'Guatemala',
    'Syrian': 'Syria',
    'New': 'New Zealand',
    'Emirati': 'United Arab Emirates',
    'Greek': 'Greece',
    'Sudanese': 'Sudan',
    'Cameroonian': 'Cameroon',
    'Nigerian': 'Nigeria',
    'Ivatan': 'Philippines',  # Likely refers to the Ivatan people in the Philippines
    'Nepali': 'Nepal',
    'Bangladeshi': 'Bangladesh',
    'Filipino': 'Philippines',
    'Hunkpapa': 'United States of America',  # A Native American tribe of the Lakota people
}

In [32]:
female_artworks_p1_map_animation.loc[:,'Country'] = female_artworks_p1_map_animation['Nationality'].map(demonym_to_country)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  female_artworks_p1_map_animation.loc[:,'Country'] = female_artworks_p1_map_animation['Nationality'].map(demonym_to_country)


In [33]:
unique_countries = female_artworks_p1_map_animation['Country'].unique()
unique_countries

array(['Netherlands', 'United States of America', 'United Kingdom',
       'Italy', 'Finland', 'Japan', 'Canada', 'Germany', 'Switzerland',
       'Sweden', 'Denmark', 'France', 'Colombia', 'Spain',
       'Czech Republic', 'Poland', 'Russia', 'Iceland', 'Cuba', 'Brazil',
       'Hungary', 'Venezuela', 'Ukraine', 'Argentina', 'Australia',
       'Latvia', 'Belgium', 'Chile', 'Israel', 'Austria', 'Kuwait',
       'South Africa', 'Peru', 'Republic of Serbia', 'Mexico', 'Iran',
       'Pakistan', 'Paraguay', 'Slovenia', 'The Bahamas', 'Turkey',
       'South Korea', 'Ireland', 'Kenya', 'Taiwan', 'Norway', 'Albania',
       'Malaysia', 'Croatia', 'Bosnia and Herzegovina', 'Thailand',
       'Slovakia', 'India', 'China', 'Portugal', 'Afghanistan', 'Egypt',
       'Lebanon', 'Morocco', 'Romania', 'Ghana', 'Georgia', 'Palestine',
       'Uruguay', 'Macedonia', 'Guatemala', 'Syria', 'New Zealand',
       'United Arab Emirates', 'Greece', 'Sudan', 'Cameroon', 'Nigeria',
       'Philippines', 'N

In [34]:
# coordinates = {country: get_lat_long(country) for country in unique_countries}


In [35]:
geojson_path = "countries.geo.json"
world = gpd.read_file(geojson_path)

In [36]:
if 'name' in world.columns:
    country_column = 'name'
elif 'ADMIN' in world.columns:  # Another common property for country names
    country_column = 'ADMIN'
else:
    raise ValueError("No suitable country name column found in GeoJSON file.")


In [37]:
world['centroid'] = world.geometry.centroid



  world['centroid'] = world.geometry.centroid


In [38]:
country_to_coords = {
    row[country_column]: (row['centroid'].x, row['centroid'].y)
    for _, row in world.iterrows()
}


In [39]:
country_coords_df = pd.DataFrame(list(country_to_coords.items()), columns=['Country', 'Coordinates'])


In [40]:
country_coords_df

Unnamed: 0,Country,Coordinates
0,Afghanistan,"(66.08669017344639, 33.85639924331851)"
1,Angola,"(17.470572472744323, -12.245868967981588)"
2,Albania,"(20.03242638605213, 41.14135336776871)"
3,United Arab Emirates,"(54.20671462253365, 23.86863351514742)"
4,Argentina,"(-65.1753606596912, -35.44682138709455)"
...,...,...
175,West Bank,"(35.27331963061304, 31.941136592975468)"
176,Yemen,"(47.53504457693309, 15.913231867023978)"
177,South Africa,"(25.048013931521393, -28.947033272505536)"
178,Zambia,"(27.727591839703326, -13.395067586158229)"


In [41]:
female_artworks_p1_map_animation
# Add latitude and longitude columns
female_artworks_p1_map_animation.loc[:,'Latitude'] = female_artworks_p1_map_animation['Country'].map(lambda x: country_to_coords.get(x, (None, None))[1])
female_artworks_p1_map_animation.loc[:,'Longitude'] = female_artworks_p1_map_animation['Country'].map(lambda x: country_to_coords.get(x, (None, None))[0])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  female_artworks_p1_map_animation.loc[:,'Latitude'] = female_artworks_p1_map_animation['Country'].map(lambda x: country_to_coords.get(x, (None, None))[1])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  female_artworks_p1_map_animation.loc[:,'Longitude'] = female_artworks_p1_map_animation['Country'].map(lambda x: country_to_coords.get(x, (None, None))[0])


In [42]:
female_artworks_p1_map_animation

Unnamed: 0,Title,Artist,ConstituentID,Nationality,Department,acquisition_year,Country,Latitude,Longitude
66,"Villa dall'Ava, Paris (Saint-Cloud), France, E...","Rem Koolhaas, Madelon Vriesendorp",6957,Dutch,Architecture & Design,2000.0,Netherlands,52.298700,5.512217
151,"Slow House Project, North Haven, New York, Pla...","Elizabeth Diller, Ricardo Scofidio, Diller + S...",6951,American,Architecture & Design,1992.0,United States of America,45.705630,-112.599438
152,"The Peak Project, Hong Kong, China (Exterior p...",Zaha Hadid,6953,British,Architecture & Design,1992.0,United Kingdom,53.914774,-2.853136
154,"Egg of Columbus Circle project, New York, New ...","Rem Koolhaas, Zoe Zenghelis, Elia Zenghelis, M...",7559,Dutch,Architecture & Design,1992.0,Netherlands,52.298700,5.512217
154,"Egg of Columbus Circle project, New York, New ...","Rem Koolhaas, Zoe Zenghelis, Elia Zenghelis, M...",6957,Dutch,Architecture & Design,1992.0,Netherlands,52.298700,5.512217
...,...,...,...,...,...,...,...,...,...
156911,Indio / Traje,Liliana Porter,4703,American,Drawings & Prints,2024.0,United States of America,45.705630,-112.599438
156912,Mickey / Novia,Liliana Porter,4703,American,Drawings & Prints,2024.0,United States of America,45.705630,-112.599438
156936,Untitled (Drawing for Střelnice [The Shooting ...,Toyen (Marie Čermínová),5916,Czech,Drawings & Prints,2024.0,Czech Republic,49.775245,15.334558
157566,Box catalogue for the exhibition Industriebaut...,"Bernd Becher, Hilla Becher",8095,German,Drawings & Prints,2022.0,Germany,51.133723,10.288485


In [43]:
rows_with_nan_latitude = female_artworks_p1_map_animation[female_artworks_p1_map_animation['Latitude'].isna()]
rows_with_nan_latitude

Unnamed: 0,Title,Artist,ConstituentID,Nationality,Department,acquisition_year,Country,Latitude,Longitude
124887,A Magical Substance Flows Into Me,Jumana Manna,49222,Palestinian,Media and Performance,2016.0,Palestine,,
126225,"The Part About the Bandits, Part 1 of the Inci...","Basel Abbas, Ruanne Abou-Rahme",49570,Palestinian,Media and Performance,2017.0,Palestine,,
126226,Unforgiving Years - Part 2 of The Incidental I...,"Basel Abbas, Ruanne Abou-Rahme",49570,Palestinian,Media and Performance,2017.0,Palestine,,
129184,The Part About the Bandits - Chapter 1,"Basel Abbas, Ruanne Abou-Rahme",49570,Palestinian,Media and Performance,2017.0,Palestine,,
129185,The Part About the Bandits - Chapter 2,"Basel Abbas, Ruanne Abou-Rahme",49570,Palestinian,Media and Performance,2017.0,Palestine,,
129186,Unforgiving Years - Chapter 3,"Basel Abbas, Ruanne Abou-Rahme",49570,Palestinian,Media and Performance,2017.0,Palestine,,
129187,Unforgiving Years - Chapter 4,"Basel Abbas, Ruanne Abou-Rahme",49570,Palestinian,Media and Performance,2017.0,Palestine,,
139693,The Part About the Bandits - Chapter 2; Unforg...,"Basel Abbas, Ruanne Abou-Rahme",49570,Palestinian,Media and Performance,2017.0,Palestine,,


In [44]:
female_artworks_p1_map_animation.dropna(subset=['Latitude', 'Longitude'], inplace=True)
print(female_artworks_p1_map_animation[['Latitude', 'Longitude']].isna().sum())


Latitude     0
Longitude    0
dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  female_artworks_p1_map_animation.dropna(subset=['Latitude', 'Longitude'], inplace=True)


In [45]:
print(female_artworks_p1_map_animation['Latitude'].isna().sum())
print(female_artworks_p1_map_animation['Title'].isna().sum())
print(female_artworks_p1_map_animation['Artist'].isna().sum())
print(female_artworks_p1_map_animation['ConstituentID'].isna().sum())
print(female_artworks_p1_map_animation['Nationality'].isna().sum())
# print(female_artworks_p1_map_animation['Latitude'].isna().sum())

# female_artworks_p1_map_animation = female_artworks_p1_map_animation[female_artworks_p1_map_animation['Latitude'] != 'NaN']

0
0
0
0
0


In [46]:
female_artworks_p1_map_animation['country_count'] = female_artworks_p1_map_animation.groupby('Country')['Country'].transform('count')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  female_artworks_p1_map_animation['country_count'] = female_artworks_p1_map_animation.groupby('Country')['Country'].transform('count')


In [47]:
female_artworks_p1_map_animation

Unnamed: 0,Title,Artist,ConstituentID,Nationality,Department,acquisition_year,Country,Latitude,Longitude,country_count
66,"Villa dall'Ava, Paris (Saint-Cloud), France, E...","Rem Koolhaas, Madelon Vriesendorp",6957,Dutch,Architecture & Design,2000.0,Netherlands,52.298700,5.512217,321
151,"Slow House Project, North Haven, New York, Pla...","Elizabeth Diller, Ricardo Scofidio, Diller + S...",6951,American,Architecture & Design,1992.0,United States of America,45.705630,-112.599438,13882
152,"The Peak Project, Hong Kong, China (Exterior p...",Zaha Hadid,6953,British,Architecture & Design,1992.0,United Kingdom,53.914774,-2.853136,747
154,"Egg of Columbus Circle project, New York, New ...","Rem Koolhaas, Zoe Zenghelis, Elia Zenghelis, M...",7559,Dutch,Architecture & Design,1992.0,Netherlands,52.298700,5.512217,321
154,"Egg of Columbus Circle project, New York, New ...","Rem Koolhaas, Zoe Zenghelis, Elia Zenghelis, M...",6957,Dutch,Architecture & Design,1992.0,Netherlands,52.298700,5.512217,321
...,...,...,...,...,...,...,...,...,...,...
156911,Indio / Traje,Liliana Porter,4703,American,Drawings & Prints,2024.0,United States of America,45.705630,-112.599438,13882
156912,Mickey / Novia,Liliana Porter,4703,American,Drawings & Prints,2024.0,United States of America,45.705630,-112.599438,13882
156936,Untitled (Drawing for Střelnice [The Shooting ...,Toyen (Marie Čermínová),5916,Czech,Drawings & Prints,2024.0,Czech Republic,49.775245,15.334558,29
157566,Box catalogue for the exhibition Industriebaut...,"Bernd Becher, Hilla Becher",8095,German,Drawings & Prints,2022.0,Germany,51.133723,10.288485,1056


In [48]:
# female_artworks_p1_map_animation.to_csv("female_artworks_p1_map_animation.csv", index=False) 

In [49]:
df_jittered = female_artworks_p1_map_animation.copy()
    
# Calculate jitter scale for each point based on country count
jitter_scale = np.sqrt(female_artworks_p1_map_animation['country_count']) * 0.1

# Add random jitter
df_jittered['Latitude'] += np.random.normal(0, jitter_scale, len(female_artworks_p1_map_animation))
df_jittered['Longitude'] += np.random.normal(0, jitter_scale, len(female_artworks_p1_map_animation))


In [50]:
df_jittered

Unnamed: 0,Title,Artist,ConstituentID,Nationality,Department,acquisition_year,Country,Latitude,Longitude,country_count
66,"Villa dall'Ava, Paris (Saint-Cloud), France, E...","Rem Koolhaas, Madelon Vriesendorp",6957,Dutch,Architecture & Design,2000.0,Netherlands,55.696624,5.404878,321
151,"Slow House Project, North Haven, New York, Pla...","Elizabeth Diller, Ricardo Scofidio, Diller + S...",6951,American,Architecture & Design,1992.0,United States of America,46.800439,-119.202155,13882
152,"The Peak Project, Hong Kong, China (Exterior p...",Zaha Hadid,6953,British,Architecture & Design,1992.0,United Kingdom,56.244890,-2.372770,747
154,"Egg of Columbus Circle project, New York, New ...","Rem Koolhaas, Zoe Zenghelis, Elia Zenghelis, M...",7559,Dutch,Architecture & Design,1992.0,Netherlands,50.989501,5.109428,321
154,"Egg of Columbus Circle project, New York, New ...","Rem Koolhaas, Zoe Zenghelis, Elia Zenghelis, M...",6957,Dutch,Architecture & Design,1992.0,Netherlands,56.332442,8.095419,321
...,...,...,...,...,...,...,...,...,...,...
156911,Indio / Traje,Liliana Porter,4703,American,Drawings & Prints,2024.0,United States of America,67.564785,-107.291933,13882
156912,Mickey / Novia,Liliana Porter,4703,American,Drawings & Prints,2024.0,United States of America,30.271752,-101.953770,13882
156936,Untitled (Drawing for Střelnice [The Shooting ...,Toyen (Marie Čermínová),5916,Czech,Drawings & Prints,2024.0,Czech Republic,50.754902,15.224387,29
157566,Box catalogue for the exhibition Industriebaut...,"Bernd Becher, Hilla Becher",8095,German,Drawings & Prints,2022.0,Germany,54.506998,8.947796,1056


In [51]:
df_jittered.to_json("jittered_map.json", orient="records")

In [52]:
# df_jittered.to_csv("jittered_map.csv")

In [53]:
# earliest_acquisitions = df_jittered.groupby('ConstituentID')['acquisition_year'].min().reset_index()
# earliest_acquisitions

In [54]:
female_artists_with_work_counts = pd.read_csv("female_artists_with_work_counts.csv")

In [55]:
min_year_idx = df_jittered.groupby('ConstituentID')['acquisition_year'].idxmin()
# acquisitions_with_dept = df_jittered.loc[min_year_idx, ['ConstituentID', 'acquisition_year', 'Department']].reset_index(drop=True)
acquisitions_with_dept = (
    df_jittered.loc[min_year_idx, ['ConstituentID', 'acquisition_year', 'Department']]
    .drop_duplicates(subset=['ConstituentID'])  # Ensure unique rows by ConstituentID
    .reset_index(drop=True)
)

In [56]:
filtered_acquisitions = acquisitions_with_dept[acquisitions_with_dept['ConstituentID'].isin(female_artist_ids)]
filtered_acquisitions

Unnamed: 0,ConstituentID,acquisition_year,Department
0,10,1952.0,Drawings & Prints
1,21,1974.0,Drawings & Prints
2,22,1954.0,Drawings & Prints
3,28,1978.0,Drawings & Prints
4,31,1970.0,Drawings & Prints
...,...,...,...
2310,137853,2024.0,Photography
2311,138111,2024.0,Painting & Sculpture
2312,138280,2008.0,Architecture & Design
2313,138366,2024.0,Drawings & Prints


In [57]:
bins = [1930, 1940, 1950, 1960, 1970, 1980, 1990, 2000, 2010, 2020, 2025]
labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']


In [58]:
# Add a new column with the bins
filtered_acquisitions['year_group'] = pd.cut(filtered_acquisitions['acquisition_year'], bins=bins, labels=labels, right=False)
filtered_acquisitions

Unnamed: 0,ConstituentID,acquisition_year,Department,year_group
0,10,1952.0,Drawings & Prints,2
1,21,1974.0,Drawings & Prints,4
2,22,1954.0,Drawings & Prints,2
3,28,1978.0,Drawings & Prints,4
4,31,1970.0,Drawings & Prints,4
...,...,...,...,...
2310,137853,2024.0,Photography,9
2311,138111,2024.0,Painting & Sculpture,9
2312,138280,2008.0,Architecture & Design,7
2313,138366,2024.0,Drawings & Prints,9


In [59]:
filtered_acquisitions = filtered_acquisitions.merge(
    female_artists_with_work_counts[['ConstituentID', 'DisplayName', 'artwork_count']],
    on='ConstituentID',
    how='left'
      # Use 'left' join to keep all rows in filtered_acquisitions
)

In [60]:
filtered_acquisitions

Unnamed: 0,ConstituentID,acquisition_year,Department,year_group,DisplayName,artwork_count
0,10,1952.0,Drawings & Prints,2,Irene Aronson,1.0
1,21,1974.0,Drawings & Prints,4,Ruth Asawa,56.0
2,22,1954.0,Drawings & Prints,2,Isidora Aschheim,1.0
3,28,1978.0,Drawings & Prints,4,Geneviève Asse,50.0
4,31,1970.0,Drawings & Prints,4,Dana Atchley,7.0
...,...,...,...,...,...,...
2310,137853,2024.0,Photography,9,Silvia Rosi,6.0
2311,138111,2024.0,Painting & Sculpture,9,Ulla Wiggen,2.0
2312,138280,2008.0,Architecture & Design,7,Teresita Brigitte Cochran,1.0
2313,138366,2024.0,Drawings & Prints,9,Johanna Unzueta,1.0


In [61]:
filtered_acquisitions['x'] = 0.0
filtered_acquisitions['y'] = 0.0
filtered_acquisitions['z'] = 0.0

In [62]:
filtered_acquisitions

Unnamed: 0,ConstituentID,acquisition_year,Department,year_group,DisplayName,artwork_count,x,y,z
0,10,1952.0,Drawings & Prints,2,Irene Aronson,1.0,0.0,0.0,0.0
1,21,1974.0,Drawings & Prints,4,Ruth Asawa,56.0,0.0,0.0,0.0
2,22,1954.0,Drawings & Prints,2,Isidora Aschheim,1.0,0.0,0.0,0.0
3,28,1978.0,Drawings & Prints,4,Geneviève Asse,50.0,0.0,0.0,0.0
4,31,1970.0,Drawings & Prints,4,Dana Atchley,7.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
2310,137853,2024.0,Photography,9,Silvia Rosi,6.0,0.0,0.0,0.0
2311,138111,2024.0,Painting & Sculpture,9,Ulla Wiggen,2.0,0.0,0.0,0.0
2312,138280,2008.0,Architecture & Design,7,Teresita Brigitte Cochran,1.0,0.0,0.0,0.0
2313,138366,2024.0,Drawings & Prints,9,Johanna Unzueta,1.0,0.0,0.0,0.0


In [63]:
import pandas as pd
import numpy as np

# Assuming your dataframe is called `filtered_acquisitions`
df1 = filtered_acquisitions.copy()

# 1. Assign a flat `y` position (e.g., y = 0)
df1['y'] = 0  # Flat plane along the y-axis

# 2. Generate clusters for rows by `year_group` (timeline along `x`)
spacing_x = 50  # Spacing between year_group clusters along `x`
year_group_positions = {group: i for i, group in enumerate(sorted(df1['year_group'].unique()))}
df1['x'] = df1['year_group'].apply(lambda group: year_group_positions[group] * spacing_x)

# 3. Spread within clusters by department along `z`
spacing_z = 10  # Spacing between department clusters along `z`
department_positions = {dept: i for i, dept in enumerate(sorted(df1['Department'].unique()))}
df1['z'] = df1.apply(lambda row: department_positions[row['Department']] * spacing_z + np.random.uniform(-5, 5), axis=1)

# 4. Scale work count to a reasonable size for rendering (radius of the star)
min_scale = 0.5  # Minimum size for a star
max_scale = 5.0  # Maximum size for a star
df1['scale'] = df1['artwork_count'].apply(lambda x: np.interp(x, (df1['artwork_count'].min(), df1['artwork_count'].max()), (min_scale, max_scale)))

# Final dataframe
# print(df1[['ConstituentID', 'DisplayName', 'x', 'y', 'z', 'scale']])
df2=df1[['ConstituentID', 'DisplayName', 'year_group', 'Department', 'x', 'y', 'z', 'scale']]
print(df2)

      ConstituentID                DisplayName year_group  \
0                10              Irene Aronson          2   
1                21                 Ruth Asawa          4   
2                22           Isidora Aschheim          2   
3                28             Geneviève Asse          4   
4                31               Dana Atchley          4   
...             ...                        ...        ...   
2310         137853                Silvia Rosi          9   
2311         138111                Ulla Wiggen          9   
2312         138280  Teresita Brigitte Cochran          7   
2313         138366            Johanna Unzueta          9   
2314         138440                  Aiko Mogi          9   

                 Department    x  y          z     scale  
0         Drawings & Prints  100  0  12.754454  0.500000  
1         Drawings & Prints  200  0   6.524796  0.573225  
2         Drawings & Prints  100  0  14.444069  0.500000  
3         Drawings & Prints  20

In [64]:
# df2.to_csv('test-stars.csv')

In [65]:
# df2.to_json('test-stars-2.json', orient="records")