# WikiArt dataset cleaning, extending

Wikiart: all art pieces stored as a picture in Wikipedia

In [2]:
import pandas as pd
import numpy as np

In [13]:
df2 = pd.read_csv('datasets/originals/wikiart_art_pieces.csv') # From Kaggle if I remember correctly, but can be downloaded from other sites
print(df2['artist'].value_counts())

artist
Vincent van Gogh              1931
Nicholas Roerich              1843
Pierre-Auguste Renoir         1412
Claude Monet                  1367
Giovanni Battista Piranesi    1352
                              ... 
Arthur Lowe                      2
Vahram Gayfedjian                2
Gisela Colon                     2
Zakar Zakarian                   2
Ferenc Pinter                    1
Name: count, Length: 3209, dtype: int64


In [14]:
drop_artist = ['Byzantine Mosaics', 'Orthodox Icons', 'Romanesque Architecture', 'Viking art', 'Master of the Small Landscapes', 'Fayum portrait', ]
df2_refined = df2[~df2['artist'].isin(drop_artist)].reset_index(drop=True)
df2_refined = df2_refined.drop(columns=['url', 'img', 'file_name']).reset_index(drop=True)
#There are no NaN values in the artist column.
#df2[[row['artist'] is np.nan for i, row in df2.iterrows()]]
print(df2_refined['style'].value_counts()[0:15])
print("\nAmount of pictures:" ,len(df2_refined))
df2_refined[10000:10006]
#Later we drop tags too. 
#A recommendation is to drop style == "Unknown"

style
Impressionism             16083
Realism                   15764
Romanticism               15010
Expressionism             11455
Post-Impressionism         8147
Baroque                    7496
Art Nouveau (Modern)       7382
Surrealism                 6988
Unknown                    6900
Symbolism                  5224
Neoclassicism              4360
Abstract Expressionism     3909
Rococo                     3537
Northern Renaissance       3046
Cubism                     2530
Name: count, dtype: int64

Amount of pictures: 175313


Unnamed: 0,artist,style,genre,movement,tags
10000,Frans Snyders,Baroque,animal painting,Baroque,['Mythology']
10001,Frans Snyders,Baroque,animal painting,Baroque,"['animals', 'birds']"
10002,Frans Snyders,Baroque,still life,Baroque,"['fruits-and-vegetables', 'dishware-and-cutler..."
10003,Frans Snyders,Baroque,still life,Baroque,"['fruits-and-vegetables', 'dishware-and-cutler..."
10004,Frans Snyders,Baroque,still life,Baroque,['food-and-beverages']
10005,Frans Snyders,Baroque,literary painting,Baroque,"['animals', 'foxes', 'herons', 'Wildlife', 'Te..."


In [12]:
#Save refined dataset
df2_refined.to_csv('datasets/wikiart_paintings_refined.csv', index=False)

## Group artists

One way to group:
<details><summary><u>First method</u></summary>
<p>
    
```python
df2_grouped = (df2_refined.drop(columns=['tags'])).groupby(['style','artist', 'movement'])['movement'].count()
df2_grouped = df2_grouped.to_frame()
df2_grouped
#Kind of hard to deal with this frame because it has 2 layers, so you have to get 
```

</p>
</details>

Better method:

In [18]:
df2_grouped2 = (df2_refined.drop(columns=['tags'])).pivot_table(index=['style','artist', 'movement'], aggfunc='size')
df2_grouped2 = df2_grouped2.reset_index(name='count')
df2_grouped2.head(5)

Unnamed: 0,style,artist,movement,count
0,Abstract Art,Ad Reinhardt,Abstract Expressionism,15
1,Abstract Art,Adnan Coker,Abstract Art,25
2,Abstract Art,Akkitham Narayanan,Abstract Art,17
3,Abstract Art,Alberto Magnelli,Abstract Art,19
4,Abstract Art,Alekos Kontopoulos,Social Realism,26


In [None]:
#print(df2_grouped2[df2_grouped2['artist']=="Henri Matisse"])
print(df2_grouped2['movement'].unique()[0:15],'...', '\n Movements:', len(df2_grouped2['movement'].unique()), '\n')
#print('Baroque movement styles:\n', df2_grouped2[df2_grouped2['movement']=='Baroque']['style'].value_counts())
print("Artists:\n", df2_grouped2['artist'].describe())
df2_grouped2[df2_grouped2['artist']=="Pablo Picasso"].sort_values(by=['count'], ascending=False)

In [None]:
print("Amount of groups:", len(df2_grouped2), '\n')
print("Highest style-artist combinations:\n", (df2_grouped2[df2_grouped2['count']> 1200]), '\n\n') 
print(df2_grouped2[df2_grouped2['count']==min(df2_grouped2['count'])][0:6], "\n ... \n Rows with 1 count:", len(df2_grouped2[df2_grouped2['count']==min(df2_grouped2['count'])]), '\n\n') 
print("Unknown styles amount per artist:\n", (df2_grouped2[df2_grouped2['style']=="Unknown"]).sort_values(by=['count'], ascending=False)) 

In [69]:
#Drop Henry Moore with realism, as it conflicts with the other Henry Moore
df2_grouped2 = df2_grouped2.drop([5550]).reset_index(drop=True)

In [71]:
# Save to csv
df2_grouped2.to_csv('datasets/wikiart_artists_styles_grouped.csv', index=False)

In [72]:
df2_grouped2 = pd.read_csv('datasets/wikiart_artists_styles_grouped.csv')
artists_A = pd.DataFrame(df2_grouped2['artist'].unique())
artists_A.columns = ['artist']

In [75]:
def concat_all_attribute_values_per_artist(df,artist_name , attribute_name):
    return df[attribute_name][df['artist']==artist_name].str.cat(sep=', ')

def concat_unique_attribute_values_per_artist(df,artist_name , attribute_name):
    return pd.Series(df[attribute_name][df['artist']==artist_name].unique()).str.cat(sep=', ')

artists_A['styles'] = [concat_all_attribute_values_per_artist(df2_grouped2, artist_name, 'style') for artist_name in artists_A['artist']]
artists_A['movement'] = [concat_unique_attribute_values_per_artist(df2_grouped2, artist_name, 'movement') for artist_name in artists_A['artist']]
artists_A


Unnamed: 0,artist,styles,movements
0,Ad Reinhardt,"Abstract Art, Abstract Expressionism, Color Fi...",Abstract Expressionism
1,Adnan Coker,"Abstract Art, Abstract Expressionism",Abstract Art
2,Akkitham Narayanan,Abstract Art,Abstract Art
3,Alberto Magnelli,"Abstract Art, Art Nouveau (Modern), Cubism, Ex...",Abstract Art
4,Alekos Kontopoulos,"Abstract Art, Cubism, Expressionism, Post-Impr...",Social Realism
...,...,...,...
3198,Serhij Schyschko,Unknown,Academic Art
3199,Vudon Baklytsky,Unknown,Soviet Nonconformist Art
3200,Wolfgang Tillmans,Unknown,Contemporary
3201,Wu Daozi,Unknown,Tang Dynasty (618–907)


In [110]:
def concat_attributes_with_count(df = df2_grouped2, artist_name = None, attribute_name=None):
    full_text =''
    for attr in df[df['artist']==artist_name][attribute_name]:
        string = '{'+attr+':'+str(df[df['artist']==artist_name][df[attribute_name]==attr]['count'].reset_index(drop=True)[0])+'}'
        full_text = full_text + string + ','
    return full_text[:-1]

artists_A['styles_extended']=[concat_attributes_with_count(df2_grouped2, artist_name, 'style') for artist_name in artists_A['artist']]

  string = '{'+attr+':'+str(df[df['artist']==artist_name][df[attribute_name]==attr]['count'].reset_index(drop=True)[0])+'}'
  string = '{'+attr+':'+str(df[df['artist']==artist_name][df[attribute_name]==attr]['count'].reset_index(drop=True)[0])+'}'
  string = '{'+attr+':'+str(df[df['artist']==artist_name][df[attribute_name]==attr]['count'].reset_index(drop=True)[0])+'}'
  string = '{'+attr+':'+str(df[df['artist']==artist_name][df[attribute_name]==attr]['count'].reset_index(drop=True)[0])+'}'
  string = '{'+attr+':'+str(df[df['artist']==artist_name][df[attribute_name]==attr]['count'].reset_index(drop=True)[0])+'}'
  string = '{'+attr+':'+str(df[df['artist']==artist_name][df[attribute_name]==attr]['count'].reset_index(drop=True)[0])+'}'
  string = '{'+attr+':'+str(df[df['artist']==artist_name][df[attribute_name]==attr]['count'].reset_index(drop=True)[0])+'}'
  string = '{'+attr+':'+str(df[df['artist']==artist_name][df[attribute_name]==attr]['count'].reset_index(drop=True)[0])+'}'
  string

In [114]:
#Add sum of counts
artists_A['pictures_count'] = [sum(df2_grouped2[df2_grouped2['artist']==artist_name]['count']) for artist_name in artists_A['artist']]
artists_A.sort_values(by=['pictures_count'], ascending=False).head(25)

Unnamed: 0,artist,styles,movements,styles_extended,pictures_count
1046,Vincent van Gogh,"Cloisonnism, Impressionism, Japonism, Neo-Impr...",Post-Impressionism,"{Cloisonnism:11},{Impressionism:2},{Japonism:1...",1931
815,Nicholas Roerich,"Art Nouveau (Modern), Byzantine, Japonism, Rea...",Symbolism,"{Art Nouveau (Modern):499},{Byzantine:23},{Jap...",1843
2011,Pierre-Auguste Renoir,"Impressionism, Japonism, Magic Realism, Orient...",Impressionism,"{Impressionism:1368},{Japonism:1},{Magic Reali...",1412
422,Claude Monet,"Academicism, Impressionism, Japonism, Realism,...",Impressionism,"{Academicism:1},{Impressionism:1341},{Japonism...",1367
2432,Giovanni Battista Piranesi,Neoclassicism,Baroque,{Neoclassicism:1352},1352
149,Salvador Dali,"Abstract Art, Abstract Expressionism, Cloisonn...",Surrealism,"{Abstract Art:3},{Abstract Expressionism:11},{...",1178
503,Pablo Picasso,"Academicism, Analytical Cubism, Art Nouveau (M...",Post-Impressionism,"{Academicism:1},{Analytical Cubism:48},{Art No...",1170
838,Theophile Steinlen,"Art Nouveau (Modern), Realism, Unknown",Art Nouveau,"{Art Nouveau (Modern):484},{Realism:545},{Unkn...",1128
72,Henri Matisse,"Abstract Art, Abstract Expressionism, Color Fi...",Impressionism,"{Abstract Art:3},{Abstract Expressionism:44},{...",1008
1566,Bela Czobel,"Expressionism, Fauvism, Impressionism, Post-Im...",Post-Impressionism,"{Expressionism:7},{Fauvism:7},{Impressionism:3...",967


In [116]:
artists_A.to_csv('datasets/wikiart_artists.csv', index=False)

## Fetching birthplaces and birthyears
We gathered birthplace, and birthyear for every artists from the Wikipedia API.
The fetching mechanism is in in *wikiart_birthplace_fetch_script.py* and *wikiart_birthyear_fetch_script.py*, otherwise for a more detailed explanation, see the notebook (*wikiart_fetch_notebook.ipynb*). The clearest output can be found in *wikiart_paintings_with_birth_cleaned.csv*, which has some birthplaces manually added, remaining artists with no birthplace are dropped.

In [21]:
paintings_with_birthplaces = pd.read_csv("datasets/wikiart_paintings_with_birth_cleaned.csv")
paintings_with_birthplaces.head(3)

Unnamed: 0,artist,style,genre,movement,tags,url,img,file_name,birth_place,birth_year
0,Andrei Rublev,Moscow school of icon painting,religious painting,Byzantine Art,"['Christianity', 'saints-and-apostles', 'angel...",https://www.wikiart.org/en/andrei-rublev/angel...,https://uploads5.wikiart.org/images/andrei-rub...,692-angel-presents-monk-pachomius-cenobitic-mo...,Grand Principality of Moscow,1360
1,Andrei Rublev,Moscow school of icon painting,religious painting,Byzantine Art,"['Christianity', 'Old-Testament', 'Daniel', 'p...",https://www.wikiart.org/en/andrei-rublev/proph...,https://uploads8.wikiart.org/images/andrei-rub...,693-prophet-daniel.jpg,Grand Principality of Moscow,1360
2,Andrei Rublev,Moscow school of icon painting,miniature,Byzantine Art,"['Christianity', 'saints-and-apostles', 'Khitr...",https://www.wikiart.org/en/andrei-rublev/st-jo...,https://uploads7.wikiart.org/images/andrei-rub...,694-st-john-the-evangelist.jpg,Grand Principality of Moscow,1360


In [22]:
bpy = paintings_with_birthplaces[['artist','birth_place','birth_year']].drop_duplicates().reset_index(drop=True)
bpy

Unnamed: 0,artist,birth_place,birth_year
0,Andrei Rublev,Grand Principality of Moscow,1360
1,Ivan Rutkovych,Zolochiv Raion,1650
2,Facundus,León,300
3,Hildegard of Bingen,Bermersheim vor der Höhe,1098
4,Nicholas of Verdun,Verdun,1130
...,...,...,...
2931,Toshio Saeki,Tokyo,1902
2932,Emily Kame Kngwarreye,Northern Territory,1910
2933,Johnny Warangkula Tjupurrula,Napperby Station,1925
2934,Norval Morrisseau,Greenstone,1931


Manually added birthplaces of some artists where it was missing, stored in *datasets/artist_birthplaces_manually_created.csv*. This includes all of them who had at least 100 paintings in the dataset, and some other artists. About 200 are still missing, although not all are real artists (e.g. 'Viking art' or 'Fayum portrait').

<details><summary><u>Missing artists</u></summary>

['Ende', 'Herrad of Landsberg', 'Claricia', 'Viking art', 'Toros Roslin', 'Fayum portrait', 'Il Sassetta (Stefano di Giovanni)', 'Nuno Gonçalves', 'Jean Hey', 'Cristovao de Figueiredo', 'Master of the Small Landscapes', 'Isaac Fuller', 'Jacob Peter Gowy', 'Jan Dirksz Both', 'Simon Ushakov', 'Cornelis Norbertus Gysbrechts', 'Johann Georg Pinzel', 'Joseph Duplessis', 'Carl-Ludwig Johann Christineck', 'Marcos Zapata', 'Claude-Joseph Vernet', 'Mikhail Shibanov', 'John Frederick Herring Sr.', 'Fyodor Solntsev', 'Édouard De Bièfve', 'Rosario Weiss Zorrilla', 'George Hemming Mason', 'Michela De Vito', 'Alexey  Bogolyubov', 'Richard Caton Woodville Sr.', 'Johann Koler', 'August Friedrich Schenck', 'Berthold  Woltze', 'Rafael García Hispaleto (El Hispaleto)', 'Alfred Concanen', 'Mary Josephine Walters', 'Edward R. Taylor', 'Richard Caton Woodville Jr.', 'Herbert Gustave Schmalz (Herbert Carmichael)', 'Giovanni Battista Torriglia', 'Angelo Zoffoli', 'Vartan Mahokian', 'Giovanni (Nino) Costa', 'Charles Victor Thirion', 'Alexandre-Jacques Chantron', 'Gustave-Claude-Etienne Courtois', 'Charles Spencelayh', 'Serhij Schyschko', 'Narcisse-Virgilio Diaz', 'Efim Volkov', 'Mykola Yaroshenko', 'Ivan Tvorozhnikov', 'António de Carvalho da Silva Porto', 'William Sidney Cooper', 'Ivan Mrkviсka', 'Gevorg Bashindzhagian', 'Marie Bashkirtseff', 'Henrique Pousao', 'Paja Jovanovic', 'Armando Montaner Valdueza', 'Simeon Velkov', 'Mihri Musfik', 'Francisco Serra Andrés', 'Panos Terlemezian', 'Lady Frieda Harris', 'Arthur Verona', 'Igor Grabar', 'Petro Kholodny (Elder)', 'Benito Quinquela Martin', 'Chang Dai-chien', 'Colette Pope Heldner', 'JAROSLAV KELUC', 'Cricorps', 'Aleksander Belyaev', 'Santiago Rusinol', 'Jules-Alexandre Grun', 'Umehara Ryuzaburo', 'Dumitru Ghiatza', 'Gohar Fermanyan ', 'Tran Van Can', 'Constantin Piliuta', 'Sallinen Tyko', 'Mykhailo Boychuk', 'Mahmoud Saiid', 'Omer Mujadžić', 'Fikret Mualla Saygi', 'George Mavroides', 'Vasile Kazar', 'Luis Dourdil', 'Julio Resende', 'Oswaldo Guayasamin', 'Bui Xuan Phai', 'Andrey Allakhverdov', 'Paulo Tercio', 'Erin Hanson', 'Nuri Iyem', 'Arthur Nísio', 'Hong Song-dam', 'Jon Mcnaughton', 'Elisabeth Sonrel', 'Haralampi Tachev', 'A.Y. Jackson', 'Jose de Almada-Negreiros', 'Vytautas Kairiukstis', 'Marevna (Marie Vorobieff)', 'Corneliu Michailescu', 'Haroutiun Galentz', 'Noguchi Isamu', 'Adnan Coker', 'Shozo Shimamoto', 'Ion Alin Gheorghiu', 'Romul Nutiu', 'Alvaro Lapa', 'Gary Wragg', 'Georges Troubat', 'Thiago Boecan', 'Nikolai Kulbin', 'Oleksandr Bogomazov', '[ a y s h ]', 'Ramirez Villamizar', 'Carmen Osés Hidalgo', 'Javad Hamidi', 'David Chethlahe Paladin', 'Clarence Holbrook Carter', 'Sergey Belik', 'John Vassos', 'Angel Planells', 'Paul Paun', 'Gustavo Foppiani', 'Leonid Sejka', 'Jose Escada', 'The Game of Marseille', 'H.R. Giger', 'Ian Bent', 'Sarunas Sauka', 'Alan Stephens Foster', 'Veniamin Kremer', 'Oleksandr Pashenko', 'Sergiy Grigoriev', 'Kukryniksy', 'Victor Zaretsky', 'Valerii Lamakh', 'Viktor Shatalin', 'Georgyi Yakutovytch', 'Mikhail Olennikov', 'Sun Mu', 'Sami Gattoufi', 'Arthur Pinajian', 'Peter Busa', 'Robert De Niro, Sr.', 'Stefan Sevastre', 'Manuel Cargaleiro', 'Gebre Kristos Desta', 'Melissa Meyer', 'Steve Wheeler', 'Akira Kanayama', 'Matsutani', 'Sergio Mario Illuminato', 'Thomas Downing', 'Ward Jackson', 'Howard Mehring', 'Sven Lukin', 'Robert Huot', 'Navjot Altaf', 'Katrien De Blauwer', 'Nieves Mingueza', 'May Wilson', 'Chaibia Talal', 'Dmytro Kavsan', 'Armin Andreas Pangerl', 'Ilya Isupov', 'Thomas Riesner', 'Alan Tellez', 'Bernadette Resha', 'Christian Royal', 'Kim Prisu', 'Roland Petersen', 'Graca Morais', 'Martha Diamond', 'Valeria Trubina', 'Lucy Ivanova', 'Benjamin Canas', 'José Luis Cuevas', 'Jarik Jongman', 'Carmen Delaco', 'Julio Le Parc', 'Rubem Ludolf', 'Vudon Baklytsky', 'Boris Bućan', 'R. B. Kitaj', 'Rene Bertholo', 'Jerry W. McDaniel', 'Derek Boshier', 'Jose de Guimaraes', 'Deborah Azzopardi', 'Sonaly Gandhi', 'Arthur Pan', 'D. Jeffrey Mims', 'Luis Álvarez Roure', 'Goran Vojinovic', 'Raúl Berzosa', 'Barton Lidice Benes', 'Bernd and Hilla Becher', 'Francis Naranjo', 'Anima Ehtiat', 'Johannes Jan Schoonhoven', 'Warren Rohrer', 'Jean-Pierre Raynaud', 'Phil Sims', 'Gunter Umberg', 'Takamatsu Jiro', 'Fusun Onur', 'Zahrah Al-Ghamdi', 'Norman Zammitt', 'Doug Wheeler', 'Christo and Jeanne-Claude', 'Alexander Shilov', 'YiFei  Chen', 'Mary Jane Ansell', 'Saul Zanolari', 'Kexin Di', 'Amir Baradaran', 'Rolf Amstrong', 'Georg Miciú', 'Rashid Al Khalifa', 'Pietropoli Patrick', 'Charly Palmer', 'Joseph Lorusso', 'Gazmend Freitag', 'Graydon Parrish', 'Devin Leonardi', 'Malte Sonnenfeld', 'Robert Silvers', 'Babak-Matveev', 'Silviu Oravitzan', 'Epsylon Point', 'Phase 2', 'TRACY 168', 'Speedy Graphito', 'Bahia Shehab', 'Alaa Awad', 'Alberto Pereira', 'TAKI 183', 'Simon Stalenhag', 'Omer Uluc', 'Viorel Marginean', 'Lino Tagliapietra', 'Luciano Bartolini', 'Giuliana Lazzerini', 'Eleonora Brigalda Barbas', 'Kerry O. Furlani', 'Darren Waterston', 'Mariojosé Ángeles', 'Francis A. Willey', 'Benoit Maire', 'Cristiano Tassinari', 'Yulia Mamontova', 'Justin Earl Grant', 'Kinder Album', 'Rodrigo Franzao', 'Kateryna Lysovenko', 'Chaokun Wang', 'EtchingRoom1', 'Dayou Lu', 'Shin Yoon-bok', 'Se-Ok Suh', 'Park Seo-Bo', 'Ha Chong-Hyun', 'Olowe of Ise', 'Stephen Mopope']

```python

x = [a for a in paintings_with_birthplaces_noncleaned['artist'].unique() if a not in artists['artist'].unique()]

```

</details>


### Put birth data into the artist dataframe

In [125]:
artists_A_birth = artists_A.copy()
artists_A_birth = artists_A_birth.merge(bpy[['artist', 'birth_place', 'birth_year']], on='artist', how='left')
artists_A_birth

Unnamed: 0,artist,styles,movement,styles_extended,pictures_count,birth_place,birth_year
0,Ad Reinhardt,"Abstract Art, Abstract Expressionism, Color Fi...",Abstract Expressionism,"{Abstract Art:15},{Abstract Expressionism:5},{...",52,Buffalo,1913.0
1,Adnan Coker,"Abstract Art, Abstract Expressionism",Abstract Art,"{Abstract Art:25},{Abstract Expressionism:3}",28,,
2,Akkitham Narayanan,Abstract Art,Abstract Art,{Abstract Art:17},17,Kerala,1939.0
3,Alberto Magnelli,"Abstract Art, Art Nouveau (Modern), Cubism, Ex...",Abstract Art,"{Abstract Art:19},{Art Nouveau (Modern):2},{Cu...",35,Florence,1888.0
4,Alekos Kontopoulos,"Abstract Art, Cubism, Expressionism, Post-Impr...",Social Realism,"{Abstract Art:26},{Cubism:5},{Expressionism:10...",79,Lamia,1904.0
...,...,...,...,...,...,...,...
3198,Serhij Schyschko,Unknown,Academic Art,{Unknown:9},9,,
3199,Vudon Baklytsky,Unknown,Soviet Nonconformist Art,{Unknown:46},46,,
3200,Wolfgang Tillmans,Unknown,Contemporary,{Unknown:9},9,Remscheid,1968.0
3201,Wu Daozi,Unknown,Tang Dynasty (618–907),{Unknown:8},8,Chang'an,680.0


In [126]:
artists_A_birth.to_csv('datasets/wikiart_artists.csv', index=False)

## Checking for issues
In the Art500k datasets, a common issue is artists stored with different names (e.g. "Rembrandt" and "Rembrandt van Rijn"). Let's check if this is the case here too:

In [40]:
print(artists_A_birth[artists_A_birth["artist"].str.contains("Rembrandt")]["artist"].value_counts())
print(artists_A_birth[artists_A_birth["artist"].str.contains("Gogh")]["artist"].value_counts())
print(artists_A_birth[artists_A_birth["artist"].str.contains("Picasso")]["artist"].value_counts())
print(artists_A_birth[artists_A_birth["artist"].str.contains("(?i)Da Vinci")]["artist"].value_counts())
print(artists_A_birth[artists_A_birth["artist"].str.contains("Michelangelo")]["artist"].value_counts())
print(artists_A_birth[artists_A_birth["artist"].str.contains("Monet")]["artist"].value_counts())
print(artists_A_birth[artists_A_birth["artist"].str.contains("Renoir")]["artist"].value_counts())
print(artists_A_birth[(artists_A_birth["artist"].str.contains("Dali")) | artists_A_birth["artist"].str.contains("Dalí")]["artist"].value_counts())
print(artists_A_birth[artists_A_birth["artist"].str.contains("Cezanne") | artists_A_birth["artist"].str.contains("Cézanne")]["artist"].value_counts())

artist
Rembrandt          1
Rembrandt Peale    1
Name: count, dtype: int64
artist
Vincent van Gogh    1
Name: count, dtype: int64
artist
Pablo Picasso    1
Name: count, dtype: int64
artist
Leonardo da Vinci    1
Name: count, dtype: int64
artist
Michelangelo               1
Michelangelo Pistoletto    1
Name: count, dtype: int64
artist
Claude Monet    1
Name: count, dtype: int64
artist
Pierre-Auguste Renoir    1
Name: count, dtype: int64
artist
Salvador Dali    1
Name: count, dtype: int64
artist
Paul Cezanne    1
Name: count, dtype: int64


Appearantly not.

One issue is that some possibly important painters may be missing. In the raw data, I did not find any match for Chagall (or Shagal).