# Préparation des figures de statistiques descriptives

### Todo : 
Produire les figures, en global et par institution :
1. Ratio Femme/reste pour les acquisitions par année depuis 1945 : **DONE**
2. Âge moyen à l'acquisition des hommes et des femmes (abandonné : deux barres côte à côte, mettre aussi le Q1 et le Q3) : **DONE**
3. Ratio œuvres d'artistes français/reste dans les acquisitions par nationalité (une courbe pour les hommes, une pour les femmes) : **DONE**
4. Sortir sous forme de matrice colorée le ratio H/F dans les 29 domaines en fonction du temps : **DONE**
5. Sous forme de courbes, une par mode d'acquisition, le ratio de femmes

### Important pour le rapport :
- sortir les figures en ratio 2:1
- format .png, background transparent

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LinearSegmentedColormap
import re
import unicodedata
# %matplotlib inline

In [40]:
# Notebook-wide settings: pandas display limits and the shared plotting constants
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 200)

# Line colours: color_h for men, color_f for women, color_b a grey that is only
# referenced by a commented-out reference curve
color_h, color_f, color_b = "gold", "royalblue", "lightgray"
# Figure size handed to matplotlib, and spacing (in years) between x-axis ticks
PLOTDIM = (9, 6)
FREQYEARS = 10

In [92]:
# Use the Roboto font for every figure.
# `font_manager._rebuild()` is a private matplotlib helper that is not available in
# recent releases, where calling it raises AttributeError and stops the notebook.
# Only call it when the installed matplotlib still provides it.
if hasattr(matplotlib.font_manager, '_rebuild'):
    matplotlib.font_manager._rebuild()
plt.rcParams['font.family'] = 'Roboto'
plt.rcParams['font.sans-serif'] = 'Roboto'
plt.rcParams['font.weight'] = 500
# matplotlib.font_manager.findSystemFonts(fontpaths=None, fontext='ttf')

In [3]:
# The input tables are read from the sibling "../data" folder
authors = pd.read_csv(
    '../data/ALL_AUTHORS (live_work + clean nat).csv',
    sep=',',
    low_memory=False,
)
art = pd.read_csv(
    '../data/ALL_ARTWORKS.csv',
    low_memory=False,
)

In [4]:
def validname(value):
    """
    Turn a label into a lowercase, hyphen-separated name usable as a folder name.

    Every character that is not a word character, whitespace or a hyphen is
    dropped; the result is stripped and lower-cased; each run of whitespace
    and/or hyphens is then collapsed into a single hyphen. Accented letters are
    kept as they are: no Unicode normalization is applied.

    value: the string to convert.
    Returns the converted string.
    """
    # Raw strings: `\w` and `\s` are regex classes, not Python string escapes
    value = re.sub(r'[^\w\s-]', '', value).strip().lower()
    return re.sub(r'[-\s]+', '-', value)

### Etape préliminaire 1 : transformer les collections mal orthographiées

In [5]:
# Repair the collection labels whose accented characters are corrupted in the raw data.
# Keys are the corrupted labels exactly as they appear in the data, values the intended spelling.
collection_fixes = {
    "Musée national d'art moderne / Centre de cr��ation industrielle": "Musée national d'art moderne / Centre de création industrielle",
    "Mus��e d'Art moderne et contemporain de la Ville de Strasbourg": "Musée d'Art moderne et contemporain de la Ville de Strasbourg",
    "Mus��e national d'art moderne / Centre de création industrielle": "Musée national d'art moderne / Centre de création industrielle",
    "La Piscine, Mus��e d'art et d'industrie André Diligent, Roubaix": "La Piscine, Musée d'art et d'industrie André Diligent, Roubaix",
    "Mus��e d'art contemporain de Lyon": "Musée d'art contemporain de Lyon",
}
for garbled, fixed in collection_fixes.items():
    art.loc[art['collection'] == garbled, 'collection'] = fixed
# print(art['collection'].value_counts())
# Number of distinct collection labels left after the repair
print(art['collection'].unique().size)

59


### Etape préliminaire 2 : créer les sous-dossiers s'ils n'y sont pas déjà

In [6]:
# Create the output folders: one for the global figures, one for the FRAC
# aggregate, and one per collection (named with validname).
# `exist_ok=True` replaces the separate "exists?" check, so re-running the cell
# is safe and there is no gap between the check and the creation.
os.makedirs('./figures/_global', exist_ok=True)
os.makedirs('./figures/_fracs', exist_ok=True)
for museum in art['collection'].dropna().unique():
    os.makedirs('./figures/' + validname(museum), exist_ok=True)

### Etape préliminaire 3 : "donner un genre" aux oeuvres

In [7]:
# Display 20 randomly drawn rows of the authors table as a quick visual check
authors.sample(20)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Id artist,name,name extended,type,Birth year,Death year,Birth city,Birth state,Birth country,Death city,Death state,Death country,Gender,Nationality (original),ID artworks,average_year,artworks_creation_years,acquisition_year,name_lower,live_and_work,nationality_clean
13409,13409,13409,9000000000072939,Alain Le Yaouanc,,artiste,1940.0,,Alençon,Orne,France,,,,masculin,française,140000000022860|140000000024561|18000000000572...,1967.0,1968|1976|1977|1977,,alain le yaouanc,0,française
6692,6692,6692,140000000007031,Michel Gayout,,artiste,,,,,,,,,masculin,française,140000000032173,,,1972,michel gayout,0,française
18328,18328,18328,200000000003129,Jorg Neitzert,,artiste,1942.0,,Berlin,,Empire fédéral allemand,,,,masculin,allemande (avant 1949),200000000014513,1942.0,,1970,jorg neitzert,"Vit et travaille à Paris (Ile-de-France, Franc...",allemande
32949,32949,32949,110000000000583,Rodolphe Bresdin,,artiste,1822.0,1885.0,Montrelais,Loire-Atlantique,France,Sèvres,Hauts-de-Seine,France,masculin,française,140000000062268|190000000067606,1855.0,1859,1912|1992,rodolphe bresdin,0,française
1589,1589,1589,9000000000076686,Louis-Emile Decorchemont,,artiste,1851.0,1920.0,Autils,Eure,France,Conches,Seine-et-Marne,France,masculin,française,140000000031779,1885.0,,1878,louis-emile decorchemont,0,française
26850,26850,26850,140000000009498,Maurice Thénot (René Maurice Georges Thénot-Pe...,,artiste,1893.0,1963.0,Saint-Maur-des-Fossés,Val-de-Marne,France,,,,masculin,française,140000001658705,1928.0,,1920,maurice thénot (rené maurice georges thénot-pe...,0,française
32973,32973,32973,9000000000072493,Claude Breton,,artiste,1928.0,2006.0,Paris,,France,Paris,,France,masculin,française,180000000005115|200000000011580|20000000001536...,1967.0,,1960|1956|1960|1962|1962|1964|1965|1965|1965|1...,claude breton,0,française
5924,5924,5924,140000000003527,"Frère Athanase (Alexandre Grellet, dit)",,artiste,1835.0,1918.0,Vienne,Isère,France,,,,masculin,française,140000000074167|140000000081656|14000000007093...,1872.0,1863|1864|1870|1875|1877|1879,1863|1864|1870|1875|1877|1879,"frère athanase (alexandre grellet, dit)",0,française
2942,2942,2942,6660444,ANONYME (JEAN COCTEAU (D'APRÈS)),,anonyme,,,,,,,,,,,480000000001253|480000000001024|48000000000068...,1992.0,1958|1989|1992|2000|2000|2000|2000|2000,2005|2005|2005|2005|2005|2005|2005|2005|2005,anonyme (jean cocteau (d'après)),0,
495,495,495,240000000000480,Pierre-Auguste Cot,,artiste,1837.0,1883.0,Bédarieux,Hérault,France,Paris,,France,masculin,française,140000000078027,1863.0,1870,1870,pierre-auguste cot,0,française


In [8]:
# Tag every artwork with the gender of its author(s):
#   - one author: that author's 'Gender' value;
#   - several authors: "masculin" or "féminin" when they all share it, else "groupe";
#   - no author listed: left as None.

# Build the id -> gender lookup once. Filtering the whole `authors` frame for every
# author of every artwork is needlessly slow. Keeping the first row per id
# reproduces what `.values[0]` used to pick.
gender_by_id = (
    authors.drop_duplicates(subset='Id artist')
    .set_index('Id artist')['Gender']
    .to_dict()
)

genders = []
counter = 0
done = 0
length = len(art)

for author_ids in art['authors']:
    counter += 1
    if pd.isnull(author_ids):
        genders.append(None)
    else:
        # Author ids are '|'-separated. An id missing from `authors` gives None
        # (unknown gender) instead of raising IndexError.
        gender_list = [gender_by_id.get(int(b)) for b in author_ids.split('|')]
        if len(gender_list) > 1:
            if all("masculin" == g for g in gender_list):
                var = "masculin"
            elif all("féminin" == g for g in gender_list):
                var = "féminin"
            else:
                var = "groupe"
        else:
            var = gender_list[0]
        genders.append(var)
        done += 1

    if counter % 100000==0:
        print(f'percentage: {round(100*counter/length, 3)}%, total = {counter}, done = {done}, done percentage = {round(100*done/length, 3)}%')

# Values were collected in row order, so they line up with art's index
art['Gender'] = pd.Series(genders, index=art.index, dtype=object)

percentage: 26.667%, total = 100000, done = 100000, done percentage = 26.667%
percentage: 53.334%, total = 200000, done = 200000, done percentage = 53.334%
percentage: 80.002%, total = 300000, done = 299996, done percentage = 80.001%


In [9]:
# Display 20 randomly drawn artworks to check the newly added 'Gender' column
art.sample(20)

Unnamed: 0,_id,ensemble_id,nb_elements,related,type,recap_inventory,recap_title,recap_nature,title_notice,title_list,title_ensemble,collection_department,dimensions_additional,inscriptions,expositions_without_current,expositions,bibliography,copyright,author_in_common,is_dissoc,collection,acquisition_mode,recap_copyright,date_creation,acquisition_year,domain,domain_leaf,domain_deno_for_grid,domain_description_mst,comments,recap_description,recap_authors,authors_notice,authors_list,dimensions,recap_dimensions,acquisition,inventory,inventory_for_grid,key_words_thema,rights_management_leaf,default_tooltip_ua_description,authors_name_complement,authors_site,authors_documents,authors_video,authors_nationality,authors_birth_death,live_and_work,author_bibliography,recap_nationality,recap_name_complement,recap_birth_death,recap_live_and_work,recap_author_bibliography,recap_live_work,nb_images,medias,recap_image_unavailable,authors,localisation_if_deposit,number_provisory,key_words_icono,image_unavailable,title_serial,key_words_movement,title_attributed,creation_stage,domain_deno,deposit_number,deposit_number_for_grid,tirage_design,number_exhibition,realisation_location,recap_multi,dimensions_without_margin,tirage,collaborators_design,authors_live_work,collaborators,tirage_photo,title_old,number_entry,production_circumstances,subtitle,number_identification,number_artist_studio,title_other,text_notes,title_old_by_artist,number_document,title_collection,number_depositary_or_loaner,recap_bibliography,ensemble,old_owners,recap_title_trad,number_artist,trans_title_attributed,number_isbn,number_succession,trans_subtitle,number_issn,mnam_mnr,trans_title_serial,number_catalogue,number_frame,trans_title_ensemble,trans_title_other,trans_title_old,old_attributions,acquisition_mode_clean,Gender
1266,200000000017169,,,,individual,,,,Bretagne,Bretagne,,,,,,,,© droits réservés,a,,Fonds municipal d'art contemporain de la Ville...,Achat,,s.d.,1954.0,Peinture,Peinture,Peinture,Aquarelle sur papier,,,,Léo COURTOIS,COURTOIS Léo,61 x 76 cm,,Achat en 1954,CMP9755,CMP9755,,..Type de droits non identifié,Aquarelle sur papier,,,,,française,"Lorient (Bretagne, France), 1914",,,,,,,,,,,,9000000000083818,,,,no_image,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Achat,masculin
171602,390000000018727,64.0,9.0,390000000018727|390000000018729|39000000001873...,separable,G161268 (1 à 9),Scottish Tartans,Ensemble,Scottish Tartans,Scottish Tartans,,,,,Hôtel du Pavot 2 : Bussy-Saint-Martin (France)...,Hôtel du Pavot 2 : Bussy-Saint-Martin (France)...,,© Jonathan Martin,n,(dissociable),Frac Ile-de-France,Achat,© Jonathan Martin,2015,2016.0,Reproduction photomécanique,Reproduction photomécanique,Reproduction photomécanique,Impression numérique sur papier A0,,,Jonathan MARTIN,Jonathan MARTIN,MARTIN Jonathan,"119 x 84,5 cm","118,9 x 84,1 cm",Achat à Jonathan Martin en 2016,G161268 (1),G161268 (1),,..Droits directs par l'artiste,Impression numérique sur papier A0,,<a href='http://www.analucia.net' rel='link' t...,,,française,"Les Lilas (Seine-Saint-Denis, France), 1986",Vit et travaille à Nogent-sur-Marne (Val-de-Ma...,,française,,"Les Lilas (Seine-Saint-Denis, France), 1986",Vit et travaille à Nogent-sur-Marne (Val-de-Ma...,,Vit et travaille à Nogent-sur-Marne (Val-de-Ma...,1.0,90000000451698,recap_no_image,390000000018706,,,,,,,,,,,,,,,,,Edition 1/3 + 2 EA,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Achat,masculin
102170,140000000105367,1390.0,14.0,140000000088551|140000000105362|14000000010536...,separable,FNAC 15739 (1 à 14),Etudes de vieux saules,Album,Saules,Saules,,Arts plastiques,,S.D.B.DR. : C.Kieffer 25,,,,© droits réservés,n,,Centre national des arts plastiques/Fonds nati...,Achat,© droits réservés,1925,1938.0,Dessin,Dessin,Dessin,EtudePlume et encre de chine,,14 dessins et estampes regroupés dans deux alb...,Clément Marie KIEFFER,Clément Marie KIEFFER,KIEFFER Clément Marie,"16,8 x 25,4 cm",,Achat à l'artiste en 1938,FNAC 15739 (7)Centre national des arts plastiques,FNAC 15739 (7)Centre national des arts plastiques,,..Type de droits non identifié,EtudePlume et encre de chine,,,,,française,"Varize (Moselle, France), 1881 - 1964",,,française,,"Varize (Moselle, France), 1881 - 1964",,,,1.0,90000000346373,recap_no_image,9000000000074486,<b>En dépôt depuis 1938 : La Cour d'Or - Musée...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"Album I, p.51",,,,,,,,,,,,,,,,,Achat,masculin
68499,180000000000931,,,,individual,,,,Nu assis au fond de feuillage,Nu assis au fond de feuillage,,,,S.D.B.G.: Gromaire 1929,Nus en quête d'idéal : l'érotisme de Marcel Gr...,Nus en quête d'idéal : l'érotisme de Marcel Gr...,Marcel Gromaire 1892/1971. - Musée d'art moder...,"© Adagp, Paris",a,,Musée d'art moderne de la Ville de Paris,Legs,,1929,1953.0,Peinture,Peinture,Peinture,Huile sur toile,,,,Marcel GROMAIRE,GROMAIRE Marcel,"100,5 x 81 cm",,Legs du Docteur Maurice Girardin en 1953,AMVP 709,AMVP 709,,Adagp (Paris),Huile sur toile,,,,,française,"Noyelles-sur-Sambre (Nord, France), 1892 - Par...",,,,,,,,,1.0,90000000465693,,9000000000066178,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Legs,masculin
172738,150000000417568,,,,individual,,,,Autoportrait,Autoportrait,,Cabinet de la photographie,,,,,,© Succession Daniel Masclet,a,,Musée national d'art moderne / Centre de créat...,Attribution Etat,,vers 1945,2011.0,Photographie,Photographie,Photographie,Epreuve gélatino-argentique,,,,Daniel MASCLET,MASCLET Daniel,21 x 17 cm,,"Achat grâce au mécénat de Yves Rocher, 2011. A...",AM 2012-4622,AM 2012-4622,"portrait (personnalité), photographe, autoport...",M. Georges Masclet (Paris),Epreuve gélatino-argentique,,,,,française,"Blois (Loir-et-Cher, France), 1892 - Paris (Fr...",,,,,,,,,1.0,90000000446525,,9000000000079734,,EC2010-1-PHB3827,"noir et blanc, Masclet Daniel (1892-1969) (rep...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Attribution,masculin
246238,140000000057244,,,,individual,,,,,Vase,,Arts décoratifs,,,,,,© droits réservés,a,,Centre national des arts plastiques/Fonds nati...,Achat en salon,,s.d.,1920.0,Objet/Design,Vase,Objet/Design,Vase jaune en forme de poirePâte de verre,,,,Joaquim SALA,SALA Joaquim,hauteur: 21 cmdiamètre: 12 cm,,Achat en salon à l'artiste en 1920 (Salon d'Au...,FNAC 7094Centre national des arts plastiques,FNAC 7094Centre national des arts plastiques,,..Type de droits non identifié,Vase jaune en forme de poirePâte de verre,,,,,,,,,,,,,,,,,,9000000000075192,<b>En dépôt depuis le 05/08/1921 : Préfecture ...,,,no_image,,,,,Vase,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Achat,masculin
213357,160000000000914,,,,individual,,,,Tête,Tête,,,,,Picasso. Oeuvres reçues en paiement des droits...,Picasso. Oeuvres reçues en paiement des droits...,"BOZO, Dominique (dir.), [Exposition. Paris, Gr...",© Succession Picasso,a,,Musée national Picasso-Paris,Dation,,1931,1979.0,Sculpture,Sculpture,Sculpture,"Plâtre teinté, bois, fer et clous",,,,Pablo PICASSO,PICASSO Pablo,"57 x 48 x 23,5 cm",,Dation en 1979,MP268,MP268,,Picasso Administration (Paris),"Plâtre teinté, bois, fer et clous",,,,,espagnole,"Málaga (Espagne), 1881 - Mougins (Alpes-Mariti...",,,,,,,,,1.0,90000000264171,,9000000000066930,,,,,,,,,,,,,,[Boisgeloup],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Dation,masculin
282536,260000000000544,7.0,58.0,260000000000499|260000000000500|26000000000050...,separable,"77.979.17.148 (1), 77.979.17.148 (2), 77.979.1...",Les Histoires farfelues de Papaski,Album,,(Sans titre),,collection d'Art Graphique,,R.B.DR.: 77.979.17.169(2),,,UNGERER (Tomi).- I'm Papa Snap and these are m...,© Musées de Strasbourg / Diogenes Verlag AG Zü...,n,(dissociable),"Musée Tomi Ungerer, Centre international de l'...",Donation,© Musées de Strasbourg / Diogenes Verlag AG Zü...,avant 1971,1979.0,Dessin,Dessin,Dessin,Kromo Klopp estrentré dans un arbre avec sa vo...,,Ensemble de 58 dessins d'illustration du livre...,"Tomi UNGERER (Jean Thomas UNGERER, dit)","Tomi UNGERER (Jean Thomas UNGERER, dit)","UNGERER Tomi (UNGERER Jean Thomas, dit)","42 x 30,6 cm24,8 x 20,1 cm (hors marge)",,Donation de l'artiste en 1979,77.979.17.169 (2),77.979.17.169 (2),,..Type de droits non identifié,Kromo Klopp estrentré dans un arbre avec sa vo...,,<a href='http://www.tomiungerer.com' rel='link...,,,française,"Strasbourg (Bas-Rhin, France), 1931",Vit et travaille à Goleen (Irlande) depuis 1976,,française,,"Strasbourg (Bas-Rhin, France), 1931",Vit et travaille à Goleen (Irlande) depuis 1976,,Vit et travaille à Goleen (Irlande) depuis 1976,1.0,9000000098337|90000000100129|90000000100128|90...,,9000000000070912,,,,,,,,,,,,,,,,"24,8 x 20,1",,,,Editeur : Diogenes Verlag AG Zürich,,,,,,,,,,,,,,UNGERER (Tomi).- I'm Papa Snap and these are m...,,"1973 - 1979 : Diogenes Verlag, Zurich (Suisse)",I'm Papa Snap and these are my favourite no su...,,,,,,,,,,,,,,,Donation,masculin
223695,230000000038682,409.0,10.0,230000000038275|230000000038323|23000000003832...,separable,"990-5-32, 990-5-52, 990-5-53, 990-5-193, 990-5...",Etudes pour En robes de soie dans la forêt,ensemble,Etude de composition pour En robes de soie dan...,Etude de composition pour En robes de soie dan...,,,,N. S. n. D.,,,,© droits réservés,n,,"La Piscine, Musée d'art et d'industrie André D...",Achat,© droits réservés,circa 1927,1990.0,Dessin,Dessin,Dessin,Crayon graphite sur papier,,Ces dessins se rapportent à la préparation de ...,Robert Eugène POUGHEON,Robert Eugène POUGHEON,POUGHEON Robert Eugène,32 x 25 cm,,Achat à la Galerie Pierre Gaubert en 1990,990-5-211,990-5-211,,..Type de droits non identifié,Crayon graphite sur papier,,,,,française,"Paris (France), 1886 - Paris (France), 1955",,,française,,"Paris (France), 1886 - Paris (France), 1955",,,,1.0,90000000488525,recap_no_image,9000000000066959,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Achat,masculin
268481,140000000088249,285.0,6.0,140000000088244|140000000088245|14000000008824...,separable,"FNAC 983, FNAC 997, FNAC 998, FNAC 1016, FNAC ...",,ensemble,,Mobilier liturgique,,Arts décoratifs,,,,,,"© Adagp, Paris",n,,Centre national des arts plastiques/Fonds nati...,Achat,"© Adagp, Paris",s.d.,1957.0,Objet/Design,Mobilier liturgique,Objet/Design,4 chandeliers d'autel et une croix d'autel orn...,,1956Ensemble mobilier destiné à la cathédrale ...,Raymond SUBES,Raymond SUBES,SUBES Raymond,,,Achat à l'artiste en 1957,FNAC 1018Centre national des arts plastiques,FNAC 1018Centre national des arts plastiques,,Adagp (Paris),4 chandeliers d'autel et une croix d'autel orn...,,,,,française,"Paris (France), 1891 - ?, 1970",,,française,,"Paris (France), 1891 - ?, 1970",,,,,,recap_no_image,9000000000075282,<b>En dépôt : Cathédrale Notre-Dame de Rouen</b>,,,element_no_image,,,,,Mobilier liturgique,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Achat,masculin


In [10]:
# Number of artworks per gender label (null values are not counted)
art['Gender'].value_counts()

masculin         293730
féminin           47270
groupe            11602
non renseigné      6703
Name: Gender, dtype: int64

In [11]:
# Share of artworks that received a non-null gender value
art['Gender'].count()/len(art) # about 4.2% of NaN

0.9581671075649614

In [12]:
# Collections whose name contains "frac" (case-insensitive)
fracs = [
    museum
    for museum in art['collection'].dropna().unique()
    if "frac" in museum.lower()
]
len(fracs)

18

## Partie 1 : Proportion du nombre de femmes dans les acquisitions

In [13]:
# Function to plot the proportion of women in any subset of artworks
def get_ratioF(art, subset, subset_name, path):
    """
    Save a PNG of the yearly share of artworks by women in `subset`, 1945-2017.

    For each acquisition year, the share is the number of artworks whose
    'Gender' is 'féminin' divided by the number of artworks acquired that year.
    A year with acquisitions but none by women is plotted as 0%. A year
    without any acquisition is left as a gap.

    art: full artworks table. Kept for backward compatibility; it is not used,
        because the overall reference curve is not drawn.
    subset: artworks to plot; needs 'acquisition_year', 'Gender' and '_id'.
    subset_name: label of the subset. Currently unused, since no title is drawn.
    path: file the figure is written to (PNG, transparent background).
    """
    first_year, last_year = 1945, 2017
    subset = subset[subset['acquisition_year'].between(first_year, last_year)]

    # Count only '_id' instead of counting every column of the frame
    total = subset.groupby('acquisition_year')['_id'].count()
    women = (
        subset[subset['Gender'] == 'féminin']
        .groupby('acquisition_year')['_id']
        .count()
        # Years with acquisitions but no woman must count as 0, not as missing
        .reindex(total.index, fill_value=0)
    )
    ratio = (women / total).reindex(range(first_year, last_year + 1))

    fig, ax = plt.subplots(figsize=PLOTDIM)
    ratio.plot(color=color_f, marker='.', linewidth=2, ax=ax)

    # Axis cosmetics
    ax.set_xlim(left=first_year, right=last_year)
    ax.set_ylim(bottom=-0.01, top=1.01)
    ax.tick_params(labelsize=15)
    ax.xaxis.set_major_locator(ticker.MultipleLocator(FREQYEARS))
    # A formatter keeps labels in sync with the ticks, unlike a fixed label list
    ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, decimals=0))
    ax.set_xlabel('')
    ax.grid()
    fig.savefig(path, bbox_inches='tight', format="png", dpi=150, transparent=True)
    plt.close(fig)

In [95]:
# Share of women over all collections, then over the FRAC collections only
get_ratioF(
    art=art,
    subset=art,
    subset_name="ensemble des collections",
    path="./figures/_global/ratio_overall.png",
)
get_ratioF(
    art=art,
    subset=art[art['collection'].isin(fracs)],
    subset_name="ensemble des 18 FRAC",
    path="./figures/_fracs/ratio_overall.png",
)

In [68]:
# Same figure for each collection, saved in that collection's own folder
for museum in art['collection'].dropna().unique():
    # print("Musée en cours:", museum)
    get_ratioF(
        art,
        art[art['collection'] == museum],
        museum,
        "./figures/" + validname(museum) + "/ratio_overall.png",
    )
print('Done')

Done


### Aparté

In [16]:
# Side study: "is it a good idea to group by separable artworks" => NO
# Keeps the first row of each `related` group among artworks of type 'separable'
uniquegroups = art.loc[art['type'] == 'separable'].groupby('related').first()
# uniquegroups = art[art['type'] == 'separable'].groupby('related').agg(lambda x:x.value_counts().index[0])
# matisse = art[art['related'] == '70000000007610|70000000007613|70000000007616|70000000007619|70000000007622|70000000007625|70000000007628|70000000007631|70000000007634|70000000007637|70000000007640|70000000007643|70000000007646|70000000007649|70000000007652|70000000007655|70000000007658|70000000007661|70000000007664|70000000007667|70000000007670|70000000007673|70000000007676|70000000007679|70000000007682|70000000007685|70000000007688|70000000007691|70000000007694|70000000007697|70000000007700|70000000007703|70000000007706|70000000007709|70000000007712|70000000007715|70000000007718|70000000007721|70000000007724|70000000007727|70000000007730|70000000007733|70000000007736|70000000007739|70000000007742|70000000007745|70000000007748|70000000007751|70000000007754|70000000007757|70000000007760|70000000007763|70000000007766|70000000007769|70000000007772|70000000007775|70000000007778|70000000007781|70000000007784|70000000007787|70000000007790|70000000007793|70000000007796|70000000007799|70000000007802|70000000007805|70000000007808|70000000007811|70000000007814|70000000007817|70000000007820|70000000007823|70000000007826|70000000007829|70000000007832|70000000007835|70000000007838|70000000007841|70000000007844|70000000007847|70000000007850|70000000007853|70000000007856|70000000007859|70000000007862|70000000007865|70000000007868|70000000007871|70000000007874|70000000007877|70000000007880|70000000007883|70000000007886|70000000007889|70000000007892|70000000007895|70000000007898|70000000007901|70000000007904|70000000007907|70000000007910|70000000007913|70000000007916|70000000007919|70000000007922|70000000007925|70000000007928|70000000007931|70000000007934|70000000007937|70000000007940|70000000007943|70000000007946|70000000007949|70000000007952|70000000007955|70000000007958|70000000007961|70000000007964|70000000007967|70000000007970|70000000007973|70000000007976|70000000007979|70000000007982|70000000007985|70000000007988|70000000007991|70000000007994|70000000007997|70000000008000|
70000000008003|70000000008006|70000000008009|70000000008012|70000000008015|70000000008018|70000000008021|70000000008024|70000000008027|70000000008030|70000000008033|70000000008036|70000000008039|70000000008042|70000000008045|70000000008048|70000000008051|70000000008054|70000000008057|70000000008060|70000000008063|70000000008071|70000000008074|70000000008077|70000000008080|70000000008083|70000000008086|70000000008089|70000000008092|70000000008095|70000000008098|70000000008101|70000000008104|70000000008107|70000000008110|70000000008113|70000000008116|70000000008119|70000000008122|70000000008125|70000000008128|70000000008131|70000000008134|70000000008137|70000000008140|70000000008143|70000000008146|70000000008149|70000000008152|70000000008155|70000000008158|70000000008161|70000000008164|70000000008167|70000000008170|70000000008173|70000000008176|70000000008179|70000000008182|70000000008185|70000000008188|70000000008191|70000000008194|70000000008197|70000000008200|70000000008203|70000000008206|70000000008209|70000000008212|70000000008215|70000000008218|70000000008221|70000000008224|70000000008227|70000000008230|70000000008233|70000000008236|70000000008239|70000000008242|70000000008245|70000000008248|70000000008251|70000000008254|70000000008257|70000000008260|70000000008263|70000000008266|70000000008269|70000000008272|70000000008275|70000000008278|70000000008281|70000000008284|70000000008287|70000000008290|70000000008293|70000000008296|70000000008299|70000000008302|70000000008305|70000000008308|70000000008311|70000000008314|70000000008317|70000000008320|70000000008323|70000000008326|70000000008329|70000000008332|70000000008335|70000000008338|70000000008341|70000000008344|70000000008347|70000000008350|70000000008353|70000000008356|70000000008359|70000000008362|70000000008365|70000000008368|70000000008371|70000000008374|70000000008377|70000000008380|70000000008383|70000000008386|70000000008389|70000000008392|70000000008395|70000000008398|70000000008401|70000000008404|70000
000008407|70000000008410|70000000008413|70000000008416|70000000008419|70000000008422|70000000008425|70000000008428|70000000008431|70000000008434|70000000008437|70000000008440|70000000008443|70000000008446|70000000008449|70000000008452|70000000008455|70000000008458|70000000008461|70000000008464|70000000008467|70000000008470|70000000008473|70000000008476|70000000008479|70000000008482|70000000008485|70000000008488|70000000008491|70000000008494|70000000008497|70000000008500|70000000008503|70000000008506|70000000008509|70000000008512|70000000008515|70000000008518|70000000008521|70000000008524|70000000008533|70000000008536|70000000008539|70000000008542|70000000008545|70000000008548|70000000008551|70000000008554|70000000008557|70000000008560|70000000008563|70000000008566|70000000008569|70000000008572|70000000008575|70000000008578|70000000008581|70000000008584|70000000008587|70000000008590|70000000008593|70000000008596|70000000008599|70000000008602|70000000008605|70000000008608|70000000008611|70000000008614|70000000008617|70000000008620|70000000008623|70000000008626|70000000008629|70000000008632|70000000008635|70000000008638|70000000008641|70000000008644|70000000008647|70000000008650|70000000008653|70000000008656|70000000008659|70000000008662|70000000008665|70000000008668|70000000008671|70000000008674|70000000008677|70000000008680|70000000008683|70000000008686|70000000008689|70000000008692|70000000008695|70000000008698|70000000008701|70000000008704|70000000008707|70000000008710|70000000008713|70000000008716|70000000008719|70000000008722|70000000008725|70000000008728|70000000008731|70000000008734|70000000008737|70000000008740|70000000008743|70000000008746|70000000008749|70000000008752|70000000008755|70000000008758|70000000008761|70000000008764|70000000008767|70000000008770|70000000008773|70000000008776|70000000008779|70000000008782|70000000008785|70000000008788|70000000008791|70000000008794|70000000008797|70000000008800|70000000024504|70000000024507|70000000024510|7000000002
4513|70000000024516|70000000024519|70000000024522|70000000024525|70000000024528|70000000024531|70000000024534|70000000024537|70000000024540|70000000024543|70000000024546|70000000024549|70000000024552|70000000024555|70000000024558|70000000024561|70000000024564|70000000024567|70000000024570|70000000024573|70000000024576|70000000024579|70000000024582|70000000024585|70000000024588|70000000024591|70000000024594|70000000024597|70000000024600|70000000024603|70000000024606|70000000024609']
# Artworks belonging to one specific `related` group; presumably the counter-example
# backing the "NO" conclusion of this side study (confirm with the author)
counterex = art[art['related'] == '10000000001440|10000000001441|10000000001442|10000000001443|10000000001444|10000000001445|10000000001439|10000000001446|10000000001447|10000000001448|10000000001449|10000000001450|10000000001451|10000000001452|10000000001453|10000000001454|10000000001481']
#counterex

## Partie 2 : Âge moyen à l'acquisition, par genre

In [17]:
# Computing age at acquisition: acquisition year minus the author's birth year.
# With several authors, the mean of their known birth years is used.

# Build the id -> birth year lookup once instead of filtering `authors` for
# every artwork. Keeping the first row per id reproduces what `.values[0]` picked.
birth_year_by_id = (
    authors.drop_duplicates(subset='Id artist')
    .set_index('Id artist')['Birth year']
    .to_dict()
)

ages = []
counter = 0
done = 0
length = len(art)

for author_ids, acquisition_year in zip(art['authors'], art['acquisition_year']):
    counter += 1
    if pd.isnull(author_ids):
        ages.append(np.nan)
    else:
        # An id missing from `authors` counts as an unknown birth year
        birth_years = np.array(
            [birth_year_by_id.get(int(b), np.nan) for b in author_ids.split('|')],
            dtype=float,
        )
        known = birth_years[~np.isnan(birth_years)]
        # Explicit empty check: np.nanmean warns when every birth year is NaN
        mean_birth_year = known.mean() if known.size else np.nan
        ages.append(acquisition_year - mean_birth_year)
        done += 1

    if counter % 100000==0:
        print(f'percentage: {round(100*counter/length, 3)}%, total = {counter}, done = {done}, done percentage = {round(100*done/length, 3)}%')

# Values were collected in row order, so they line up with art's index
art['age_at_acquisition'] = pd.to_numeric(pd.Series(ages, index=art.index, dtype=float))
# Share of artworks with a known age (roughly 17% were NaN in the recorded run)
art['age_at_acquisition'].count()/len(art)

  from ipykernel import kernelapp as app


percentage: 26.667%, total = 100000, done = 100000, done percentage = 26.667%
percentage: 53.334%, total = 200000, done = 200000, done percentage = 53.334%
percentage: 80.002%, total = 300000, done = 299996, done percentage = 80.001%


0.8275989887784273

In [18]:
# Display 10 randomly drawn values of the computed age as a quick visual check
art['age_at_acquisition'].sample(10)

141390     88.0
98935     115.0
270714     68.0
23489     110.0
303548     93.0
371699    116.0
97115       NaN
242674     92.0
232301     94.0
140458     88.0
Name: age_at_acquisition, dtype: float64

In [19]:
# Function to plot the age at acquisition (one curve for women, one for men)
def get_age_acquisition(art, subset, subset_name, path):
    """
    Save a PNG of the mean 'age_at_acquisition' per acquisition year, 1945-2017.

    One curve is drawn for artworks whose 'Gender' is 'féminin' and one for
    'masculin'. A gender with no artwork in the period is skipped.

    art: full artworks table. Kept for backward compatibility; it is not used.
    subset: artworks to plot; needs 'acquisition_year', 'Gender' and a numeric
        'age_at_acquisition' column.
    subset_name: label of the subset. Currently unused, since no title is drawn.
    path: file the figure is written to (PNG, transparent background).
    """
    first_year, last_year = 1945, 2017
    subset = subset[subset['acquisition_year'].between(first_year, last_year)]

    # The size is set here so it applies even when no curve is drawn
    fig, ax = plt.subplots(figsize=PLOTDIM)
    # Women first, then men: same drawing order as before
    for gender, color in (('féminin', color_f), ('masculin', color_h)):
        # Select the column before averaging: averaging the whole frame fails on
        # pandas 2.x because of the text columns, and does useless work otherwise
        mean_age = (
            subset[subset['Gender'] == gender]
            .groupby('acquisition_year')['age_at_acquisition']
            .mean()
        )
        if len(mean_age) > 0:
            mean_age = mean_age.reindex(range(first_year, last_year + 1))
            mean_age.plot(color=color, marker='.', linewidth=2, ax=ax)

    # Axis cosmetics
    ax.set_xlim(left=first_year, right=last_year)
    ax.set_ylim(bottom=-1, top=141)
    ax.tick_params(labelsize=15)
    ax.xaxis.set_major_locator(ticker.MultipleLocator(FREQYEARS))
    ax.set_xlabel('')
    ax.grid()
    fig.savefig(path, bbox_inches='tight', format="png", dpi=150, transparent=True)
    plt.close(fig)

In [93]:
# Mean age at acquisition over all collections, then over the FRAC collections only
get_age_acquisition(
    art=art,
    subset=art,
    subset_name="ensemble des collections",
    path="./figures/_global/age_acquisition.png",
)
get_age_acquisition(
    art=art,
    subset=art[art['collection'].isin(fracs)],
    subset_name="ensemble des 18 FRAC",
    path="./figures/_fracs/age_acquisition.png",
)

In [21]:
# Same figure for each collection, saved in that collection's own folder
for museum in art['collection'].dropna().unique():
    # print("Musée en cours:", museum)
    get_age_acquisition(
        art,
        art[art['collection'] == museum],
        museum,
        "./figures/" + validname(museum) + "/age_acquisition.png",
    )
print('Done')

Done


## Partie 3 : Ratio d'artistes français dans les acquisitions, par genre

In [22]:
# Tag every artwork with the nationality of its author(s):
#   - one author: that author's 'nationality_clean' value;
#   - several authors: "français" when at least one known nationality contains
#     "français", else "groupe";
#   - no author listed: left as None.

# Build the id -> nationality lookup once instead of filtering `authors` (twice)
# for every author. Keeping the first row per id reproduces what `.values[0]` picked.
nationality_by_id = (
    authors.drop_duplicates(subset='Id artist')
    .set_index('Id artist')['nationality_clean']
    .to_dict()
)

nationalities = []
counter = 0
done = 0
length = len(art)

for author_ids in art['authors']:
    counter += 1
    if pd.isnull(author_ids):
        nationalities.append(None)
    else:
        # An id missing from `authors` counts as an unknown nationality
        author_nats = [nationality_by_id.get(int(b)) for b in author_ids.split('|')]
        if len(author_nats) > 1:
            # Unknown nationalities are ignored when looking for a French author
            nat_list = [nat for nat in author_nats if pd.notnull(nat)]
            if any("français" in g for g in nat_list):
                var = "français"
            else:
                var = "groupe"
        else:
            var = author_nats[0]
        nationalities.append(var)
        done += 1

    if counter % 100000==0:
        print(f'percentage: {round(100*counter/length, 3)}%, total = {counter}, done = {done}, done percentage = {round(100*done/length, 3)}%')

# Values were collected in row order, so they line up with art's index
art['nationality'] = pd.Series(nationalities, index=art.index, dtype=object)

percentage: 26.667%, total = 100000, done = 100000, done percentage = 26.667%
percentage: 53.334%, total = 200000, done = 200000, done percentage = 53.334%
percentage: 80.002%, total = 300000, done = 299996, done percentage = 80.001%


In [23]:
# Number of authors per cleaned nationality label (null values are not counted)
authors['nationality_clean'].value_counts()

française                            17026
américaine                            1536
allemande                             1051
italienne                              757
britannique                            612
suisse                                 498
belge                                  493
espagnole                              418
japonaise                              368
néerlandaise                           321
russe                                  229
polonaise                              203
autrichienne                           192
chinoise                               144
suédoise                               134
canadienne                             124
brésilienne                            120
tchécoslovaque                         116
argentine                              107
danoise                                101
roumaine                                91
grecque                                 90
hongroise                               88
soviétique 

In [24]:
def get_ratioNat(art, subset, subset_name, path):
    """
    Plot, for each acquisition year from 1945 to 2017, the share of works whose
    `nationality` contains 'français', with one curve for men and one for
    women, and save the figure as a transparent PNG.

    Parameters
    ----------
    art : pandas.DataFrame
        Full artworks table. Accepted so that the signature matches the other
        plotting functions of this notebook; it is not used here.
    subset : pandas.DataFrame
        Rows to plot. Must provide the columns 'acquisition_year', 'Gender',
        'nationality' and '_id'.
    subset_name : str
        Human-readable name of the subset. Currently unused (the figure has
        no title).
    path : str
        Destination of the PNG file.

    Notes
    -----
    A year in which a gender has acquisitions but none of them is French is
    drawn at 0%; a year without any acquisition for that gender is left as a
    gap in the curve.
    """
    first_year, last_year = 1945, 2017
    years = range(first_year, last_year + 1)
    in_period = (subset['acquisition_year'] >= first_year) & (subset['acquisition_year'] <= last_year)
    subset = subset[in_period]
    # na=False: a missing nationality counts as "not French" instead of
    # putting NaN into the boolean mask.
    is_french = subset['nationality'].str.contains('français', na=False)

    fig, ax = plt.subplots(figsize=PLOTDIM)
    # Men are drawn first, then women, so that the women curve is on top.
    for gender, color in (('masculin', color_h), ('féminin', color_f)):
        of_gender = subset['Gender'] == gender
        total = subset[of_gender].groupby('acquisition_year')['_id'].count()
        french = subset[of_gender & is_french].groupby('acquisition_year')['_id'].count()
        if french.empty:
            french = pd.Series(0, index=total.index)
        else:
            # Years with acquisitions but no French work must count as 0,
            # not as missing data.
            french = french.reindex(total.index, fill_value=0)
        share = (french / total).reindex(years)
        share.plot(color=color, marker='.', linewidth=2, ax=ax)

    # Then we customize the plot
    ax.set_xlim(left=first_year, right=last_year)
    ax.set_ylim(bottom=-0.01, top=1.01)
    ax.tick_params(labelsize=15)
    ax.xaxis.set_major_locator(ticker.MultipleLocator(FREQYEARS))
    ax.set_xlabel('')
    # A formatter keeps the labels tied to the ticks, unlike fixed tick labels.
    ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, decimals=0))
    ax.grid()
    fig.savefig(path, bbox_inches='tight', format="png", dpi=150, transparent=True)
    plt.close(fig)

In [85]:
# Nationality figure for the whole dataset, then for the rows whose collection
# is listed in `fracs` (defined elsewhere in the notebook).
get_ratioNat(art, art, "ensemble des collections", "./figures/_global/ratio_nationality.png")
get_ratioNat(art, art[art['collection'].isin(fracs)], "ensemble des 18 FRAC", "./figures/_fracs/ratio_nationality.png")

In [26]:
# One nationality figure per collection, saved in the folder named after it.
for museum in art['collection'].dropna().unique():
    figure_path = f"./figures/{validname(museum)}/ratio_nationality.png"
    get_ratioNat(art, art.loc[art['collection'] == museum], museum, figure_path)
print('Done')

Done


## Partie 4 : ratio H/F dans les 29 domaines

In [27]:
# Colormap going from color_h to color_f in 100 steps; it is passed as `cmap`
# to the matshow calls of the matrix figures below.
my_cm = LinearSegmentedColormap.from_list("colormap", [color_h, color_f], N=100)

In [87]:
def get_ratio_domains(art, subset, subset_name, path):
    """
    Draw a domain x acquisition-year matrix (1945-2017) of the share of works
    whose 'Gender' is 'féminin', plus a last row for all domains together, and
    save it as a transparent PNG.

    Parameters
    ----------
    art : pandas.DataFrame
        Full artworks table. Its 'domain' values (within the period) define
        the rows of the matrix, so every figure lists the same domains.
    subset : pandas.DataFrame
        Rows to plot. Must provide the columns 'acquisition_year', 'Gender',
        'domain' and '_id'.
    subset_name : str
        Human-readable name of the subset. Currently unused (the figure has
        no title).
    path : str
        Destination of the PNG file.

    Notes
    -----
    A cell is 0 when there were acquisitions but none by women, and empty
    (NaN) when there was no acquisition at all. If the subset has no usable
    row in the period, an empty figure is still written to `path`.
    """
    first_year, last_year = 1945, 2017
    years = range(first_year, last_year + 1)
    art = art[(art['acquisition_year'] >= first_year) & (art['acquisition_year'] <= last_year)]
    subset = subset[(subset['acquisition_year'] >= first_year) & (subset['acquisition_year'] <= last_year)]
    women = subset[subset['Gender'] == 'féminin']

    def share_of_women(keys):
        # Fraction of women per group; groups without any woman count as 0.
        total = subset.groupby(keys)['_id'].count()
        female = women.groupby(keys)['_id'].count()
        if female.empty:
            female = pd.Series(0, index=total.index)
        else:
            female = female.reindex(total.index, fill_value=0)
        return female / total

    ratio = share_of_women(['domain', 'acquisition_year'])
    if len(ratio) > 0:
        overall = share_of_women('acquisition_year').reindex(years).rename("Tous domaines")
        all_names = sorted(art['domain'].dropna().unique())
        ratio = ratio.unstack('acquisition_year').reindex(index=all_names, columns=years)
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        ratio = pd.concat([ratio, overall.to_frame().T])
        values = ratio.to_numpy(dtype=float)

        # Then we customize the plot
        fig, ax = plt.subplots(figsize=PLOTDIM)
        im = ax.matshow(values, cmap=my_cm, vmin=0, vmax=1)
        divider = make_axes_locatable(ax)
        cbar_ax = divider.append_axes("right", size="2%", pad=-2.97)
        # Tick positions are pinned so that the percentage labels always match.
        fig.colorbar(im, cax=cbar_ax, ticks=np.linspace(0, 1, 6),
                     format=ticker.PercentFormatter(xmax=1, decimals=0))
        # One tick per decade, labelled with the last two digits of the year.
        # Positions are set before the labels so that both stay in sync.
        decades = [year for year in years if year % 10 == 0]
        ax.set_xticks([year - first_year for year in decades])
        ax.set_xticklabels([f"{year % 100:02d}" for year in decades], fontsize=13)
        ax.xaxis.set_ticks_position('bottom')
        domains = ["Domaine mixte" if "Domaine mixte" in d else d for d in ratio.index]
        ax.set_yticks(np.arange(len(domains)))
        ax.set_yticklabels(domains, fontsize=10)
        ax.set_aspect(2.65)
    else:
        # Nothing to draw: an empty figure is saved, as before.
        fig = plt.figure()
    fig.savefig(path, bbox_inches='tight', format="png", dpi=150, transparent=True)
    plt.close(fig)

In [97]:
# Domain matrix for the whole dataset, then for the rows whose collection is
# listed in `fracs` (defined elsewhere in the notebook).
get_ratio_domains(art, art, "ensemble des collections", "./figures/_global/ratio_domains.png")
get_ratio_domains(art, art[art['collection'].isin(fracs)], "ensemble des 18 FRAC", "./figures/_fracs/ratio_domains.png")

In [64]:
# One domain matrix per collection, saved in the folder named after it.
for museum in art['collection'].dropna().unique():
    figure_path = f"./figures/{validname(museum)}/ratio_domains.png"
    get_ratio_domains(art, art.loc[art['collection'] == museum], museum, figure_path)
print('Done')

Done


In [31]:
# Table linking each collection name to the folder name derived from it.
m = list(art['collection'].dropna().unique())
n = [validname(x) for x in m]
pd.DataFrame({"musée": m, "dossier": n}).to_csv("./figures/musées.csv", encoding='utf-8', index=False)

Divers (pour Vincent) : nombre d'auteurs ayant au moins une acquisition depuis 1900 et depuis 1945

In [32]:
# Latest acquisition year of every author that has one; the column stores the
# years as a '|'-separated string.
latest_years = [
    max(int(year) for year in re.split(r'\|', years))
    for years in authors['acquisition_year']
    if not pd.isnull(years)
]
# k: authors with an acquisition in 1945 or later; l: in 1900 or later.
k = sum(1 for year in latest_years if year >= 1945)
l = sum(1 for year in latest_years if year >= 1900)
print(len(authors), l, k)

35958 27272 22027


## Partie 5 : Sous forme de courbe, une par mode d'acquisition, le ratio de femmes

In [33]:
# Number of distinct non-null values of `acquisition_mode_clean`.
len(art['acquisition_mode_clean'].dropna().unique())

22

In [99]:
def get_ratio_acquisition(art, subset, subset_name, path):
    """
    Draw an acquisition-mode x acquisition-year matrix (1945-2017) of the
    share of works whose 'Gender' is 'féminin', plus a last row for all modes
    together, and save it as a transparent PNG.

    Parameters
    ----------
    art : pandas.DataFrame
        Full artworks table. Its 'acquisition_mode_clean' values (within the
        period) define the rows of the matrix, so every figure lists the same
        modes.
    subset : pandas.DataFrame
        Rows to plot. Must provide the columns 'acquisition_year', 'Gender',
        'acquisition_mode_clean' and '_id'.
    subset_name : str
        Human-readable name of the subset. Currently unused (the figure has
        no title).
    path : str
        Destination of the PNG file.

    Notes
    -----
    A cell is 0 when there were acquisitions but none by women, and empty
    (NaN) when there was no acquisition at all. If the subset has no usable
    row in the period, an empty figure is still written to `path`.
    """
    first_year, last_year = 1945, 2017
    years = range(first_year, last_year + 1)
    art = art[(art['acquisition_year'] >= first_year) & (art['acquisition_year'] <= last_year)]
    subset = subset[(subset['acquisition_year'] >= first_year) & (subset['acquisition_year'] <= last_year)]
    women = subset[subset['Gender'] == 'féminin']

    def share_of_women(keys):
        # Fraction of women per group; groups without any woman count as 0.
        total = subset.groupby(keys)['_id'].count()
        female = women.groupby(keys)['_id'].count()
        if female.empty:
            female = pd.Series(0, index=total.index)
        else:
            female = female.reindex(total.index, fill_value=0)
        return female / total

    ratio = share_of_women(['acquisition_mode_clean', 'acquisition_year'])
    if len(ratio) > 0:
        overall = share_of_women('acquisition_year').reindex(years).rename("Tous modes")
        all_names = sorted(art['acquisition_mode_clean'].dropna().unique())
        ratio = ratio.unstack('acquisition_year').reindex(index=all_names, columns=years)
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        ratio = pd.concat([ratio, overall.to_frame().T])
        values = ratio.to_numpy(dtype=float)

        # Then we customize the plot
        fig, ax = plt.subplots(figsize=PLOTDIM)
        im = ax.matshow(values, cmap=my_cm, vmin=0, vmax=1)
        divider = make_axes_locatable(ax)
        cbar_ax = divider.append_axes("right", size="2%", pad=-2.97)
        # Tick positions are pinned so that the percentage labels always match.
        fig.colorbar(im, cax=cbar_ax, ticks=np.linspace(0, 1, 6),
                     format=ticker.PercentFormatter(xmax=1, decimals=0))
        # One tick per decade, labelled with the last two digits of the year.
        # Positions are set before the labels so that both stay in sync.
        decades = [year for year in years if year % 10 == 0]
        ax.set_xticks([year - first_year for year in decades])
        ax.set_xticklabels([f"{year % 100:02d}" for year in decades], fontsize=13)
        ax.xaxis.set_ticks_position('bottom')
        # Display names; the rules are applied in this order, the more
        # specific "non renseigné par commande" before "non renseigné".
        display_rules = (
            ("mixte", "Mode d'acquisition mixte"),
            ("non renseigné par commande", "Non renseigné, par commande"),
            ("non renseigné", "Non renseigné"),
            ("Douanes", "Saisie des Douanes"),
        )
        domains = list(ratio.index)
        for needle, display_name in display_rules:
            domains = [display_name if needle in d else d for d in domains]
        ax.set_yticks(np.arange(len(domains)))
        ax.set_yticklabels(domains, fontsize=10)
        ax.set_aspect(2.65)
    else:
        # Nothing to draw: an empty figure is saved, as before.
        fig = plt.figure()
    fig.savefig(path, bbox_inches='tight', format="png", dpi=150, transparent=True)
    plt.close(fig)

In [100]:
# Acquisition-mode matrix for the whole dataset, then for the rows whose
# collection is listed in `fracs` (defined elsewhere in the notebook).
get_ratio_acquisition(art, art, "ensemble des collections", "./figures/_global/ratio_acquisitions.png")
get_ratio_acquisition(art, art[art['collection'].isin(fracs)], "ensemble des 18 FRAC", "./figures/_fracs/ratio_acquisitions.png")

In [101]:
# One acquisition-mode matrix per collection, saved in the folder named after it.
for museum in art['collection'].dropna().unique():
    figure_path = f"./figures/{validname(museum)}/ratio_acquisitions.png"
    get_ratio_acquisition(art, art.loc[art['collection'] == museum], museum, figure_path)
print('Done')

Done
