# Préparation des figures de statistiques descriptives

### Todo : 
Produire les figures, en global et par institution :
1. Ratio Femme/reste pour les acquisitions par année depuis 1945 : **DONE**
2. Âge moyen à l'acquisition des hommes et des femmes (abandonné : deux barres côte à côte, mettre aussi le Q1 et le Q3) : **DONE**
3. Ratio oeuvres d'artistes Français/reste dans les acquisitions par nationalité (une courbe pour les hommes, une pour les femmes) : **DONE**
4. Sortir sous forme de matrice colorée le ratio H/F dans les 29 domaines en fonction du temps : **DONE**
5. Sous forme de courbe, une par mode d'acquisition, le ratio de femme

### Important pour le rapport :
- sortir les figures en ratio 2:1
- format .png, background transparent

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LinearSegmentedColormap
import re
import unicodedata
# %matplotlib inline

In [2]:
# Widen the pandas display limits so that wide/long frames are shown in full.
pd.set_option('display.max_columns',200)
pd.set_option('display.max_rows',200)
# NOTE: a `global` statement at module level has no effect; the names below
# are ordinary module-level variables read by the plotting functions.
global color_h, color_f, color_b, PLOTDIM, FREQYEARS
color_h = "gold"        # colour of the curves for 'masculin' rows
color_f = "royalblue"   # colour of the curves for 'féminin' rows
color_b = "lightgray"   # not used in the visible part of the notebook
# Figure size passed as `figsize`.
# NOTE(review): the notes at the top ask for a 2:1 ratio, (9, 6) is 3:2 — confirm.
PLOTDIM = (9,6)
FREQYEARS = 10          # spacing (in years) between major x ticks
# Colormap going from the "men" colour to the "women" colour, used by the matrix figures.
my_cm = LinearSegmentedColormap.from_list("colormap", [color_h, color_f], N=100)

In [3]:
# Refresh Matplotlib's font cache so that a freshly installed "Roboto" is
# picked up. `_rebuild` is a private helper that is not available in recent
# Matplotlib releases, so it is only called when it exists.
_rebuild_fonts = getattr(matplotlib.font_manager, '_rebuild', None)
if callable(_rebuild_fonts):
    _rebuild_fonts()
plt.rcParams['font.family'] = 'Roboto'
plt.rcParams['font.sans-serif'] = 'Roboto'
plt.rcParams['font.weight'] = 500
# matplotlib.font_manager.findSystemFonts(fontpaths=None, fontext='ttf')

In [4]:
# The data files are stored in the "data" folder, which keeps things tidy.
authors = pd.read_csv('../data/ALL_AUTHORS (live_work + clean nat).csv', encoding='utf-8', sep=',', low_memory=False)
raw_art = pd.read_csv('../data/ALL_ARTWORKS (for stats use ONLY).csv', encoding='utf-8', sep=',', low_memory=False)
# Grouping key used to merge "series": `related` followed by `acquisition`,
# with missing values replaced by an empty string.
raw_art['groupcol'] = raw_art['related'].fillna(value='') + raw_art['acquisition'].fillna(value='')
# Number of rows whose grouping key is empty (both fields missing).
len(raw_art[raw_art['groupcol'] == ''])

121

In [5]:
# Merge the "series": keep a single row (first non-null value of each column)
# per (groupcol, authors, acquisition_year, acquisition_mode_clean) combination.
# NOTE(review): groupby drops rows with a NaN in any of the keys by default —
# confirm that losing those rows is intended.
art = raw_art.groupby(['groupcol', 'authors', 'acquisition_year', 'acquisition_mode_clean']).first().reset_index()
# Compare the shapes before and after the merge.
print(raw_art.shape, art.shape)

(374993, 116) (120902, 116)


In [6]:
def validname(value):
    """
    Turn a label into a folder-friendly slug.

    Every character that is not a word character, whitespace or a hyphen is
    removed; the result is stripped and lowercased; each run of whitespace
    and/or hyphens is then replaced by a single hyphen. Accented letters are
    word characters and are therefore kept as they are.

    value: the string to convert.
    Returns the slug as a string.
    """
    # Raw strings: '\w' and '\s' are invalid escapes in a plain literal.
    value = re.sub(r'[^\w\s-]', '', value).strip().lower()
    value = re.sub(r'[-\s]+', '-', value)
    return value

### Etape préliminaire 1-a : transformer les modes d'acquisitions

In [7]:
# Collapse the detailed acquisition modes into broad categories. The rules
# are applied in this exact order: a later rule sees (and may overwrite) the
# labels written by an earlier one.
for fragment, label in (
    ("Don ", "Don"),
    ("commande", "Commande"),
    ("Commande", "Commande"),
    ("Saisie", "Saisie"),
    ("renseigné", "Non renseigné"),
    ("mixte", "Mixte"),
    ("sinistre", "Remplacement"),
):
    matching = art["acquisition_mode_clean"].str.contains(fragment)
    art.loc[matching, 'acquisition_mode_clean'] = label

### Etape préliminaire 1-b : transformer les domaines

In [8]:
# Regroup rare or heterogeneous domains and shorten long labels. The pairs
# are applied from top to bottom, so "Spectacle, musique, littérature"
# (produced by the first four rules) is itself folded into "Divers" later on.
for old_label, new_label in (
    ("Arts du spectacle", "Spectacle, musique, littérature"),
    ("Performance", "Spectacle, musique, littérature"),
    ("Musique", "Spectacle, musique, littérature"),
    ("Littérature", "Spectacle, musique, littérature"),
    ("Commande publique (A.D.D.)", "Divers"),
    ("Design couleur", "Divers"),
    ("Commande publique (A.P.)", "Divers"),
    ("Oeuvre olfactive", "Divers"),
    ("Domaine mixte, voir détail sur les éléments", "Divers"),
    ("Domaine non saisi", "Divers"),
    ("Sans domaine déterminé", "Divers"),
    ("Certificat", "Divers"),
    ("Architecture intérieure", "Divers"),
    ("Spectacle, musique, littérature", "Divers"),
    ("Objet", "Objet/Design"),
    ("Oeuvre en 3 dimensions", "Œuvre en 3D"),
    ("Oeuvre textile", "Œuvre textile"),
    ("Reproduction photomécanique", "Repro. photomécanique"),
    ("Publication, livre, reliure", "Arts du livre"),
):
    art.loc[art["domain"] == old_label, 'domain'] = new_label

### Etape préliminaire 2 : créer les sous-dossiers s'ils n'y sont pas déjà

In [9]:
# Make sure every output folder exists: one per family of collections, then
# one per individual collection (named after the slug of the collection).
for family_dir in ('./figures/_global', './figures/_fracs',
                   './figures/_musees', './figures/_autres'):
    if not os.path.exists(family_dir):
        os.makedirs(family_dir)
for museum in art['collection'].dropna().unique():
    n = validname(museum)
    museum_dir = './figures/' + n
    if not os.path.exists(museum_dir):
        os.makedirs(museum_dir)

### Etape préliminaire 3 : "donner un genre" aux oeuvres

In [10]:
# Number of rows per Gender value (missing values are not counted).
art['Gender'].value_counts()

masculin         95888
féminin          19282
groupe            2728
non renseigné     1439
Name: Gender, dtype: int64

In [11]:
art['Gender'].count()/len(art) # share of rows with a non-null Gender (~98.7% in the recorded output, i.e. ~1.3% NaN)

0.9870556318340474

### Etape préliminaire 4 : créer les regroupements de collection

In [12]:
# Sort the collections into three families based on their (lowercased) name.
# The "Musée national d'art moderne" and the "Centre national des arts
# plastiques" end up in none of the three lists.
fracs, musées, autres = [], [], []
for museum in art['collection'].dropna().unique():
    lowered = museum.lower()
    is_mnam = "musée national d'art moderne" in lowered
    if "frac" in lowered:
        fracs.append(museum)
    elif "musée" in lowered and not is_mnam:
        musées.append(museum)
    elif not is_mnam and "centre national des arts plastiques" not in lowered:
        autres.append(museum)
len(fracs), len(musées), len(autres)

(18, 33, 5)

## Partie 1 : Proportion du nombre de femmes dans les acquisitions

In [13]:
# Plot the yearly share of acquisitions by women artists for any subset of artworks
def get_ratioF(art, subset, subset_name, path):
    """
    Save a line chart of the yearly share of acquisitions by women artists.

    For every acquisition year from 1945 to 2017, the share is the number of
    rows of ``subset`` whose ``Gender`` is 'féminin' divided by the number of
    rows of ``subset`` (rows are counted through their non-null ``_id``).
    Years without any acquisition in ``subset`` are left blank.

    art: unused; kept so that existing callers keep working.
    subset: DataFrame with 'acquisition_year', 'Gender' and '_id' columns.
    subset_name: unused; kept so that existing callers keep working.
    path: destination of the PNG file.
    """
    years = range(1945, 2018)
    in_period = (subset['acquisition_year'] >= 1945) & (subset['acquisition_year'] <= 2017)
    subset = subset[in_period]
    # Select '_id' before counting: only that column is needed, and the result
    # is a (possibly empty) Series, so no special case is required when the
    # subset contains no woman at all.
    total1 = subset.groupby('acquisition_year')['_id'].count()
    f1 = subset[subset['Gender'] == 'féminin'].groupby('acquisition_year')['_id'].count()
    # A year with acquisitions but no woman is a genuine 0%, not a gap.
    f1 = f1.reindex(years, fill_value=0)
    ratio = (f1 / total1).reindex(years)

    fig, ax = plt.subplots(figsize=PLOTDIM)
    try:
        ratio.plot(color=color_f, marker='.', linewidth=2, ax=ax)
        ax.set_xlim(left=1945, right=2017)
        ax.set_ylim(bottom=-0.01, top=1.01)
        ax.tick_params(labelsize=15)
        ax.xaxis.set_major_locator(ticker.MultipleLocator(FREQYEARS))
        ax.set_xlabel('')
        # A formatter keeps the labels tied to the tick values, unlike a list
        # of label strings computed from the current tick positions.
        ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, decimals=0))
        ax.grid()
        fig.savefig(path, bbox_inches='tight', format="png", dpi=300, transparent=True)
    finally:
        # Always release the figure, even when saving fails.
        plt.close(fig)

In [14]:
# Share of women in the acquisitions: all collections, then each family.
for selection, label, target in (
    (art, "ensemble des collections",
     "./figures/_global/ratio_overall.png"),
    (art[art['collection'].isin(fracs)], "ensemble des 18 FRAC",
     "./figures/_fracs/ratio_overall.png"),
    (art[art['collection'].isin(musées)], "ensemble des musées",
     "./figures/_musees/ratio_overall.png"),
    (art[art['collection'].isin(autres)], "autres musées",
     "./figures/_autres/ratio_overall.png"),
):
    get_ratioF(art, selection, label, target)

In [15]:
# Same figure for every individual collection, saved in its own folder.
for museum in art['collection'].dropna().unique():
    destination = "./figures/" + validname(museum) + "/ratio_overall.png"
    subset = art.loc[art['collection'] == museum]
    get_ratioF(art, subset, museum, destination)
    del subset
print('Done')

Done


### Aparté

In [16]:
# A study on "is it a good idea to group by separable artworks" => NO
# One row (first non-null value of each column) per `related` key, among the
# artworks whose type is 'separable'.
uniquegroups = art[art['type'] == 'separable'].groupby('related').first()
# uniquegroups = art[art['type'] == 'separable'].groupby('related').agg(lambda x:x.value_counts().index[0])
# matisse = art[art['related'] == '70000000007610|70000000007613|70000000007616|70000000007619|70000000007622|70000000007625|70000000007628|70000000007631|70000000007634|70000000007637|70000000007640|70000000007643|70000000007646|70000000007649|70000000007652|70000000007655|70000000007658|70000000007661|70000000007664|70000000007667|70000000007670|70000000007673|70000000007676|70000000007679|70000000007682|70000000007685|70000000007688|70000000007691|70000000007694|70000000007697|70000000007700|70000000007703|70000000007706|70000000007709|70000000007712|70000000007715|70000000007718|70000000007721|70000000007724|70000000007727|70000000007730|70000000007733|70000000007736|70000000007739|70000000007742|70000000007745|70000000007748|70000000007751|70000000007754|70000000007757|70000000007760|70000000007763|70000000007766|70000000007769|70000000007772|70000000007775|70000000007778|70000000007781|70000000007784|70000000007787|70000000007790|70000000007793|70000000007796|70000000007799|70000000007802|70000000007805|70000000007808|70000000007811|70000000007814|70000000007817|70000000007820|70000000007823|70000000007826|70000000007829|70000000007832|70000000007835|70000000007838|70000000007841|70000000007844|70000000007847|70000000007850|70000000007853|70000000007856|70000000007859|70000000007862|70000000007865|70000000007868|70000000007871|70000000007874|70000000007877|70000000007880|70000000007883|70000000007886|70000000007889|70000000007892|70000000007895|70000000007898|70000000007901|70000000007904|70000000007907|70000000007910|70000000007913|70000000007916|70000000007919|70000000007922|70000000007925|70000000007928|70000000007931|70000000007934|70000000007937|70000000007940|70000000007943|70000000007946|70000000007949|70000000007952|70000000007955|70000000007958|70000000007961|70000000007964|70000000007967|70000000007970|70000000007973|70000000007976|70000000007979|70000000007982|70000000007985|70000000007988|70000000007991|70000000007994|70000000007997|70000000008000|
70000000008003|70000000008006|70000000008009|70000000008012|70000000008015|70000000008018|70000000008021|70000000008024|70000000008027|70000000008030|70000000008033|70000000008036|70000000008039|70000000008042|70000000008045|70000000008048|70000000008051|70000000008054|70000000008057|70000000008060|70000000008063|70000000008071|70000000008074|70000000008077|70000000008080|70000000008083|70000000008086|70000000008089|70000000008092|70000000008095|70000000008098|70000000008101|70000000008104|70000000008107|70000000008110|70000000008113|70000000008116|70000000008119|70000000008122|70000000008125|70000000008128|70000000008131|70000000008134|70000000008137|70000000008140|70000000008143|70000000008146|70000000008149|70000000008152|70000000008155|70000000008158|70000000008161|70000000008164|70000000008167|70000000008170|70000000008173|70000000008176|70000000008179|70000000008182|70000000008185|70000000008188|70000000008191|70000000008194|70000000008197|70000000008200|70000000008203|70000000008206|70000000008209|70000000008212|70000000008215|70000000008218|70000000008221|70000000008224|70000000008227|70000000008230|70000000008233|70000000008236|70000000008239|70000000008242|70000000008245|70000000008248|70000000008251|70000000008254|70000000008257|70000000008260|70000000008263|70000000008266|70000000008269|70000000008272|70000000008275|70000000008278|70000000008281|70000000008284|70000000008287|70000000008290|70000000008293|70000000008296|70000000008299|70000000008302|70000000008305|70000000008308|70000000008311|70000000008314|70000000008317|70000000008320|70000000008323|70000000008326|70000000008329|70000000008332|70000000008335|70000000008338|70000000008341|70000000008344|70000000008347|70000000008350|70000000008353|70000000008356|70000000008359|70000000008362|70000000008365|70000000008368|70000000008371|70000000008374|70000000008377|70000000008380|70000000008383|70000000008386|70000000008389|70000000008392|70000000008395|70000000008398|70000000008401|70000000008404|70000
000008407|70000000008410|70000000008413|70000000008416|70000000008419|70000000008422|70000000008425|70000000008428|70000000008431|70000000008434|70000000008437|70000000008440|70000000008443|70000000008446|70000000008449|70000000008452|70000000008455|70000000008458|70000000008461|70000000008464|70000000008467|70000000008470|70000000008473|70000000008476|70000000008479|70000000008482|70000000008485|70000000008488|70000000008491|70000000008494|70000000008497|70000000008500|70000000008503|70000000008506|70000000008509|70000000008512|70000000008515|70000000008518|70000000008521|70000000008524|70000000008533|70000000008536|70000000008539|70000000008542|70000000008545|70000000008548|70000000008551|70000000008554|70000000008557|70000000008560|70000000008563|70000000008566|70000000008569|70000000008572|70000000008575|70000000008578|70000000008581|70000000008584|70000000008587|70000000008590|70000000008593|70000000008596|70000000008599|70000000008602|70000000008605|70000000008608|70000000008611|70000000008614|70000000008617|70000000008620|70000000008623|70000000008626|70000000008629|70000000008632|70000000008635|70000000008638|70000000008641|70000000008644|70000000008647|70000000008650|70000000008653|70000000008656|70000000008659|70000000008662|70000000008665|70000000008668|70000000008671|70000000008674|70000000008677|70000000008680|70000000008683|70000000008686|70000000008689|70000000008692|70000000008695|70000000008698|70000000008701|70000000008704|70000000008707|70000000008710|70000000008713|70000000008716|70000000008719|70000000008722|70000000008725|70000000008728|70000000008731|70000000008734|70000000008737|70000000008740|70000000008743|70000000008746|70000000008749|70000000008752|70000000008755|70000000008758|70000000008761|70000000008764|70000000008767|70000000008770|70000000008773|70000000008776|70000000008779|70000000008782|70000000008785|70000000008788|70000000008791|70000000008794|70000000008797|70000000008800|70000000024504|70000000024507|70000000024510|7000000002
4513|70000000024516|70000000024519|70000000024522|70000000024525|70000000024528|70000000024531|70000000024534|70000000024537|70000000024540|70000000024543|70000000024546|70000000024549|70000000024552|70000000024555|70000000024558|70000000024561|70000000024564|70000000024567|70000000024570|70000000024573|70000000024576|70000000024579|70000000024582|70000000024585|70000000024588|70000000024591|70000000024594|70000000024597|70000000024600|70000000024603|70000000024606|70000000024609']
counterex = art[art['related'] == '10000000001440|10000000001441|10000000001442|10000000001443|10000000001444|10000000001445|10000000001439|10000000001446|10000000001447|10000000001448|10000000001449|10000000001450|10000000001451|10000000001452|10000000001453|10000000001454|10000000001481']
#counterex

## Partie 2 : Âge moyen à l'acquisition, par genre

In [17]:
# art['age_at_acquisition'].sample(10)

In [18]:
# Function to plot the age at acquisition (separated between hommes and femmes)
def get_age_acquisition(art, subset, subset_name, path):
    """
    Save a line chart of the mean ``age_at_acquisition`` per acquisition year
    (1945-2017), with one curve for 'féminin' rows and one for 'masculin' rows.

    A curve is skipped when ``subset`` has no row for that gender.

    art: unused; kept so that existing callers keep working.
    subset: DataFrame with 'acquisition_year', 'Gender' and
        'age_at_acquisition' columns.
    subset_name: unused; kept so that existing callers keep working.
    path: destination of the PNG file.
    """
    years = range(1945, 2018)
    in_period = (subset['acquisition_year'] >= 1945) & (subset['acquisition_year'] <= 2017)
    subset = subset[in_period]

    # The figure size is set up front so that it does not depend on whether a
    # curve is eventually drawn.
    fig, ax = plt.subplots(figsize=PLOTDIM)
    try:
        for gender, colour in (('féminin', color_f), ('masculin', color_h)):
            # Average the age column only: averaging the whole frame fails on
            # text columns with recent pandas and is needlessly slow.
            mean_age = (subset[subset['Gender'] == gender]
                        .groupby('acquisition_year')['age_at_acquisition']
                        .mean())
            if len(mean_age) > 0:
                mean_age = mean_age.reindex(years)
                mean_age.plot(color=colour, marker='.', linewidth=2, ax=ax)
        ax.set_xlim(left=1945, right=2017)
        ax.set_ylim(bottom=-1, top=141)
        ax.tick_params(labelsize=15)
        ax.xaxis.set_major_locator(ticker.MultipleLocator(FREQYEARS))
        ax.set_xlabel('')
        ax.grid()
        fig.savefig(path, bbox_inches='tight', format="png", dpi=300, transparent=True)
    finally:
        # Always release the figure, even when saving fails.
        plt.close(fig)

In [19]:
# Mean age at acquisition: all collections, then each family of collections.
for selection, label, target in (
    (art, "ensemble des collections",
     "./figures/_global/age_acquisition.png"),
    (art[art['collection'].isin(fracs)], "ensemble des 18 FRAC",
     "./figures/_fracs/age_acquisition.png"),
    (art[art['collection'].isin(musées)], "ensemble des musées",
     "./figures/_musees/age_acquisition.png"),
    (art[art['collection'].isin(autres)], "autres musées",
     "./figures/_autres/age_acquisition.png"),
):
    get_age_acquisition(art, selection, label, target)

In [20]:
# Same figure for every individual collection, saved in its own folder.
for museum in art['collection'].dropna().unique():
    destination = "./figures/" + validname(museum) + "/age_acquisition.png"
    subset = art.loc[art['collection'] == museum]
    get_age_acquisition(art, subset, museum, destination)
    del subset
print('Done')

Done


## Partie 3 : ratio d'artistes français dans les acquisitions, par genre

In [21]:
# art['nationality'].value_counts()

In [22]:
def get_ratioNat(art, subset, subset_name, path):
    """
    Save a line chart of the yearly share of French artists in the
    acquisitions (1945-2017), with one curve for 'masculin' rows and one for
    'féminin' rows.

    For each gender and year, the share is the number of rows whose
    ``nationality`` contains 'français' divided by the number of rows of that
    gender (rows are counted through their non-null ``_id``). Years without
    any acquisition for a gender are left blank on its curve.

    art: unused; kept so that existing callers keep working.
    subset: DataFrame with 'acquisition_year', 'Gender', 'nationality' and
        '_id' columns.
    subset_name: unused; kept so that existing callers keep working.
    path: destination of the PNG file.
    """
    years = range(1945, 2018)
    in_period = (subset['acquisition_year'] >= 1945) & (subset['acquisition_year'] <= 2017)
    subset = subset[in_period]
    # na=False: a missing nationality counts as "not French" instead of
    # putting NaN into the boolean mask.
    is_french = subset['nationality'].str.contains('français', na=False)

    fig, ax = plt.subplots(figsize=PLOTDIM)
    try:
        # Men first, then women, so that the women's curve is drawn on top.
        for gender, colour in (('masculin', color_h), ('féminin', color_f)):
            of_gender = subset['Gender'] == gender
            total = subset[of_gender].groupby('acquisition_year')['_id'].count()
            french = subset[of_gender & is_french].groupby('acquisition_year')['_id'].count()
            # fill_value=0: a year with acquisitions but no French artist is
            # a genuine 0%; this also covers the "no French artist at all" case.
            ratio = (french.reindex(years, fill_value=0) / total).reindex(years)
            ratio.plot(color=colour, marker='.', linewidth=2, ax=ax)

        ax.set_xlim(left=1945, right=2017)
        ax.set_ylim(bottom=-0.01, top=1.01)
        ax.tick_params(labelsize=15)
        ax.xaxis.set_major_locator(ticker.MultipleLocator(FREQYEARS))
        ax.set_xlabel('')
        # A formatter keeps the labels tied to the tick values.
        ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, decimals=0))
        ax.grid()
        fig.savefig(path, bbox_inches='tight', format="png", dpi=300, transparent=True)
    finally:
        # Always release the figure, even when saving fails.
        plt.close(fig)

In [23]:
# Share of French artists by gender: all collections, then each family.
for selection, label, target in (
    (art, "ensemble des collections",
     "./figures/_global/ratio_nationality.png"),
    (art[art['collection'].isin(fracs)], "ensemble des 18 FRAC",
     "./figures/_fracs/ratio_nationality.png"),
    (art[art['collection'].isin(musées)], "musées",
     "./figures/_musees/ratio_nationality.png"),
    (art[art['collection'].isin(autres)], "autres musées",
     "./figures/_autres/ratio_nationality.png"),
):
    get_ratioNat(art, selection, label, target)

In [24]:
# Same figure for every individual collection, saved in its own folder.
for museum in art['collection'].dropna().unique():
    destination = "./figures/" + validname(museum) + "/ratio_nationality.png"
    subset = art.loc[art['collection'] == museum]
    get_ratioNat(art, subset, museum, destination)
    del subset
print('Done')

Done


## Partie 4 : ratio H/F dans les domaines

In [25]:
def get_ratio_domains(art, subset, subset_name, path):
    """
    Save a coloured matrix of the share of women artists per domain (rows)
    and acquisition year from 1945 to 2017 (columns), with a last row
    "Tous domaines" for the whole subset.

    art: full artworks table; its domains (1945-2017) define the rows, so
        every figure has the same rows whatever the subset.
    subset: DataFrame with 'acquisition_year', 'Gender', 'domain' and '_id'
        columns.
    subset_name: unused; kept so that existing callers keep working.
    path: destination of the PNG file.
    """
    years = range(1945, 2018)
    art = art[(art['acquisition_year'] >= 1945) & (art['acquisition_year'] <= 2017)]
    subset = subset[(subset['acquisition_year'] >= 1945) & (subset['acquisition_year'] <= 2017)]
    women = subset[subset['Gender'] == 'féminin']

    # Share of women per year, all domains together (last row of the matrix).
    overall = subset.groupby('acquisition_year')['_id'].count()
    overall_f = women.groupby('acquisition_year')['_id'].count()
    overall_f = overall_f.reindex(years, fill_value=0)
    overall = pd.Series(overall_f/overall, name="Tous domaines")

    # We need the fraction of Femmes in each year/domain
    total = subset.groupby(['domain', 'acquisition_year'])['_id'].count()
    f = women.groupby(['domain', 'acquisition_year'])['_id'].count()
    ratio = f/total
    if len(ratio) > 0:
        ratio = ratio.reset_index(name='Value')
        names = sorted(list(ratio['domain'].unique()))
        all_names = sorted(list(art['domain'].dropna().unique()))
        # aggfunc='sum' matters: a domain/year with acquisitions but no woman
        # has a NaN ratio above, and summing that single NaN yields 0, so the
        # cell is drawn as 0% instead of being left blank.
        ratio = ratio.pivot_table(index='domain', columns='acquisition_year', values='Value', aggfunc='sum')
        ratio.index = names
        ratio = ratio.reindex(columns=years).reindex(index=all_names)
        # pd.concat replaces DataFrame.append, which recent pandas no longer has.
        overall_row = overall.reindex(years).to_frame().T
        ratio = pd.concat([ratio, overall_row])
        mratio = ratio.values

        # Then we customize the plot
        fig, ax = plt.subplots(figsize=PLOTDIM)
        im = ax.matshow(mratio, cmap=my_cm, vmin=0, vmax=1)
        #fig.set_size_inches(PLOTDIM)
        divider = make_axes_locatable(ax)
        truc = divider.append_axes("right", size="2%", pad=-4.4)
        # Pin the colorbar ticks so that they always match the six labels.
        cbar = plt.colorbar(im, cax=truc, ticks=[0, 0.2, 0.4, 0.6, 0.8, 1])
        cbar.ax.set_yticklabels(['0%', '20%', '40%', '60%', '80%','100%'], fontsize=10)

        # Column 0 is 1945, so column 5 is 1950. Tick positions are set
        # before their labels so that both always agree.
        ax.xaxis.set_ticks_position('bottom')
        ax.set_xticks(np.arange(5, 71, 10))
        ax.set_xticklabels(['1950', '', '1970', '', '1990', '', '2010'], fontsize=10)
        domains = list(ratio.index)
        domains = [d if not "Domaine mixte" in d else "Domaine mixte" for d in domains]
        ax.set_yticks(np.arange(0, len(domains), 1))
        ax.set_yticklabels(domains, fontsize=10)
        ax.set_aspect(5)
    # When there was nothing to draw, no figure was created above and pyplot
    # saves a new, empty one, so the output file exists in every case.
    plt.savefig(path, bbox_inches='tight',format="png", dpi=300, transparent=True)
    plt.close()

In [26]:
# Share of women per domain and year: all collections, then each family.
for selection, label, target in (
    (art, "ensemble des collections",
     "./figures/_global/ratio_domains.png"),
    (art[art['collection'].isin(fracs)], "ensemble des 18 FRAC",
     "./figures/_fracs/ratio_domains.png"),
    (art[art['collection'].isin(musées)], "ensemble des musées",
     "./figures/_musees/ratio_domains.png"),
    (art[art['collection'].isin(autres)], "autres musées",
     "./figures/_autres/ratio_domains.png"),
):
    get_ratio_domains(art, selection, label, target)

In [27]:
# Same figure for every individual collection, saved in its own folder.
for museum in art['collection'].dropna().unique():
    destination = "./figures/" + validname(museum) + "/ratio_domains.png"
    subset = art.loc[art['collection'] == museum]
    get_ratio_domains(art, subset, museum, destination)
    del subset
print('Done')

Done


### Du bordel pour Vincent

In [28]:
# Export the mapping between each collection name and its figure folder.
m = list(art['collection'].dropna().unique())
n = [validname(x) for x in m]
pd.DataFrame({"musée": m, "dossier": n}).to_csv("./figures/musées.csv", encoding='utf-8', index=False)

In [29]:
# Count the authors whose most recent acquisition year is >= 1945 (k) and
# >= 1900 (l). `acquisition_year` holds the years as a '|'-separated string;
# authors without any year are skipped.
k = 0
l = 0
# Iterate over the one column that is needed rather than over whole rows,
# and compute the most recent year only once per author.
for years in authors['acquisition_year'].dropna():
    latest = max(int(year) for year in years.split('|'))
    if latest >= 1945:
        k += 1
    if latest >= 1900:
        l += 1
print(len(authors), l, k)

35958 27272 22027


## Partie 5 : Ratio par mode d'acquisition

In [30]:
# Number of distinct acquisition modes (missing values are not counted).
art['acquisition_mode_clean'].nunique()

15

In [31]:
def get_ratio_acquisition(art, subset, subset_name, path):
    """
    Save a coloured matrix of the share of women artists per acquisition mode
    (rows) and acquisition year from 1945 to 2017 (columns), with a last row
    "Tous modes" for the whole subset.

    art: full artworks table; its acquisition modes (1945-2017) define the
        rows, so every figure has the same rows whatever the subset.
    subset: DataFrame with 'acquisition_year', 'Gender',
        'acquisition_mode_clean' and '_id' columns.
    subset_name: unused; kept so that existing callers keep working.
    path: destination of the PNG file.
    """
    years = range(1945, 2018)
    art = art[(art['acquisition_year'] >= 1945) & (art['acquisition_year'] <= 2017)]
    subset = subset[(subset['acquisition_year'] >= 1945) & (subset['acquisition_year'] <= 2017)]
    women = subset[subset['Gender'] == 'féminin']

    # Share of women per year, all modes together (last row of the matrix).
    overall = subset.groupby('acquisition_year')['_id'].count()
    overall_f = women.groupby('acquisition_year')['_id'].count()
    overall_f = overall_f.reindex(years, fill_value=0)
    overall = pd.Series(overall_f/overall, name="Tous modes")

    # We need the fraction of Femmes in each year/acquisition mode
    total = subset.groupby(['acquisition_mode_clean', 'acquisition_year'])['_id'].count()
    f = women.groupby(['acquisition_mode_clean', 'acquisition_year'])['_id'].count()
    ratio = f/total
    if len(ratio) > 0:
        ratio = ratio.reset_index(name='Value')
        names = sorted(list(ratio['acquisition_mode_clean'].unique()))
        all_names = sorted(list(art['acquisition_mode_clean'].dropna().unique()))
        # aggfunc='sum' matters: a mode/year with acquisitions but no woman
        # has a NaN ratio above, and summing that single NaN yields 0, so the
        # cell is drawn as 0% instead of being left blank.
        ratio = ratio.pivot_table(index='acquisition_mode_clean', columns='acquisition_year',
                                  values='Value', aggfunc='sum')
        ratio.index = names
        ratio = ratio.reindex(columns=years).reindex(index=all_names)
        # pd.concat replaces DataFrame.append, which recent pandas no longer has.
        overall_row = overall.reindex(years).to_frame().T
        ratio = pd.concat([ratio, overall_row])
        mratio = ratio.values

        # Then we customize the plot
        fig, ax = plt.subplots(figsize=PLOTDIM)
        im = ax.matshow(mratio, cmap=my_cm, vmin=0, vmax=1)
        #fig.set_size_inches(PLOTDIM)
        divider = make_axes_locatable(ax)
        truc = divider.append_axes("right", size="2%", pad=-4.4)
        # Pin the colorbar ticks so that they always match the six labels.
        cbar = plt.colorbar(im, cax=truc, ticks=[0, 0.2, 0.4, 0.6, 0.8, 1])
        cbar.ax.set_yticklabels(['0%', '20%', '40%', '60%', '80%','100%'], fontsize=10)

        # Column 0 is 1945, so column 5 is 1950. Tick positions are set
        # before their labels so that both always agree.
        ax.xaxis.set_ticks_position('bottom')
        ax.set_xticks(np.arange(5, 71, 10))
        ax.set_xticklabels(['1950', '', '1970', '', '1990', '', '2010'], fontsize=10)
        # NOTE(review): these substring tests are case-sensitive, while the
        # acquisition-mode clean-up cell of this notebook already rewrites the
        # matching modes to "Mixte", "Commande" and "Saisie"; the relabelling
        # below then looks like it never triggers — confirm the intended labels.
        domains = list(ratio.index)
        domains = [d if not "mixte" in d else "Mode d'acquisition mixte" for d in domains]
        domains = [d if not "non renseigné par commande" in d else "Non renseigné, par commande" for d in domains]
        # domains = [d if not "non renseigné" in d else "Non renseigné" for d in domains]
        domains = [d if not "Douanes" in d else "Saisie des Douanes" for d in domains]
        ax.set_yticks(np.arange(0, len(domains), 1))
        ax.set_yticklabels(domains, fontsize=10)
        ax.set_aspect(5)
    # When there was nothing to draw, no figure was created above and pyplot
    # saves a new, empty one, so the output file exists in every case.
    plt.savefig(path, bbox_inches='tight',format="png", dpi=300, transparent=True)
    plt.close()

In [32]:
# Share of women per acquisition mode and year: all collections, then each family.
for selection, label, target in (
    (art, "ensemble des collections",
     "./figures/_global/ratio_acquisitions.png"),
    (art[art['collection'].isin(fracs)], "ensemble des 18 FRAC",
     "./figures/_fracs/ratio_acquisitions.png"),
    (art[art['collection'].isin(musées)], "ensemble des musées",
     "./figures/_musees/ratio_acquisitions.png"),
    (art[art['collection'].isin(autres)], "autres musées",
     "./figures/_autres/ratio_acquisitions.png"),
):
    get_ratio_acquisition(art, selection, label, target)

In [33]:
# Same figure for every individual collection, saved in its own folder.
for museum in art['collection'].dropna().unique():
    destination = "./figures/" + validname(museum) + "/ratio_acquisitions.png"
    subset = art.loc[art['collection'] == museum]
    get_ratio_acquisition(art, subset, museum, destination)
    del subset
print('Done')

Done


In [34]:
# Ad-hoc check on a single collection: yearly share of 'féminin' rows,
# computed like the "Tous modes" row of get_ratio_acquisition, except that
# `subset` is not restricted to 1945-2017 here.
subset = art[art['collection'] == "Musée national d'art moderne / Centre de création industrielle"]
overall = subset.groupby('acquisition_year').count()['_id']
overall_f = subset[subset['Gender']=='féminin'].groupby('acquisition_year').count()['_id'].reindex(range(1945, 2018), fill_value=0)
overall = pd.Series(overall_f/overall, name="Tous modes")

In [35]:
# Artworks restricted to the 1945-2017 acquisition window.
art1 = art[(art['acquisition_year'] >= 1945) & (art['acquisition_year'] <= 2017)]
# art1[art1["ensemble_id"] == 12].sample(10)

## Partie 6 : nombre de "points de contacts" par artiste

In [36]:
def get_contact_points(art, subset, subset_name, path):
    """
    Save a line chart of the yearly share of women among the "contact
    points" of ``subset`` (1945-2017).

    A contact point is one (authors, acquisition_year) pair: several rows
    with the same authors in the same year count only once. The Gender of a
    contact point is the first non-null Gender of its rows. Years without
    any contact point are left blank.

    art: unused; kept so that existing callers keep working.
    subset: DataFrame with 'authors', 'acquisition_year', 'Gender' and '_id'
        columns.
    subset_name: unused; kept so that existing callers keep working.
    path: destination of the PNG file.
    """
    years = range(1945, 2018)
    in_period = (subset['acquisition_year'] >= 1945) & (subset['acquisition_year'] <= 2017)
    subset = subset[in_period]
    # Only the contact points of `subset` are plotted, so the same (costly)
    # grouping is not computed on the full `art` table.
    sub_contacts = subset.groupby(['authors', 'acquisition_year']).first().reset_index()
    total = sub_contacts.groupby('acquisition_year')['_id'].count().reindex(years)
    f1 = sub_contacts[sub_contacts['Gender'] == 'féminin'].groupby('acquisition_year')['_id'].count()
    # A year with contact points but no woman is a genuine 0%, not a gap.
    f1 = f1.reindex(years, fill_value=0)
    ratio = (f1 / total).reindex(years)

    fig, ax = plt.subplots(figsize=PLOTDIM)
    try:
        ratio.plot(color=color_f, marker='.', linewidth=2, ax=ax)
        ax.set_xlim(left=1945, right=2017)
        ax.set_ylim(bottom=-0.01, top=1.01)
        ax.tick_params(labelsize=15)
        ax.xaxis.set_major_locator(ticker.MultipleLocator(FREQYEARS))
        ax.set_xlabel('')
        # A formatter keeps the labels tied to the tick values.
        ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, decimals=0))
        ax.grid()
        fig.savefig(path, bbox_inches='tight', format="png", dpi=300, transparent=True)
    finally:
        # Always release the figure, even when saving fails.
        plt.close(fig)

In [37]:
# Share of women among contact points: all collections, then each family.
for selection, label, target in (
    (art, "ensemble des collections",
     "./figures/_global/ratio_contacts.png"),
    (art[art['collection'].isin(fracs)], "ensemble des 18 FRAC",
     "./figures/_fracs/ratio_contacts.png"),
    (art[art['collection'].isin(musées)], "ensemble des musées",
     "./figures/_musees/ratio_contacts.png"),
    (art[art['collection'].isin(autres)], "autres musées",
     "./figures/_autres/ratio_contacts.png"),
):
    get_contact_points(art, selection, label, target)

In [38]:
# Same figure for every individual collection, saved in its own folder.
for museum in art['collection'].dropna().unique():
    destination = "./figures/" + validname(museum) + "/ratio_contacts.png"
    subset = art.loc[art['collection'] == museum]
    get_contact_points(art, subset, museum, destination)
    del subset
print('Done')

Done


## Partie 7 : âge de l'artiste à la première acquisition

In [39]:
# Function to plot the age at first acquisition (separated between hommes and femmes)
def get_age_firt_acqu(art, subset, subset_name, path):
    """
    Save a line chart of the mean ``age_at_acquisition`` per acquisition year
    (1945-2017), restricted to the rows whose ``is_first`` equals 1, with one
    curve for 'féminin' rows and one for 'masculin' rows.

    A curve is skipped when no row remains for that gender.

    art: unused; kept so that existing callers keep working.
    subset: DataFrame with 'acquisition_year', 'Gender', 'is_first' and
        'age_at_acquisition' columns.
    subset_name: unused; kept so that existing callers keep working.
    path: destination of the PNG file.

    Raises KeyError when ``subset`` has no 'is_first' column.
    """
    # Fail early with an explicit message: the notebook's recorded run stops
    # on a bare "KeyError: 'is_first'" because the table lacks this column.
    if 'is_first' not in subset.columns:
        raise KeyError("subset has no 'is_first' column; it is required to "
                       "select the first acquisitions")
    years = range(1945, 2018)
    in_period = (subset['acquisition_year'] >= 1945) & (subset['acquisition_year'] <= 2017)
    subset = subset[in_period]
    subset = subset[subset['is_first'] == 1]

    # The figure size is set up front so that it does not depend on whether a
    # curve is eventually drawn.
    fig, ax = plt.subplots(figsize=PLOTDIM)
    try:
        for gender, colour in (('féminin', color_f), ('masculin', color_h)):
            # Average the age column only: averaging the whole frame fails on
            # text columns with recent pandas and is needlessly slow.
            mean_age = (subset[subset['Gender'] == gender]
                        .groupby('acquisition_year')['age_at_acquisition']
                        .mean())
            if len(mean_age) > 0:
                mean_age = mean_age.reindex(years)
                mean_age.plot(color=colour, marker='.', linewidth=2, ax=ax)
        ax.set_xlim(left=1945, right=2017)
        ax.set_ylim(bottom=-1, top=141)
        ax.tick_params(labelsize=15)
        ax.xaxis.set_major_locator(ticker.MultipleLocator(FREQYEARS))
        ax.set_xlabel('')
        ax.grid()
        fig.savefig(path, bbox_inches='tight', format="png", dpi=300, transparent=True)
    finally:
        # Always release the figure, even when saving fails.
        plt.close(fig)

In [40]:
# Mean age at first acquisition: all collections, then each family.
for selection, label, target in (
    (art, "ensemble des collections",
     "./figures/_global/age_first_acquisition.png"),
    (art[art['collection'].isin(fracs)], "ensemble des 18 FRAC",
     "./figures/_fracs/age_first_acquisition.png"),
    (art[art['collection'].isin(musées)], "ensemble des musées",
     "./figures/_musees/age_first_acquisition.png"),
    (art[art['collection'].isin(autres)], "autres musées",
     "./figures/_autres/age_first_acquisition.png"),
):
    get_age_firt_acqu(art, selection, label, target)

KeyError: 'is_first'

In [None]:
# Same figure for every individual collection, saved in its own folder.
for museum in art['collection'].dropna().unique():
    destination = "./figures/" + validname(museum) + "/age_first_acquisition.png"
    subset = art.loc[art['collection'] == museum]
    get_age_firt_acqu(art, subset, museum, destination)
    del subset
print('Done')

## Partie 8 : nombre d'acquisitions

In [None]:
# Function to plot the yearly number of acquisitions (separated between hommes and femmes)
def get_number_acqu(art, subset, subset_name, path):
    """
    Save a line chart of the number of acquisitions per year (1945-2017),
    with one curve for 'féminin' rows and one for 'masculin' rows.

    Rows are counted through their non-null ``_id``. A year without any
    acquisition for a gender is plotted as 0.

    art: unused; kept so that existing callers keep working.
    subset: DataFrame with 'acquisition_year', 'Gender' and '_id' columns.
    subset_name: unused; kept so that existing callers keep working.
    path: destination of the PNG file.
    """
    years = range(1945, 2018)
    in_period = (subset['acquisition_year'] >= 1945) & (subset['acquisition_year'] <= 2017)
    subset = subset[in_period]

    fig, ax = plt.subplots(figsize=PLOTDIM)
    try:
        # Women first, then men, as in the original drawing order.
        for gender, colour in (('féminin', color_f), ('masculin', color_h)):
            counts = subset[subset['Gender'] == gender].groupby('acquisition_year')['_id'].count()
            # fill_value=0: no acquisition in a year is a count of zero, not a
            # missing value that would break the line.
            counts = counts.reindex(years, fill_value=0)
            counts.plot(color=colour, marker='.', linewidth=2, ax=ax)
        ax.set_xlim(left=1945, right=2017)
        # Fixed scale shared by all the figures produced by this function.
        ax.set_ylim(bottom=-10, top=12010)
        ax.tick_params(labelsize=15)
        ax.xaxis.set_major_locator(ticker.MultipleLocator(FREQYEARS))
        ax.set_xlabel('')
        ax.grid()
        fig.savefig(path, bbox_inches='tight', format="png", dpi=300, transparent=True)
    finally:
        # Always release the figure, even when saving fails.
        plt.close(fig)

In [None]:
# Number of acquisitions by gender: all collections, then each family.
for selection, label, target in (
    (art, "ensemble des collections",
     "./figures/_global/nb_acquisitions.png"),
    (art[art['collection'].isin(fracs)], "ensemble des 18 FRAC",
     "./figures/_fracs/nb_acquisitions.png"),
    (art[art['collection'].isin(musées)], "ensemble des musées",
     "./figures/_musees/nb_acquisitions.png"),
    (art[art['collection'].isin(autres)], "autres musées",
     "./figures/_autres/nb_acquisitions.png"),
):
    get_number_acqu(art, selection, label, target)

In [None]:
# Inspect the rows acquired in 2004 for one specific `authors` value.
art[(art['acquisition_year'] == 2004) & (art['authors'] == '9000000000066601')]

In [83]:
# Count the authors that satisfy all four conditions:
#   index  : more than one distinct artwork id in 'ID artworks'
#   index1 : earliest acquisition year after 1944
#   index2 : more than one distinct acquisition year
#   index3 : earliest acquisition year before 2010
# Both columns hold '|'-separated values.
index = [len(set(x)) > 1 for x in authors['ID artworks'].str.split('|')]
index1, index2, index3 = [], [], []
for x in authors['acquisition_year'].str.split('|'):
    # A missing value is not split into a list; such authors fail every
    # year-based condition. Testing the type avoids relying on the identity
    # of the NaN object (and on the `np.NaN` alias, gone in NumPy 2.0).
    if not isinstance(x, list):
        index1.append(False)
        index2.append(False)
        index3.append(False)
        continue
    first_year = min(int(y) for y in x)
    index1.append(first_year > 1944)
    index2.append(len(set(x)) > 1)
    index3.append(first_year < 2010)
len(authors[[a and b and c and d for a, b, c, d in zip(index, index1, index2, index3)]])

6838