# Initial research questions

* comparative analysis of gender representation in artwork creation between born digital and analogue art collections
* with also, potentially, some details on medium, location…
* semantic analysis of the narrative about the artworks or what are the keywords associated with different artwork types

### The story could be:
The internet was supposed to revolutionize things, so how did it do when looking at who makes art and who gets included in collections?


A simple way to plan your work is:

 * choose the research question
 * map the question to pieces of information needed to answer the question (e.g. periods, countings)
 * map the data to specific data types (categorical, numerical, ordinal)
 * choose the plot(s) that better help you to visualise some pattern (e.g. a bar chart)
 * get your data in some form (SPARQL query results)
 * filter/ manipulate your data (select the variables that matter, make operations like countings) 
 * create a data structure that fits the plotting requirements (a table, a JSON etc) including the number of variables needed (e.g. one categorical and one numerical)


In [1]:
#imports 

import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
from plotly.subplots import make_subplots
import pycountry
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
from wordcloud import *
from functools import reduce
pd.set_option('display.max_rows', 200)
from statsmodels.graphics.mosaicplot import mosaic
from plotly.subplots import make_subplots

In [2]:
# upload datasets 
path = './'

#complete DFs
rhz_artworks = pd.read_pickle(path+'Rhizome_data/rhizome_artworks_extra.pkl')
rhz_artists = pd.read_pickle(path+'Rhizome_data/rhizome_artists_extra.pkl')
moma_artists = pd.read_pickle(path+'MOMA_data/pickle/MoMAArtists.pkl')
moma_artworks = pd.read_pickle(path+'MOMA_data/pickle/MoMAartworks.pkl')
moma_artworks_old =  pd.read_pickle(path+'MOMA_data/pickle/old_artworks.pkl')
moma_artworks_new =  pd.read_pickle(path+'MOMA_data/pickle/new_artworks.pkl')
moma_artworks_new['DateAcquired'] = moma_artworks_new['DateAcquired'].replace('nan', str('0'))
moma_artworks_new['DateAcquired']=moma_artworks_new['DateAcquired'].astype('int')
moma_artworks_new =  moma_artworks_new.loc[moma_artworks_new['DateAcquired'] >= 1980]
moma_rhz_compare = moma_artworks_new.loc[moma_artworks_new['DateAcquired'] >= 2000]
moma_rhz_compare = moma_rhz_compare.loc[moma_rhz_compare['DateCreated'] >= 1983]

#MoMA department DFs
moma_arch_cont = pd.read_pickle(path+'MOMA_data/pickle/departments/architecture_design_cont.pkl')
moma_arch_mod = pd.read_pickle(path+'MOMA_data/pickle/departments/architecture_design_mod.pkl')
moma_design_cont = pd.read_pickle(path+'MOMA_data/pickle/departments/architecture_design_img_cont.pkl')
moma_design_mod = pd.read_pickle(path+'MOMA_data/pickle/departments/architecture_design_img_mod.pkl')
moma_draw_cont = pd.read_pickle(path+'MOMA_data/pickle/departments/draws_prints_cont.pkl')
moma_draw_mod = pd.read_pickle(path+'MOMA_data/pickle/departments/draws_prints_mod.pkl')
moma_films_cont = pd.read_pickle(path+'MOMA_data/pickle/departments/films_cont.pkl')
moma_films_mod = pd.read_pickle(path+'MOMA_data/pickle/departments/films_mod.pkl')
moma_fluxus_cont = pd.read_pickle(path+'MOMA_data/pickle/departments/fluxus_cont.pkl')
moma_fluxus_mod = pd.read_pickle(path+'MOMA_data/pickle/departments/fluxus_mod.pkl')
moma_media_cont = pd.read_pickle(path+'MOMA_data/pickle/departments/media_perf_cont.pkl')
moma_media_mod = pd.read_pickle(path+'MOMA_data/pickle/departments/media_perf_mod.pkl')
moma_paint_cont = pd.read_pickle(path+'MOMA_data/pickle/departments/paint_sculp_cont.pkl')
moma_paint_mod = pd.read_pickle(path+'MOMA_data/pickle/departments/paint_sculp_mod.pkl')
moma_photo_cont = pd.read_pickle(path+'MOMA_data/pickle/departments/photo_cont.pkl')
moma_photo_mod = pd.read_pickle(path+'MOMA_data/pickle/departments/photo_mod.pkl')

#Rhizome with text
rhizome_txt_clean = pd.read_pickle(path+'Rhizome_data/rhizome_artworks_extra_text_clean.pkl')
rhizome_txt_stop_kw = pd.read_pickle(path+'Rhizome_data/rhizome_artworks_extra_text_clean_stop_keywords.pkl')

#MoMA with text
moma_arch_cont_text = pd.read_pickle(path+'MOMA_data/pickle/departments/architecture_design_cont_text_final.pkl')
moma_arch_mod_text = pd.read_pickle(path+'MOMA_data/pickle/departments/architecture_design_mod_text_only_final.pkl')
moma_draw_cont_text = pd.read_pickle(path+'MOMA_data/pickle/departments/draws_prints_cont_text_final.pkl')
moma_draw_mod_text = pd.read_pickle(path+'MOMA_data/pickle/departments/draws_prints_mod_text_final.pkl')
moma_films_cont_text = pd.read_pickle(path+'MOMA_data/pickle/departments/films_cont_text_final.pkl')
moma_films_mod_text = pd.read_pickle(path+'MOMA_data/pickle/departments/films_mod_text_final.pkl')
moma_fluxus_cont_text = pd.read_pickle(path+'MOMA_data/pickle/departments/fluxus_cont_text_final.pkl')
moma_fluxus_mod_text = pd.read_pickle(path+'MOMA_data/pickle/departments/fluxus_mod_text_final.pkl')
moma_media_cont_text = pd.read_pickle(path+'MOMA_data/pickle/departments/media_perf_cont_text_final.pkl')
moma_media_mod_text = pd.read_pickle(path+'MOMA_data/pickle/departments/media_perf_mod_text_final.pkl')
moma_paint_cont_text = pd.read_pickle(path+'MOMA_data/pickle/departments/paint_sculp_cont_text_final.pkl')
moma_paint_mod_text = pd.read_pickle(path+'MOMA_data/pickle/departments/paint_sculp_mod_text_final.pkl')
moma_photo_cont_text = pd.read_pickle(path+'MOMA_data/pickle/departments/photo_cont_text_final.pkl')
moma_photo_mod_text = pd.read_pickle(path+'MOMA_data/pickle/departments/photo_mod_text_final.pkl')

#moma text stop
moma_arch_cont_text_stop = pd.read_pickle(path+'MOMA_data/pickle/departments/architecture_design_cont_text_final_stop.pkl')
moma_arch_mod_text_stop = pd.read_pickle(path+'MOMA_data/pickle/departments/architecture_design_mod_text_only_final_stop.pkl')
moma_draw_cont_text_stop = pd.read_pickle(path+'MOMA_data/pickle/departments/draws_prints_cont_text_final_stop.pkl')
moma_draw_mod_text_stop = pd.read_pickle(path+'MOMA_data/pickle/departments/draws_prints_mod_text_final_stop.pkl')
moma_films_cont_text_stop = pd.read_pickle(path+'MOMA_data/pickle/departments/films_cont_text_final_stop.pkl')
moma_films_mod_text_stop = pd.read_pickle(path+'MOMA_data/pickle/departments/films_mod_text_final_stop.pkl')
moma_fluxus_cont_text_stop = pd.read_pickle(path+'MOMA_data/pickle/departments/fluxus_cont_text_final_stop.pkl')
moma_fluxus_mod_text_stop = pd.read_pickle(path+'MOMA_data/pickle/departments/fluxus_mod_text_final_stop.pkl')
moma_media_cont_text_stop = pd.read_pickle(path+'MOMA_data/pickle/departments/media_perf_cont_text_final_stop.pkl')
moma_media_mod_text_stop = pd.read_pickle(path+'MOMA_data/pickle/departments/media_perf_mod_text_final_stop.pkl')
moma_paint_cont_text_stop = pd.read_pickle(path+'MOMA_data/pickle/departments/paint_sculp_cont_text_final_stop.pkl')
moma_paint_mod_text_stop = pd.read_pickle(path+'MOMA_data/pickle/departments/paint_sculp_mod_text_final_stop.pkl')
moma_photo_cont_text_stop = pd.read_pickle(path+'MOMA_data/pickle/departments/photo_cont_text_final_stop.pkl')
moma_photo_mod_text_stop = pd.read_pickle(path+'MOMA_data/pickle/departments/photo_mod_text_final_stop.pkl')

## 4 What is the nationality representation in each datasets?


Viz: stacked area chart (Chiara) + breakdown by decades 
Story: AMERICA!!! BUT ALSO FRANCE!!!! Basically colonialism. 

does nationality representation change over time as a percentage of the total acquisitions? is there a way to talk about representation based on nationality in the story? 


In [3]:
# create working df for MoMA complete
MoMA_complete = pd.concat([moma_artworks_old,moma_artworks_new])
nationalities = ', '.join(MoMA_complete.Nationality)
nationalities = list(set(nationalities.split(', ')))
df_moma_complete = pd.DataFrame(columns= ["Nationality", "DateAcquired", "Count", "Females"]) 

for dep in nationalities:
    nat_sub =  MoMA_complete[MoMA_complete.Nationality == dep]    
    nat_sub.DateAcquired = nat_sub.DateAcquired.astype('str')
    years = ', '.join(nat_sub.DateAcquired)
    years = sorted(list([item[:4] for item in list(set(years.split(', ')))]))[0:-1]
    for year in years:
        year_sub = nat_sub[nat_sub.DateAcquired == year]
        entries_year = len(year_sub)
        f_count = len(year_sub[year_sub.Gender == 'F'])
        df_moma_complete.loc[len(df_moma_complete.index)] = [dep, year, entries_year, f_count]

df_moma_complete.DateAcquired = df_moma_complete.DateAcquired.astype('int')
df_moma_complete

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nat_sub.DateAcquired = nat_sub.DateAcquired.astype('str')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nat_sub.DateAcquired = nat_sub.DateAcquired.astype('str')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nat_sub.DateAcquired = nat_sub.DateAcquired.astype('str')
A value is trying to be set on 

Unnamed: 0,Nationality,DateAcquired,Count,Females
0,Italian,1932,1,0
1,Italian,1934,2,0
2,Italian,1935,3,0
3,Italian,1936,2,0
4,Italian,1937,1,0
...,...,...,...,...
2436,Guatemalan,1961,1,0
2437,Guatemalan,1964,2,0
2438,Guatemalan,1966,1,0
2439,Guatemalan,1970,2,0


In [4]:
# create working df for Rhizome
nationalities = ', '.join(rhz_artworks.Nationality)
nationalities = list(set(nationalities.split(', ')))
df_rhz_nats = pd.DataFrame(columns= ["Nationality", "DateAcquired", "Count", "Females"]) 


for dep in nationalities:
    nat_sub =  rhz_artworks[rhz_artworks.Nationality == dep]    
    nat_sub.dateAcquired = nat_sub.dateAcquired.astype('str')
    years = ', '.join(nat_sub.dateAcquired)
    years = sorted(list([item[:4] for item in list(set(years.split(', ')))]))[0:-1]
    for year in years:
        year_sub = nat_sub[nat_sub.dateAcquired == year]
        entries_year = len(year_sub)
        f_count = len(year_sub[year_sub.Gender == 'F'])
        df_rhz_nats.loc[len(df_rhz_nats.index)] = [dep, year, entries_year, f_count]

df_rhz_nats.DateAcquired = df_rhz_nats.DateAcquired.astype('int')
df_rhz_nats


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nat_sub.dateAcquired = nat_sub.dateAcquired.astype('str')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nat_sub.dateAcquired = nat_sub.dateAcquired.astype('str')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nat_sub.dateAcquired = nat_sub.dateAcquired.astype('str')
A value is trying to be set on 

Unnamed: 0,Nationality,DateAcquired,Count,Females
0,Italian,2001,4,0
1,Italian,2002,9,3
2,Italian,2003,3,0
3,Italian,2005,8,5
4,Italian,2006,3,0
...,...,...,...,...
281,South Korean,2001,2,2
282,South Korean,2002,3,1
283,South Korean,2004,1,0
284,South Korean,2005,2,2


In [5]:
# create Moma's decades
df1 = df_moma_complete.copy()
dfmissing = df1[df1.DateAcquired == 0]
df40 = df1[(df1.DateAcquired >0) &(df1.DateAcquired <1949)]
df50 = df1[(df1.DateAcquired >=1950) &(df1.DateAcquired <1959)]
df60 = df1[(df1.DateAcquired >=1960) &(df1.DateAcquired <1969)]
df70 = df1[(df1.DateAcquired >=1970) &(df1.DateAcquired <1979)]
df80 = df1[(df1.DateAcquired >=1980) &(df1.DateAcquired <1989)]
df90 = df1[(df1.DateAcquired >=1990) &(df1.DateAcquired <1999)]
df2000 = df1[(df1.DateAcquired >=2000) &(df1.DateAcquired <2009)]
df2010 = df1[(df1.DateAcquired >=2010) &(df1.DateAcquired <2019)]
df2020 = df1[(df1.DateAcquired >=2020) &(df1.DateAcquired <2029)]

In [133]:
# create Rhizome's decades
df2 = df_rhz_nats.copy()
df2missing = df2[df2.DateAcquired == 0]
df290 = df2[(df2.DateAcquired >0) &(df2.DateAcquired <2000)]
df22000 = df2[(df2.DateAcquired >=2000) &(df2.DateAcquired <2009)]
df22010 = df2[(df2.DateAcquired >=2010) &(df2.DateAcquired <2019)]
df22020 = df2[(df2.DateAcquired >=2020) &(df2.DateAcquired <2029)]


### Plots: Area charts 
##### Variables: Acquisition date, Nationality, Artworks count for nationality

### Moma acquisition of nationalities over decades

In [7]:
#plot Acquisition distribution over decades
fig11 = px.area(df40, x="DateAcquired", y="Count", color="Nationality", line_group="Nationality", title="Artworks acquired in 1940's")
fig11.show()
fig12 = px.area(df50, x="DateAcquired", y="Count", color="Nationality", line_group="Nationality", title="Artworks acquired in 1950's")
fig12.show()
fig13 = px.area(df60, x="DateAcquired", y="Count", color="Nationality", line_group="Nationality", title="Artworks acquired in 1960's")
fig13.show()
fig14 = px.area(df70, x="DateAcquired", y="Count", color="Nationality", line_group="Nationality", title="Artworks acquired in 1970's")
fig14.show()

fig15 = px.area(df80, x="DateAcquired", y="Count", color="Nationality", line_group="Nationality", title="Artworks acquired in 1980's")
fig15.show()
fig16 = px.area(df90, x="DateAcquired", y="Count", color="Nationality", line_group="Nationality", title="Artworks acquired in 1990's")
fig16.show()
fig17 = px.area(df2000, x="DateAcquired", y="Count", color="Nationality", line_group="Nationality", title="Artworks acquired in 2000's")
fig17.show()
fig18 = px.area(df2010, x="DateAcquired", y="Count", color="Nationality", line_group="Nationality", title="Artworks acquired in 2010's")
fig18.show()


### Rhizome acquisition of nationalities over decades

In [8]:
#plot Acquisition distribution over decades

fig20 = px.area(df22000, x="DateAcquired", y="Count", color="Nationality", line_group="Nationality", title="Artworks acquired in 2000's")
fig20.show()
fig21 = px.area(df22010, x="DateAcquired", y="Count", color="Nationality", line_group="Nationality", title="Artworks acquired in 2010's")
fig21.show()



## 5a How does nationality representation within each dataset compare? 
-> across time OR total OR medium

Viz: 

1/ rhizome v moma1983 w/ sampling (shrink down by 100%?) over time w/ line plot 

2/ moma pre v moma post w/ bar or column charts (maybe stacked bar chart) 

Story: ?? 



In [135]:
#create working DF for piecharts
old = moma_artworks_old.copy()
old['Period'] = 'Contemporary'
new = moma_artworks_new.copy()
new['Period'] = 'Modern'

#merge 
frames = [old, new]
moma_artworks_two_periods = pd.concat(frames)

#count occurrences of department values before and after 1980
before_n = old['Nationality'].value_counts().rename_axis('Nationality').reset_index(name='Contemporary')
after_n = new['Nationality'].value_counts().rename_axis('Nationality').reset_index(name='Modern')
working_df= pd.merge(before_n, after_n, left_on='Nationality', right_on='Nationality')
working_df

Unnamed: 0,Nationality,Contemporary,Modern
0,American,39754,16929
1,French,21683,904
2,German,6666,2501
3,missing,5740,552
4,British,4035,1533
5,Spanish,2566,431
6,Italian,2130,644
7,Russian,2081,109
8,Japanese,1695,719
9,Swiss,1367,738


In [136]:
# create working df for MoMA new sample
MomaNsample = moma_rhz_compare.sample(n=286)
len(MomaNsample), len(df_rhz_nats)
nationalities = ', '.join(MomaNsample.Nationality)
nationalities = list(set(nationalities.split(', ')))
df_MNsample_nats = pd.DataFrame(columns= ["Nationality", "DateAcquired", "Count", "Females"]) 

for dep in nationalities:
    nat_sub =  MomaNsample[MomaNsample.Nationality == dep]    
    nat_sub.DateAcquired = nat_sub.DateAcquired.astype('str')
    years = ', '.join(nat_sub.DateAcquired)
    years = sorted(list([item[:4] for item in list(set(years.split(', ')))]))[0:-1]
    for year in years:
        year_sub = nat_sub[nat_sub.DateAcquired == year]
        entries_year = len(year_sub)
        f_count = len(year_sub[year_sub.Gender == 'F'])
        df_MNsample_nats.loc[len(df_MNsample_nats.index)] = [dep, year, entries_year, f_count]

df_MNsample_nats.DateAcquired = df_MNsample_nats.DateAcquired.astype('int')
df_MNsample_nats



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

Unnamed: 0,Nationality,DateAcquired,Count,Females
0,Chilean,2017,1,1
1,Italian,2000,1,1
2,Italian,2008,1,0
3,Dutch,2005,1,0
4,Polish,2010,1,0
5,Polish,2015,1,0
6,Canadian,2000,1,0
7,Canadian,2004,1,0
8,Canadian,2005,2,0
9,Canadian,2011,1,0


In [137]:
#create df with common nationalities for comparison 
nats_M = set(df_MNsample_nats.Nationality)
nats_RH = set(df_rhz_nats.Nationality)
common_nats = nats_M & nats_RH

Moma_common_nats_df = pd.DataFrame()
Rh_common_nats_df = pd.DataFrame()
for item in common_nats:
    #moma
    moma = df_MNsample_nats[df_MNsample_nats.Nationality == item].drop('Females', axis = 1)
    Moma_common_nats_df = pd.concat([Moma_common_nats_df, moma])
    #rhizme
    rh = df_rhz_nats[df_rhz_nats.Nationality == item].drop('Females', axis = 1)
    Rh_common_nats_df = pd.concat([Rh_common_nats_df, rh])
    


Moma_common_nats_df = Moma_common_nats_df.rename(columns={'Count':'CountM'})
Rh_common_nats_df = Rh_common_nats_df.rename(columns={'Count':'CountR'})

workingDF = pd.concat([Moma_common_nats_df, Rh_common_nats_df], axis=0, ignore_index=True)
workingDF['CountM'] = workingDF['CountM'].fillna('0').astype('int')
workingDF['CountR'] = workingDF['CountR'].fillna('0').astype('int')
workingDF['DateAcquired'] = workingDF['DateAcquired'].astype('int')
workingDF = workingDF[workingDF.DateAcquired > 0]
workingDF

Unnamed: 0,Nationality,DateAcquired,CountM,CountR
0,Chilean,2017,1,0
1,Italian,2000,1,0
2,Italian,2008,1,0
3,Dutch,2005,1,0
4,Polish,2010,1,0
...,...,...,...,...
217,Japanese,2003,0,2
218,Japanese,2004,0,2
219,Japanese,2005,0,7
220,Japanese,2006,0,5


In [149]:
#NB: this list is taking into consideration only artworkks by single artists, as no collective matched
set(workingDF.Nationality)

{'American',
 'Argentine',
 'Austrian',
 'British',
 'Canadian',
 'Chilean',
 'Chinese',
 'Colombian',
 'Dutch',
 'French',
 'German',
 'Indian',
 'Italian',
 'Japanese',
 'Mexican',
 'Polish',
 'South African',
 'Swiss',
 'missing'}

In [152]:
#make sub df based on the set of common nationalities to compare their acquisition rate over time 

nat1 = workingDF[workingDF.Nationality=='American'].drop('Nationality', axis = 1)
aggregation_functions = {'DateAcquired':'first','CountM': 'sum', 'CountR': 'sum'}
nat1_new = nat1.groupby(nat1['DateAcquired']).aggregate(aggregation_functions)

nat2 = workingDF[workingDF.Nationality=='Argentine'].drop('Nationality', axis = 1)
aggregation_functions = {'DateAcquired':'first','CountM': 'sum', 'CountR': 'sum'}
nat2_new = nat2.groupby(nat2['DateAcquired']).aggregate(aggregation_functions)

nat3 = workingDF[workingDF.Nationality=='Austrian'].drop('Nationality', axis = 1)
aggregation_functions = {'DateAcquired':'first','CountM': 'sum', 'CountR': 'sum'}
nat3_new = nat3.groupby(nat3['DateAcquired']).aggregate(aggregation_functions)

nat4 = workingDF[workingDF.Nationality=='British'].drop('Nationality', axis = 1)
aggregation_functions = {'DateAcquired':'first','CountM': 'sum', 'CountR': 'sum'}
nat4_new = nat4.groupby(nat4['DateAcquired']).aggregate(aggregation_functions)

nat5 = workingDF[workingDF.Nationality=='Canadian'].drop('Nationality', axis = 1)
aggregation_functions = {'DateAcquired':'first','CountM': 'sum', 'CountR': 'sum'}
nat5_new = nat5.groupby(nat5['DateAcquired']).aggregate(aggregation_functions)

nat6 = workingDF[workingDF.Nationality=='Chilean'].drop('Nationality', axis = 1)
aggregation_functions = {'DateAcquired':'first','CountM': 'sum', 'CountR': 'sum'}
nat6_new = nat6.groupby(nat6['DateAcquired']).aggregate(aggregation_functions)

nat7 = workingDF[workingDF.Nationality=='Chinese'].drop('Nationality', axis = 1)
aggregation_functions = {'DateAcquired':'first','CountM': 'sum', 'CountR': 'sum'}
nat7_new = nat7.groupby(nat7['DateAcquired']).aggregate(aggregation_functions)

nat8 = workingDF[workingDF.Nationality=='Colombian'].drop('Nationality', axis = 1)
aggregation_functions = {'DateAcquired':'first','CountM': 'sum', 'CountR': 'sum'}
nat8_new = nat8.groupby(nat8['DateAcquired']).aggregate(aggregation_functions)

nat9 = workingDF[workingDF.Nationality=='Dutch'].drop('Nationality', axis = 1)
aggregation_functions = {'DateAcquired':'first','CountM': 'sum', 'CountR': 'sum'}
nat9_new = nat9.groupby(nat9['DateAcquired']).aggregate(aggregation_functions)

nat10 = workingDF[workingDF.Nationality=='French'].drop('Nationality', axis = 1)
aggregation_functions = {'DateAcquired':'first','CountM': 'sum', 'CountR': 'sum'}
nat10_new = nat10.groupby(nat10['DateAcquired']).aggregate(aggregation_functions)

nat11 = workingDF[workingDF.Nationality=='German'].drop('Nationality', axis = 1)
aggregation_functions = {'DateAcquired':'first','CountM': 'sum', 'CountR': 'sum'}
nat11_new = nat11.groupby(nat11['DateAcquired']).aggregate(aggregation_functions)

nat12 = workingDF[workingDF.Nationality=='Indian'].drop('Nationality', axis = 1)
aggregation_functions = {'DateAcquired':'first','CountM': 'sum', 'CountR': 'sum'}
nat12_new = nat12.groupby(nat12['DateAcquired']).aggregate(aggregation_functions)

nat13 = workingDF[workingDF.Nationality=='Italian'].drop('Nationality', axis = 1)
aggregation_functions = {'DateAcquired':'first','CountM': 'sum', 'CountR': 'sum'}
nat13_new = nat13.groupby(nat13['DateAcquired']).aggregate(aggregation_functions)

nat14 = workingDF[workingDF.Nationality=='Japanese'].drop('Nationality', axis = 1)
aggregation_functions = {'DateAcquired':'first','CountM': 'sum', 'CountR': 'sum'}
nat14_new = nat14.groupby(nat14['DateAcquired']).aggregate(aggregation_functions)

nat15 = workingDF[workingDF.Nationality=='Mexican'].drop('Nationality', axis = 1)
aggregation_functions = {'DateAcquired':'first','CountM': 'sum', 'CountR': 'sum'}
nat15_new = nat15.groupby(nat15['DateAcquired']).aggregate(aggregation_functions)

nat16 = workingDF[workingDF.Nationality=='Polish'].drop('Nationality', axis = 1)
aggregation_functions = {'DateAcquired':'first','CountM': 'sum', 'CountR': 'sum'}
nat16_new = nat16.groupby(nat16['DateAcquired']).aggregate(aggregation_functions)

nat17 = workingDF[workingDF.Nationality=='South African'].drop('Nationality', axis = 1)
aggregation_functions = {'DateAcquired':'first','CountM': 'sum', 'CountR': 'sum'}
nat17_new = nat17.groupby(nat17['DateAcquired']).aggregate(aggregation_functions)

nat18 = workingDF[workingDF.Nationality=='Swiss'].drop('Nationality', axis = 1)
aggregation_functions = {'DateAcquired':'first','CountM': 'sum', 'CountR': 'sum'}
nat18_new = nat18.groupby(nat18['DateAcquired']).aggregate(aggregation_functions)


### Plots: Pie charts 
##### Variables: Acquisition date, Nationality, Artworks for nationality (biggest dfs are sampled down)

### Comparison over total nationalities acquired -- Moma's new and old collections 

In [140]:
# create plot comparison count nationalities per Momas' period 
labels =  working_df.Nationality
# Create subplots: use 'domain' type for Pie subplot
fig22 = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])
fig22.add_trace(go.Pie(labels=labels, values= working_df['Contemporary']),
              1, 1)
fig22.add_trace(go.Pie(labels=labels, values= working_df['Modern']),
              1, 2)

# Use `hole` to create a donut-like pie chart
fig22.update_traces(hoverinfo="label+percent", textposition='inside')

fig22.update_layout(
    title_text="MoMA Collection by Nationality and Period",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text='Old', x=-0.1, y=1, font_size=20, showarrow=False),
                 dict(text='New', x=0.5, y=1, font_size=20, showarrow=False)], width=1200)
fig22.show()

### Plots: Line charts 
##### Variables: Acquisition date, Nationality, Artworks for nationality (biggest dfs are sampled down)

### Comparison acquisition of artworks by nationality -- Moma after 1980 vs. Rhizome Collection by decades

In [143]:
nat4_new

Unnamed: 0_level_0,DateAcquired,CountM,CountR
DateAcquired,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1


In [156]:
#create subplots for nationalities and their figures

fig23 = px.line(nat1_new, 
            x='DateAcquired', 
            y=['CountM', 'CountR'], 
            markers=True, 
            labels={'DateAcquired': 'Date of Acquisition', 'value': 'Total Acquisitions', 'variable': 'Source'}, 
            title='Americans', 
            color_discrete_map={"Count Moma": "#456987","Count Rhizome": "#147852"})
fig23.update_traces(hovertemplate=None,  hoverinfo='y')
fig23.update_layout(xaxis = dict(tickmode = 'array', tickvals = [2000, 2005, 2010, 2015, 2020], ticktext = ['2000', '2005', '2010', '2015', '2020']), hovermode="x unified")
fig23.show()

fig24 = px.line(nat2_new, 
            x='DateAcquired', 
            y=['CountM', 'CountR'], 
            markers=True, 
            labels={'DateAcquired': 'Date of Acquisition', 'value': 'Total Acquisitions', 'variable': 'Source'}, 
            title='Argentine', 
            color_discrete_map={"Count Moma": "#456987","Count Rhizome": "#147852"})
fig24.update_traces(hovertemplate=None,  hoverinfo='y')
fig24.update_layout(xaxis = dict(tickmode = 'array', tickvals = [2000, 2005, 2010, 2015, 2020], ticktext = ['2000', '2005', '2010', '2015', '2020']), hovermode="x unified")
fig24.show()

fig25 = px.line(nat3_new, 
            x='DateAcquired', 
            y=['CountM', 'CountR'], 
            markers=True, 
            labels={'DateAcquired': 'Date of Acquisition', 'value': 'Total Acquisitions', 'variable': 'Source'}, 
            title='Austrian artists', 
            color_discrete_map={"Count Moma": "#456987","Count Rhizome": "#147852"})
fig25.update_traces(hovertemplate=None,  hoverinfo='y')
fig25.update_layout(xaxis = dict(tickmode = 'array', tickvals = [2000, 2005, 2010, 2015, 2020], ticktext = ['2000', '2005', '2010', '2015', '2020']), hovermode="x unified")
fig25.show()

fig26= px.line(nat4_new, 
            x='DateAcquired', 
            y=['CountM', 'CountR'], 
            markers=True, 
            labels={'DateAcquired': 'Date of Acquisition', 'value': 'Total Acquisitions', 'variable': 'Source'}, 
            title='British artists', 
            color_discrete_map={"Count Moma": "#456987","Count Rhizome": "#147852"})
fig26.update_traces(hovertemplate=None,  hoverinfo='y')
fig26.update_layout(xaxis = dict(tickmode = 'array', tickvals = [2000, 2005, 2010, 2015, 2020], ticktext = ['2000', '2005', '2010', '2015', '2020']), hovermode="x unified")
fig26.show()

fig27= px.line(nat5_new, 
            x='DateAcquired', 
            y=['CountM', 'CountR'], 
            markers=True, 
            labels={'DateAcquired': 'Date of Acquisition', 'value': 'Total Acquisitions', 'variable': 'Source'}, 
            title='Canadian artists', 
            color_discrete_map={"Count Moma": "#456987","Count Rhizome": "#147852"})
fig27.update_traces(hovertemplate=None,  hoverinfo='y')
fig27.update_layout(xaxis = dict(tickmode = 'array', tickvals = [2000, 2005, 2010, 2015, 2020], ticktext = ['2000', '2005', '2010', '2015', '2020']), hovermode="x unified")
fig27.show()

fig28 = px.line(nat6_new, 
            x='DateAcquired', 
            y=['CountM', 'CountR'], 
            markers=True, 
            labels={'DateAcquired': 'Date of Acquisition', 'value': 'Total Acquisitions', 'variable': 'Source'}, 
            title='Chilean artists', 
            color_discrete_map={"Count Moma": "#456987","Count Rhizome": "#147852"})
fig28.update_traces(hovertemplate=None,  hoverinfo='y')
fig28.update_layout(xaxis = dict(tickmode = 'array', tickvals = [2000, 2005, 2010, 2015, 2020], ticktext = ['2000', '2005', '2010', '2015', '2020']), hovermode="x unified")
fig28.show()

fig29 = px.line(nat7_new, 
            x='DateAcquired', 
            y=['CountM', 'CountR'], 
            markers=True, 
            labels={'DateAcquired': 'Date of Acquisition', 'value': 'Total Acquisitions', 'variable': 'Source'}, 
            title='Chinese artists', 
            color_discrete_map={"Count Moma": "#456987","Count Rhizome": "#147852"})
fig29.update_traces(hovertemplate=None,  hoverinfo='y')
fig29.update_layout(xaxis = dict(tickmode = 'array', tickvals = [2000, 2005, 2010, 2015, 2020], ticktext = ['2000', '2005', '2010', '2015', '2020']), hovermode="x unified")
fig29.show()



fig30 = px.line(nat8_new, 
            x='DateAcquired', 
            y=['CountM', 'CountR'], 
            markers=True, 
            labels={'DateAcquired': 'Date of Acquisition', 'value': 'Total Acquisitions', 'variable': 'Source'}, 
            title='Colombian artists', 
            color_discrete_map={"Count Moma": "#456987","Count Rhizome": "#147852"})
fig30.update_traces(hovertemplate=None,  hoverinfo='y')
fig30.update_layout(xaxis = dict(tickmode = 'array', tickvals = [2000, 2005, 2010, 2015, 2020], ticktext = ['2000', '2005', '2010', '2015', '2020']), hovermode="x unified")
fig30.show()

fig31 = px.line(nat9_new, 
            x='DateAcquired', 
            y=['CountM', 'CountR'], 
            markers=True, 
            labels={'DateAcquired': 'Date of Acquisition', 'value': 'Total Acquisitions', 'variable': 'Source'}, 
            title='Dutch artists', 
            color_discrete_map={"Count Moma": "#456987","Count Rhizome": "#147852"})
fig31.update_traces(hovertemplate=None,  hoverinfo='y')
fig31.update_layout(xaxis = dict(tickmode = 'array', tickvals = [2000, 2005, 2010, 2015, 2020], ticktext = ['2000', '2005', '2010', '2015', '2020']), hovermode="x unified")
fig31.show()

fig32 = px.line(nat10_new, 
            x='DateAcquired', 
            y=['CountM', 'CountR'], 
            markers=True, 
            labels={'DateAcquired': 'Date of Acquisition', 'value': 'Total Acquisitions', 'variable': 'Source'}, 
            title='French artists', 
            color_discrete_map={"Count Moma": "#456987","Count Rhizome": "#147852"})
fig32.update_traces(hovertemplate=None,  hoverinfo='y')
fig32.update_layout(xaxis = dict(tickmode = 'array', tickvals = [2000, 2005, 2010, 2015, 2020], ticktext = ['2000', '2005', '2010', '2015', '2020']), hovermode="x unified")
fig32.show()

fig33 = px.line(nat11_new, 
            x='DateAcquired', 
            y=['CountM', 'CountR'], 
            markers=True, 
            labels={'DateAcquired': 'Date of Acquisition', 'value': 'Total Acquisitions', 'variable': 'Source'}, 
            title='German artists', 
            color_discrete_map={"Count Moma": "#456987","Count Rhizome": "#147852"})
fig33.update_traces(hovertemplate=None,  hoverinfo='y')
fig33.update_layout(xaxis = dict(tickmode = 'array', tickvals = [2000, 2005, 2010, 2015, 2020], ticktext = ['2000', '2005', '2010', '2015', '2020']), hovermode="x unified")
fig33.show()

fig34 = px.line(nat12_new, 
            x='DateAcquired', 
            y=['CountM', 'CountR'], 
            markers=True, 
            labels={'DateAcquired': 'Date of Acquisition', 'value': 'Total Acquisitions', 'variable': 'Source'}, 
            title='Indian artists', 
            color_discrete_map={"Count Moma": "#456987","Count Rhizome": "#147852"})
fig34.update_traces(hovertemplate=None,  hoverinfo='y')
fig34.update_layout(xaxis = dict(tickmode = 'array', tickvals = [2000, 2005, 2010, 2015, 2020], ticktext = ['2000', '2005', '2010', '2015', '2020']), hovermode="x unified")
fig34.show()

fig35 = px.line(nat13_new, 
            x='DateAcquired', 
            y=['CountM', 'CountR'], 
            markers=True, 
            labels={'DateAcquired': 'Date of Acquisition', 'value': 'Total Acquisitions', 'variable': 'Source'}, 
            title='Italian artists', 
            color_discrete_map={"Count Moma": "#456987","Count Rhizome": "#147852"})
fig35.update_traces(hovertemplate=None,  hoverinfo='y')
fig35.update_layout(xaxis = dict(tickmode = 'array', tickvals = [2000, 2005, 2010, 2015, 2020], ticktext = ['2000', '2005', '2010', '2015', '2020']), hovermode="x unified")
fig35.show()

fig36 = px.line(nat14_new, 
            x='DateAcquired', 
            y=['CountM', 'CountR'], 
            markers=True, 
            labels={'DateAcquired': 'Date of Acquisition', 'value': 'Total Acquisitions', 'variable': 'Source'}, 
            title='Japanese artists', 
            color_discrete_map={"Count Moma": "#456987","Count Rhizome": "#147852"})
fig36.update_traces(hovertemplate=None,  hoverinfo='y')
fig36.update_layout(xaxis = dict(tickmode = 'array', tickvals = [2000, 2005, 2010, 2015, 2020], ticktext = ['2000', '2005', '2010', '2015', '2020']), hovermode="x unified")
fig36.show()

fig37 = px.line(nat15_new, 
            x='DateAcquired', 
            y=['CountM', 'CountR'], 
            markers=True, 
            labels={'DateAcquired': 'Date of Acquisition', 'value': 'Total Acquisitions', 'variable': 'Source'}, 
            title='Mexican artists', 
            color_discrete_map={"Count Moma": "#456987","Count Rhizome": "#147852"})
fig37.update_traces(hovertemplate=None,  hoverinfo='y')
fig37.update_layout(xaxis = dict(tickmode = 'array', tickvals = [2000, 2005, 2010, 2015, 2020], ticktext = ['2000', '2005', '2010', '2015', '2020']), hovermode="x unified")
fig37.show()

fig38 = px.line(nat16_new, 
            x='DateAcquired', 
            y=['CountM', 'CountR'], 
            markers=True, 
            labels={'DateAcquired': 'Date of Acquisition', 'value': 'Total Acquisitions', 'variable': 'Source'}, 
            title='Polish artists', 
            color_discrete_map={"Count Moma": "#456987","Count Rhizome": "#147852"})
fig38.update_traces(hovertemplate=None,  hoverinfo='y')
fig38.update_layout(xaxis = dict(tickmode = 'array', tickvals = [2000, 2005, 2010, 2015, 2020], ticktext = ['2000', '2005', '2010', '2015', '2020']), hovermode="x unified")
fig38.show()

fig39 = px.line(nat17_new, 
            x='DateAcquired', 
            y=['CountM', 'CountR'], 
            markers=True, 
            labels={'DateAcquired': 'Date of Acquisition', 'value': 'Total Acquisitions', 'variable': 'Source'}, 
            title='South African artists', 
            color_discrete_map={"Count Moma": "#456987","Count Rhizome": "#147852"})
fig39.update_traces(hovertemplate=None,  hoverinfo='y')
fig39.update_layout(xaxis = dict(tickmode = 'array', tickvals = [2000, 2005, 2010, 2015, 2020], ticktext = ['2000', '2005', '2010', '2015', '2020']), hovermode="x unified")
fig39.show()

fig40 = px.line(nat18_new, 
            x='DateAcquired', 
            y=['CountM', 'CountR'], 
            markers=True, 
            labels={'DateAcquired': 'Date of Acquisition', 'value': 'Total Acquisitions', 'variable': 'Source'}, 
            title='Swiss artists', 
            color_discrete_map={"Count Moma": "#456987","Count Rhizome": "#147852"})
fig40.update_traces(hovertemplate=None,  hoverinfo='y')
fig40.update_layout(xaxis = dict(tickmode = 'array', tickvals = [2000, 2005, 2010, 2015, 2020], ticktext = ['2000', '2005', '2010', '2015', '2020']), hovermode="x unified")
fig40.show()

## 5b How does nationality representation relate to gender? Rhizome vs MoMA 1983+ / MoMA pre-83 v MoMA post-83

--> gender proportions for each nationality across time OR total OR medium 

Viz: bubble charts (Chiara) w/ male / female / collectives across time + total  

Story: maybe similar to 4? 

->  maybe a parallel set? 


# Plotly Visualizations for gender and nationality comparison

## 1 Acquisition overview of Moma and Rhizome

Count of how many artworks for each country have been added to the MoMA (new and old) and Rhizome collections

### MoMA's created before 1980

In [None]:

before80.Nationality = before80.Nationality.astype('str')
nats = ', '.join(before80.Nationality)
nats = list(set(nats.split(', ')))
df_nats_before = pd.DataFrame()

for country in nats:
    nat_sub =  before80[before80.Nationality == country]    
    nat_sub.DateAcquired = nat_sub.DateAcquired.astype('str')
    years = ', '.join(nat_sub.DateAcquired)
    years = sorted(list([item[:4] for item in list(set(years.split(', ')))]))[0:-1]
    for year in years:
        year_sub = nat_sub[nat_sub.DateAcquired == year]
        entries_year = len(year_sub)
        f_count = len(year_sub[year_sub.Gender == 'F'])
        new_row = pd.DataFrame([[country, year, entries_year, f_count]])
        df_nats_before = pd.concat([df_nats_before, new_row], axis=0, ignore_index=True)
df_nats_before.columns= ["Nation", "DateAcquired", "Count", "Females"]
df_nats_before.DateAcquired = df_nats_before.DateAcquired.astype('int')
df_nats_before    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nat_sub.DateAcquired = nat_sub.DateAcquired.astype('str')


Unnamed: 0,Nation,DateAcquired,Count,Females
0,Argentine,1942,27,7
1,Argentine,1943,1,0
2,Argentine,1954,29,0
3,Argentine,1956,2,0
4,Argentine,1957,2,1
...,...,...,...,...
2021,Croatian,2016,1,0
2022,Croatian,2018,2,0
2023,Croatian,2019,2,0
2024,Sudanese,1965,1,0


In [None]:
import plotly.express as px
fig1 = px.area(df_nats_before, x="DateAcquired", y="Count", color="Nation", line_group="Nation")
fig1.update_layout(
    xaxis = dict(
                
        tickmode = 'linear',

        dtick = 10
    ),
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
)



   
fig1.show()

In [None]:
df_nats_before.sort_values( by=["Count"], ascending=True)

### MoMA's created after 1980

In [None]:
after80

Unnamed: 0,Title,Artist,ID,DateCreated,Medium,Department,DateAcquired,URL,ThumbnailURL,Nationality,Gender
1,"City of Music, National Superior Conservatory ...",Christian de Portzamparc,7470,1987,Paint and colored pencil on print,Architecture & Design,1995,http://www.moma.org/collection/works/3,http://www.moma.org/media/W1siZiIsIjk3Il0sWyJw...,French,M
3,"The Manhattan Transcripts Project, New York, N...",Bernard Tschumi,7056,1980,Photographic reproduction with colored synthet...,Architecture & Design,1995,http://www.moma.org/collection/works/5,http://www.moma.org/media/W1siZiIsIjEyNCJdLFsi...,missing,M
31,"The Manhattan Transcripts Project, New York, N...",Bernard Tschumi,7056,1980,Photographic reproduction with colored synthet...,Architecture & Design,1995,http://www.moma.org/collection/works/33,http://www.moma.org/media/W1siZiIsIjIwMCJdLFsi...,missing,M
35,"The Manhattan Transcripts Project, New York, N...",Bernard Tschumi,7056,1980,Photographic reproduction with colored synthet...,Architecture & Design,1995,http://www.moma.org/collection/works/38,http://www.moma.org/media/W1siZiIsIjI2NyJdLFsi...,missing,M
40,"The Manhattan Transcripts Project, New York, N...",Bernard Tschumi,7056,1980,Ink on tracing paper,Architecture & Design,1995,http://www.moma.org/collection/works/44,http://www.moma.org/media/W1siZiIsIjI5NiJdLFsi...,missing,M
...,...,...,...,...,...,...,...,...,...,...,...
138114,Cóctel (Cocktail),Alejandro Kuropatwa,132939,1996,Chromogenic color print,Photography,2020,missing,missing,Argentine,missing
138115,Cóctel (Cocktail),Alejandro Kuropatwa,132939,1996,Chromogenic color print,Photography,2020,missing,missing,Argentine,missing
138116,Cóctel (Cocktail),Alejandro Kuropatwa,132939,1996,Chromogenic color print,Photography,2020,missing,missing,Argentine,missing
138117,Cóctel (Cocktail),Alejandro Kuropatwa,132939,1996,Chromogenic color print,Photography,2020,missing,missing,Argentine,missing


In [None]:
after80.Nationality = after80.Nationality.astype('str')
nats = ', '.join(after80.Nationality)
nats = list(set(nats.split(', ')))
df_nats_after = pd.DataFrame()

for country in nats:
    nat_sub =  after80[after80.Nationality == country]    
    nat_sub.DateAcquired = nat_sub.DateAcquired.astype('str')
    years = ', '.join(nat_sub.DateAcquired)
    years = sorted(list([item[:4] for item in list(set(years.split(', ')))]))[0:-1]
    for year in years:
        year_sub = nat_sub[nat_sub.DateAcquired == year]
        entries_year = len(year_sub)
        f_count = len(year_sub[year_sub.Gender == 'F'])
        new_row = pd.DataFrame([[country, year, entries_year, f_count]])
        df_nats_after = pd.concat([df_nats_after, new_row], axis=0, ignore_index=True)

df_nats_after.columns= ["Nation", "DateAcquired", "Count", "Females"]

df_nats_after.DateAcquired = df_nats_after.DateAcquired.astype('int')

df_nats_after   



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Nation,DateAcquired,Count,Females
0,Argentine,1986,3,0
1,Argentine,1987,1,0
2,Argentine,1992,1,1
3,Argentine,1993,1,0
4,Argentine,1994,1,0
...,...,...,...,...
1003,Croatian,2014,1,0
1004,Croatian,2015,1,0
1005,Emirati,2019,5,5
1006,Bahamian,2000,1,1


In [None]:
import plotly.express as px
fig3 = px.area(df_nats_after, x="DateAcquired", y="Count", color="Nation", line_group="Nation")
fig3.update_layout(
    xaxis = dict(

        tickmode = 'linear',
        tick0 = 1980,
        dtick = 2
    ),
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
)

   
fig3.show()


### Rhizome's 

In [None]:
rhizome

Unnamed: 0,ID,URL,Title,Artist,dateAcquired,dateCreated,Nationality,Gender
0,879,https://artbase.rhizome.org/wiki/Q2423,ZUR FARBENLEHRE (THEORY OF COLOURS),Steven Jones,2007,2007,British,M
1,1020,https://artbase.rhizome.org/wiki/Q4089,Zones de Convergence,cicero,2005,2005,missing,missing
2,"243, 701",https://artbase.rhizome.org/wiki/Q1475,Zombie and Mummy,"Dragan Espenschied, Olia Lialina",2004,2002,"German, Russian","M, F"
3,312,https://artbase.rhizome.org/wiki/Q4374,"Zaira, City of Memories",Gokcen Erguven,2004,2004,Turkish,F
4,920,https://artbase.rhizome.org/wiki/Q3972,Z_G [zeitgeist gestalten],Tiago Borges,2008,2007,Angolan,M
...,...,...,...,...,...,...,...,...
2265,1075,https://artbase.rhizome.org/wiki/Q4358,1999,joan escofet,2001,2000,missing,missing
2266,771,https://artbase.rhizome.org/wiki/Q3761,1969,Rhea Myers,2004,2004,British,F
2267,859,https://artbase.rhizome.org/wiki/Q2283,1953,Skye Thorstenson,2003,2002,missing,M
2268,481,https://artbase.rhizome.org/wiki/Q2511,160,Katie Lips,2005,2005,British,F


In [None]:

rhizome.Nationality = rhizome.Nationality.astype('str')
nats = ', '.join(rhizome.Nationality)
nats = list(set(nats.split(', ')))
df_nats_rhizome = pd.DataFrame()

for country in nats:
    nat_sub =  rhizome[rhizome.Nationality == country]    
    nat_sub.DateAcquired = nat_sub.dateAcquired.astype('str')
    years = ', '.join(nat_sub.DateAcquired)
    years = sorted(list([item[:4] for item in list(set(years.split(', ')))]))[0:-1]
    for year in years:
        year_sub = nat_sub[nat_sub.DateAcquired == year]
        entries_year = len(year_sub)
        f_count = len(year_sub[year_sub.Gender == 'F'])
        new_row = pd.DataFrame([[country, year, entries_year, f_count]])
        df_nats_rhizome = pd.concat([df_nats_rhizome, new_row], axis=0, ignore_index=True)

df_nats_rhizome.columns= ["Nation", "DateAcquired", "Count", "Females"]

df_nats_rhizome.DateAcquired = df_nats_rhizome.DateAcquired.astype('int')
df_nats_rhizome    


Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access



Unnamed: 0,Nation,DateAcquired,Count,Females
0,Italian,2001,1,0
1,Argentine,2001,2,0
2,Argentine,2002,2,1
3,Argentine,2003,2,0
4,Argentine,2004,6,4
...,...,...,...,...
281,Israeli,2006,1,0
282,Israeli,2007,1,1
283,Israeli,2008,3,0
284,Croatian,2003,1,1


In [None]:
fig4 = px.area(df_nats_rhizome, x="DateAcquired", y="Count", color="Nation", line_group="Nation")
fig4.update_layout(
     xaxis = dict(
        tick0 = 2001,
        dtick = 2
    ),
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
   
)
fig4.show()

## Acquisition of artworks by female over total -- of Moma and Rhizome

Count of how many artworks for each country have been added to the MoMA (new and old) and Rhizome collections, compared with the number of artworks acquired made by only a female artist. 

Visualization is on categorical data (gender, nationality), ordinal data (acquisition year) and numerical data (tot artworks per nationality  count, artworks by a female artist per nationality)

In [None]:
import plotly.io as pio
import plotly.express as px
df =df_nats_before[(df_nats_before['DateAcquired'] >= 1970) & (df_nats_before['DateAcquired'] <= 1980)]
df2 =df_nats_before.loc[df_nats_before['Count'] > 100]

fig5 = px.scatter(df2,
                 x="Count", y="DateAcquired", size="Females", color="Nation",
                 log_x=True,
                 title="MoMA's created before 1980")
# fig5.update_traces(
#     tickformatstops = {
#         'dtickrange': '[100,10000]'
#     }
# )
fig5.update_layout(
    paper_bgcolor='rgb(255, 255, 255)',
    plot_bgcolor='rgb(243, 243, 243)',
    )


fig5.show()

Add a shape for males 

In [None]:
fig6 = px.scatter(df_nats_after,
                 x="Count", y="DateAcquired", size="Females", color="Nation",
                 log_x=True, size_max=30,
                 title="MoMA's created after 1980")
fig6.show()

In [None]:
fig7 = px.scatter(df_nats_rhizome,
                 x="DateAcquired", y="Count", size="Females", color="Nation",
                 log_x=True, size_max=30,
                 title="Rhizome's")
fig7.show()

In [None]:
before80['Source'] = 'Old'
after80['Source'] = 'New'
MoMA_complete = pd.concat([before80, after80], axis=0, ignore_index=True)
MoMA_complete

Unnamed: 0,Title,Artist,ID,DateCreated,Medium,Department,DateAcquired,URL,ThumbnailURL,Nationality,Gender,Source
0,"Ferdinandsbrücke Project, Vienna, Austria (Ele...",Otto Wagner,6210,1896,Ink and cut-and-pasted painted pages on paper,Architecture & Design,1996,http://www.moma.org/collection/works/2,http://www.moma.org/media/W1siZiIsIjU5NDA1Il0s...,Austrian,M,Old
1,"Villa near Vienna Project, Outside Vienna, Aus...",Emil Hoppe,7605,1903,"Graphite, pen, color pencil, ink, and gouache ...",Architecture & Design,1997,http://www.moma.org/collection/works/4,http://www.moma.org/media/W1siZiIsIjk4Il0sWyJw...,Austrian,M,Old
2,"Villa, project, outside Vienna, Austria, Exter...",Emil Hoppe,7605,1903,"Graphite, color pencil, ink, and gouache on tr...",Architecture & Design,1997,http://www.moma.org/collection/works/6,http://www.moma.org/media/W1siZiIsIjEyNiJdLFsi...,Austrian,M,Old
3,"The Manhattan Transcripts Project, New York, N...",Bernard Tschumi,7056,1976,Gelatin silver photograph,Architecture & Design,1995,http://www.moma.org/collection/works/7,http://www.moma.org/media/W1siZiIsIjE0OCJdLFsi...,missing,M,Old
4,"The Manhattan Transcripts Project, New York, N...",Bernard Tschumi,7056,1976,Gelatin silver photographs,Architecture & Design,1995,http://www.moma.org/collection/works/8,http://www.moma.org/media/W1siZiIsIjE0OSJdLFsi...,missing,M,Old
...,...,...,...,...,...,...,...,...,...,...,...,...
138146,Cóctel (Cocktail),Alejandro Kuropatwa,132939,1996,Chromogenic color print,Photography,2020,missing,missing,Argentine,missing,New
138147,Cóctel (Cocktail),Alejandro Kuropatwa,132939,1996,Chromogenic color print,Photography,2020,missing,missing,Argentine,missing,New
138148,Cóctel (Cocktail),Alejandro Kuropatwa,132939,1996,Chromogenic color print,Photography,2020,missing,missing,Argentine,missing,New
138149,Cóctel (Cocktail),Alejandro Kuropatwa,132939,1996,Chromogenic color print,Photography,2020,missing,missing,Argentine,missing,New


In [None]:
MoMA_complete.Department = MoMA_complete.Department.astype('str')
departments = ', '.join(MoMA_complete.Department)
departments = list(set(departments.split(', ')))
df_moma_complete = pd.DataFrame()

for dep in departments:
    nat_sub =  MoMA_complete[MoMA_complete.Department == dep]    
    nat_sub.DateAcquired = nat_sub.DateAcquired.astype('str')
    years = ', '.join(nat_sub.DateAcquired)
    years = sorted(list([item[:4] for item in list(set(years.split(', ')))]))[0:-1]
    for year in years:
        year_sub = nat_sub[nat_sub.DateAcquired == year]
        entries_year = len(year_sub)
        f_count = len(year_sub[year_sub.Gender == 'F'])
        new_row = pd.DataFrame([[dep, year, entries_year, f_count]])
        df_moma_complete = pd.concat([df_moma_complete, new_row], axis=0, ignore_index=True)

df_moma_complete.columns= ["Department", "DateAcquired", "Count", "Females"]
df_moma_complete 



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Department,DateAcquired,Count,Females
0,Drawings & Prints,1929,9,0
1,Drawings & Prints,1930,3,0
2,Drawings & Prints,1931,1,0
3,Drawings & Prints,1932,14,0
4,Drawings & Prints,1933,2,0
...,...,...,...,...
429,Architecture & Design,2015,1390,92
430,Architecture & Design,2016,503,127
431,Architecture & Design,2017,101,14
432,Architecture & Design,2018,857,339


In [None]:
   
df_moma_complete.DateAcquired = df_moma_complete.DateAcquired.astype('int')

In [None]:
fig8 = px.area(df_moma_complete, x="DateAcquired", y="Count", color="Department", line_group="Department")
fig8.update_layout(
     xaxis = dict(
        dtick = 10
    )
   
)

fig8.show()

In [None]:
fig9 = px.scatter(df_moma_complete,
                 x="Count", y="DateAcquired", size="Females", color="Department",
                 log_x=True, size_max=30,
                 title="MoMA's departmwnts and female works acquisition")
fig9.show()

Donughts made by laurent for comparisons 

In [None]:
#copy and add before/after value
old = before80.copy()
old['Period'] = 'Contemporary'
new = after80.copy()
new['Period'] = 'Modern'

#merge 
frames = [old, new]
moma_artworks_two_periods = pd.concat(frames)


In [None]:
#count occurrences of department values before and after 1980
before_n = old['Nationality'].value_counts().rename_axis('Nationality').reset_index(name='Contemporary')
after_n = new['Nationality'].value_counts().rename_axis('Nationality').reset_index(name='Modern')
moma_1980_x_dep = pd.merge(before_n, after_n, left_on='Nationality', right_on='Nationality')
moma_1980_x_dep

Unnamed: 0,Nationality,Contemporary,Modern
0,American,39754,17209
1,French,21683,906
2,German,6666,2573
3,missing,5740,568
4,British,4035,1541
...,...,...,...
191,"German, British",1,4
192,"Dutch, Dutch, Dutch",1,6
193,"French, Italian",1,5
194,"Swiss, German",1,1


In [None]:
better = moma_1980_x_dep[moma_1980_x_dep.Contemporary > 100]

In [None]:
better

Unnamed: 0,Nationality,Contemporary,Modern
0,American,39754,17209
1,French,21683,906
2,German,6666,2573
3,missing,5740,568
4,British,4035,1541
5,Spanish,2566,558
6,Italian,2130,644
7,Russian,2081,109
8,Japanese,1695,728
9,Swiss,1367,738


In [None]:
labels = moma_1980_x_dep.Nationality
# Create subplots: use 'domain' type for Pie subplot
fig10 = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])
fig10.add_trace(go.Pie(labels=labels, values=moma_1980_x_dep['Contemporary']),
              1, 1)
fig10.add_trace(go.Pie(labels=labels, values=moma_1980_x_dep['Modern']),
              1, 2)

# Use `hole` to create a donut-like pie chart
fig10.update_traces(hole=.4, hoverinfo="label+percent", textposition='inside')

fig10.update_layout(
    title_text="MoMA Collection by Nationality and Period",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text='Old', x=0.195, y=0.5, font_size=20, showarrow=False),
                 dict(text='New', x=0.820, y=0.5, font_size=20, showarrow=False)], width=1200)
fig10.show()

In [None]:
#copy and add before/after value
rhizNats = rhizome.copy()
rhizNats['Source'] = 'R'
Mnew_nats = after80.copy()
Mnew_nats['Source'] = 'M'

#merge 
frames = [Mnew_nats, rhizNats]
twoSrcs = pd.concat(frames)

In [None]:
rhz_n = rhizNats['Nationality'].value_counts().rename_axis('Nationality').reset_index(name='R')
Mnew_n = Mnew_nats['Nationality'].value_counts().rename_axis('Nationality').reset_index(name='M')
MR_nats_comp = pd.merge(rhz_n, Mnew_n, left_on='Nationality', right_on='Nationality')
MR_nats_comp

Unnamed: 0,Nationality,R,M
0,missing,609,568
1,American,481,17209
2,French,133,906
3,German,88,2573
4,Canadian,86,527
...,...,...,...
62,"Russian, Russian",1,24
63,"British, British, American",1,1
64,Bulgarian,1,8
65,Egyptian,1,58


# Maps

### Moma complete DF

In [None]:
# create MOMA df with country neames instead of nationality
natios_MOMA = set(df_moma_complete.Nationality)

missing = pd.DataFrame({'Aalborgenser': ['Korean', 'Native American', 'Canadian Inuit'], 'Aalborg': ['Korea', 'United States', 'Canada']})
missing
df_natParse = pd.read_csv('https://raw.githubusercontent.com/knowitall/chunkedextractor/master/src/main/resources/edu/knowitall/chunkedextractor/demonyms.csv')
correct_country = pd.concat([missing, df_natParse])
Country_df=df_moma_complete.copy()

Country_df = Country_df[Country_df.DateAcquired <2000]
for item in natios_MOMA:
    my = correct_country[correct_country['Aalborgenser'] == item]
    
    country = my[:1]['Aalborg'].values
    if len(country)>0:
        country_str = my[:1]['Aalborg'].values[0]

        Country_df.loc[Country_df["Nationality"] == item, "Nationality"] = country_str

countries_count = pd.DataFrame(columns= ['Nation', 'Count'])

new_set = set(Country_df.Nationality)
for item in new_set:
    subCountry = Country_df[Country_df['Nationality'] == item]
    sum_acquisitions = subCountry['Count'].sum()
    countries_count.loc[len(countries_count.index)] = [item, sum_acquisitions]

def do_fuzzy_search(country):
    try:
        result = pycountry.countries.search_fuzzy(country)
        return result[0].alpha_3
    except:
        return np.nan

countries_count["country_code"] = countries_count["Nation"].apply(lambda country: do_fuzzy_search(country))

In [None]:
# create moma map plot

fig = go.Figure(data=go.Choropleth(
    locations = countries_count['country_code'],
    z = countries_count['Count'],
    text = countries_count['Nation'],
    colorscale=[
            [0,"#8e5a79"],
            [0.3 ,"#925f7d"],
            [0.325 ,"#966582"],
            [0.350 ,"#9a6a87"],
            [0.375,"#9e708b"],
            [0.4 ,"#a27590"],
            [0.525 ,"#b690a6"],
            [0.550 ,"#ba96ab"],
            [0.575,"#be9baf"],
            [0.6 ,"#c2a0b4"],
            [0.625 ,"#c6a6b8"],
            [0.650 ,"#caabbd"],
            [0.675,"#ceb1c2"],
            [0.7 ,"#d2b6c6"],
            [0.725 ,"#d6bccb"],
            [0.995 ,"#d9c1cf"],
            [1, "#FFFFFF"]],
    autocolorscale=False,
    reversescale=True,
    marker_line_color='darkgray',
    marker_line_width=0.5,
    colorbar_title = 'Total artworks',
))

fig.update_layout(
    title_text='Moma artworks acquired after the 2000, spread over Nations',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='orthographic',
        showocean=True, oceancolor="LightBlue"
    ),
    height = 700,
)

fig.show()

### Rhizome DF

In [None]:
# create Rhizome df with country neames instead of nationality
natios_RH = set(df_rhz_nats.Nationality)
Country_df=df_rhz_nats.copy()
for item in natios_RH:
    my = correct_country[correct_country['Aalborgenser'] == item]
    
    country = my[:1]['Aalborg'].values
    if len(country)>0:
        country_str = my[:1]['Aalborg'].values[0]

        Country_df.loc[Country_df["Nationality"] == item, "Nationality"] = country_str

RH_countries_count = pd.DataFrame(columns= ['Nation', 'Count'])

new_set = set(Country_df.Nationality)
for item in new_set:
    subCountry = Country_df[Country_df['Nationality'] == item]
    sum_acquisitions = subCountry['Count'].sum()
    RH_countries_count.loc[len(RH_countries_count.index)] = [item, sum_acquisitions]

def do_fuzzy_search(country):
    try:
        result = pycountry.countries.search_fuzzy(country)
        return result[0].alpha_3
    except:
        return np.nan

RH_countries_count["country_code"] = RH_countries_count["Nation"].apply(lambda country: do_fuzzy_search(country))

In [None]:
# create Rhizome map plot

fig = go.Figure(data=go.Choropleth(
    locations = RH_countries_count['country_code'],
    z = RH_countries_count['Count'],
    text = RH_countries_count['Nation'],
    colorscale=[
            [0,"#8e5a79"],
            [0.3 ,"#925f7d"],
            [0.325 ,"#966582"],
            [0.350 ,"#9a6a87"],
            [0.375,"#9e708b"],
            [0.4 ,"#a27590"],
            [0.525 ,"#b690a6"],
            [0.550 ,"#ba96ab"],
            [0.575,"#be9baf"],
            [0.6 ,"#c2a0b4"],
            [0.625 ,"#c6a6b8"],
            [0.650 ,"#caabbd"],
            [0.675,"#ceb1c2"],
            [0.7 ,"#d2b6c6"],
            [0.725 ,"#d6bccb"],
            [0.995 ,"#d9c1cf"],
            [1, "#FFFFFF"]],
    autocolorscale=False,
    reversescale=True,
    marker_line_color='darkgray',
    marker_line_width=0.5,
    colorbar_title = 'Total artworks',
))

fig.update_layout(
    title_text='Rhizome total artworks number spread over nations',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='orthographic',
        showocean=True, oceancolor="LightBlue"
    ),
    height = 700,
)

fig.show()