In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report

In [0]:
artworks = pd.read_csv('https://media.githubusercontent.com/media/MuseumofModernArt/collection/master/Artworks.csv')

In [0]:
artworks.head()

Unnamed: 0,Title,Artist,ConstituentID,ArtistBio,Nationality,BeginDate,EndDate,Gender,Date,Medium,...,ThumbnailURL,Circumference (cm),Depth (cm),Diameter (cm),Height (cm),Length (cm),Weight (kg),Width (cm),Seat Height (cm),Duration (sec.)
0,"Ferdinandsbrücke Project, Vienna, Austria, Ele...",Otto Wagner,6210,"(Austrian, 1841–1918)",(Austrian),(1841),(1918),(Male),1896,Ink and cut-and-pasted painted pages on paper,...,http://www.moma.org/media/W1siZiIsIjU5NDA1Il0s...,,,,48.6,,,168.9,,
1,"City of Music, National Superior Conservatory ...",Christian de Portzamparc,7470,"(French, born 1944)",(French),(1944),(0),(Male),1987,Paint and colored pencil on print,...,http://www.moma.org/media/W1siZiIsIjk3Il0sWyJw...,,,,40.6401,,,29.8451,,
2,"Villa near Vienna Project, Outside Vienna, Aus...",Emil Hoppe,7605,"(Austrian, 1876–1957)",(Austrian),(1876),(1957),(Male),1903,"Graphite, pen, color pencil, ink, and gouache ...",...,http://www.moma.org/media/W1siZiIsIjk4Il0sWyJw...,,,,34.3,,,31.8,,
3,"The Manhattan Transcripts Project, New York, N...",Bernard Tschumi,7056,"(French and Swiss, born Switzerland 1944)",(),(1944),(0),(Male),1980,Photographic reproduction with colored synthet...,...,http://www.moma.org/media/W1siZiIsIjEyNCJdLFsi...,,,,50.8,,,50.8,,
4,"Villa, project, outside Vienna, Austria, Exter...",Emil Hoppe,7605,"(Austrian, 1876–1957)",(Austrian),(1876),(1957),(Male),1903,"Graphite, color pencil, ink, and gouache on tr...",...,http://www.moma.org/media/W1siZiIsIjEyNiJdLFsi...,,,,38.4,,,19.1,,


In [0]:
artworks.columns

Index(['Title', 'Artist', 'ConstituentID', 'ArtistBio', 'Nationality',
       'BeginDate', 'EndDate', 'Gender', 'Date', 'Medium', 'Dimensions',
       'CreditLine', 'AccessionNumber', 'Classification', 'Department',
       'DateAcquired', 'Cataloged', 'ObjectID', 'URL', 'ThumbnailURL',
       'Circumference (cm)', 'Depth (cm)', 'Diameter (cm)', 'Height (cm)',
       'Length (cm)', 'Weight (kg)', 'Width (cm)', 'Seat Height (cm)',
       'Duration (sec.)'],
      dtype='object')

In [0]:
artworks = artworks[['Artist', 'Nationality', 'Gender', 'Date',
                    'Department', 'DateAcquired', 'URL', 'ThumbnailURL',
                    'Height (cm)', 'Width (cm)']]
artworks['URL'] = artworks['URL'].notnull()
artworks['ThumbnailURL'] = artworks['ThumbnailURL'].notnull()
artworks = artworks[(artworks.Department != 'Film')&
                    (artworks.Department != 'Media and Performance Art')&
                    (artworks.Department != 'Fluxus Collection')]
artworks = artworks.dropna()

In [0]:
artworks.head()

Unnamed: 0,Artist,Nationality,Gender,Date,Department,DateAcquired,URL,ThumbnailURL,Height (cm),Width (cm)
0,Otto Wagner,(Austrian),(Male),1896,Architecture & Design,1996-04-09,True,True,48.6,168.9
1,Christian de Portzamparc,(French),(Male),1987,Architecture & Design,1995-01-17,True,True,40.6401,29.8451
2,Emil Hoppe,(Austrian),(Male),1903,Architecture & Design,1997-01-15,True,True,34.3,31.8
3,Bernard Tschumi,(),(Male),1980,Architecture & Design,1995-01-17,True,True,50.8,50.8
4,Emil Hoppe,(Austrian),(Male),1903,Architecture & Design,1997-01-15,True,True,38.4,19.1


In [0]:
artworks.Gender.tail(50)

135568          ()
135569          ()
135570          ()
135571          ()
135572          ()
135573          ()
135574          ()
135575          ()
135576          ()
135577      (Male)
135579          ()
135580          ()
135581          ()
135582          ()
135583    (Female)
135584    (Female)
135585    (Female)
135586    (Female)
135587    (Female)
135588    (Female)
135589    (Female)
135590    (Female)
135591    (Female)
135592    (Female)
135593    (Female)
135594    (Female)
135595      (Male)
135596      (Male)
135597      (Male)
135598    (Female)
135599    (Female)
135612    (Female)
135613    (Female)
135614    (Female)
135615    (Female)
135623      (Male)
135624      (Male)
135625      (Male)
135626      (Male)
135627      (Male)
135628      (Male)
135629      (Male)
135630      (Male)
135631    (Female)
135632    (Female)
135633    (Female)
135634    (Female)
135635    (Female)
135791      (Male)
135792    (Female)
Name: Gender, dtype: object

In [0]:
artworks.Gender.value_counts()

(Male)                                                                                                                                                                   81754
(Female)                                                                                                                                                                 14042
()                                                                                                                                                                        4854
(Male) (Male)                                                                                                                                                             1381
(Male) (Male) (Male)                                                                                                                                                       844
(Male) ()                                                                                                                    

In [0]:
artworks.dtypes

Artist           object
Nationality      object
Gender           object
Date             object
Department       object
DateAcquired     object
URL                bool
ThumbnailURL       bool
Height (cm)     float64
Width (cm)      float64
dtype: object

In [0]:
artworks['DateAcquired'] = pd.to_datetime(artworks.DateAcquired)
artworks['YearAcquired'] = artworks.DateAcquired.dt.year
artworks.YearAcquired.dtype

dtype('int64')

In [0]:
artworks.loc[artworks.Gender.str.contains('\) \('), 'Gender'] = '\(multiple_persons\)'
artworks.loc[artworks.Nationality.str.contains('\) \('), 'Nationality'] = '\(multiple_nationalities\)'
artworks.loc[artworks.Artist.str.contains(','), 'Artist'] = 'Multiple_Artists'

artworks['Date'] = pd.Series(artworks.Date.str.extract(
        '([0-9]{4})', expand=False))[:-1]

X=artworks.drop(['Department', 'DateAcquired', 'Artist',
                'Nationality', 'Date'], 1)
artists = pd.get_dummies(artworks.Artist)
nationalities = pd.get_dummies(artworks.Nationality)
dates = pd.get_dummies(artworks.Date)

X = pd.get_dummies(X)
X = pd.concat([X, nationalities, dates], axis=1)
Y = artworks.Department

In [0]:
X['Department'] = artworks['Department']
Xsample = X.sample(frac=.3)

In [0]:
Xnew = Xsample.drop(columns='Department')
Ynew = Xsample['Department']
X_train, X_test, y_train, y_test = train_test_split(Xnew, Ynew, test_size=.3)

In [0]:
from sklearn.neural_network import MLPClassifier
mlp = MLPClassifier(hidden_layer_sizes=(100,4))
mlp.fit(X_train,y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 4), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [0]:
y_pred = mlp.predict(X_test)

In [0]:
pd.crosstab(y_pred, y_test)

Department,Architecture & Design,Drawings,Painting & Sculpture,Photography,Prints & Illustrated Books
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Prints & Illustrated Books,1099,1022,314,2130,4978


In [0]:
classification_report(y_test, y_pred)

  'precision', 'predicted', average, warn_for)


'                            precision    recall  f1-score   support\n\n     Architecture & Design       0.00      0.00      0.00      1099\n                  Drawings       0.00      0.00      0.00      1022\n      Painting & Sculpture       0.00      0.00      0.00       314\n               Photography       0.00      0.00      0.00      2130\nPrints & Illustrated Books       0.52      1.00      0.69      4978\n\n               avg / total       0.27      0.52      0.36      9543\n'