# Practice Neural Networks

In [1]:
!pip install plotly   



In [2]:
!pip install ipywidgets



In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly as pl
%matplotlib inline

In [2]:
art_works = pd.read_csv('https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Artworks.csv')

In [3]:
art_works.columns

Index(['Title', 'Artist', 'ConstituentID', 'ArtistBio', 'Nationality',
       'BeginDate', 'EndDate', 'Gender', 'Date', 'Medium', 'Dimensions',
       'CreditLine', 'AccessionNumber', 'Classification', 'Department',
       'DateAcquired', 'Cataloged', 'ObjectID', 'URL', 'ThumbnailURL',
       'Circumference (cm)', 'Depth (cm)', 'Diameter (cm)', 'Height (cm)',
       'Length (cm)', 'Weight (kg)', 'Width (cm)', 'Seat Height (cm)',
       'Duration (sec.)'],
      dtype='object')

In [4]:
# Select Columns.
art_works = art_works[['Artist', 'Nationality', 'Gender', 'Date', 'Department',
                    'DateAcquired', 'URL', 'ThumbnailURL', 'Height (cm)', 'Width (cm)']]

# Convert URL's to booleans.
art_works['URL'] = art_works['URL'].notnull()
art_works['ThumbnailURL'] = art_works['ThumbnailURL'].notnull()


In [5]:
# Drop films and some other tricky rows.
art_works = art_works[art_works['Department']!='Film']
art_works = art_works[art_works['Department']!='Media and Performance Art']
art_works = art_works[art_works['Department']!='Fluxus Collection']

# Drop missing data.
art_works = art_works.dropna()

In [6]:
art_works.head()

Unnamed: 0,Artist,Nationality,Gender,Date,Department,DateAcquired,URL,ThumbnailURL,Height (cm),Width (cm)
0,Otto Wagner,(Austrian),(Male),1896,Architecture & Design,1996-04-09,True,True,48.6,168.9
1,Christian de Portzamparc,(French),(Male),1987,Architecture & Design,1995-01-17,True,True,40.6401,29.8451
2,Emil Hoppe,(Austrian),(Male),1903,Architecture & Design,1997-01-15,True,True,34.3,31.8
3,Bernard Tschumi,(),(Male),1980,Architecture & Design,1995-01-17,True,True,50.8,50.8
4,Emil Hoppe,(Austrian),(Male),1903,Architecture & Design,1997-01-15,True,True,38.4,19.1


## Build the Model

In [7]:
# Get data types.
art_works.dtypes

Artist           object
Nationality      object
Gender           object
Date             object
Department       object
DateAcquired     object
URL                bool
ThumbnailURL       bool
Height (cm)     float64
Width (cm)      float64
dtype: object

### NOTE: The DateAcquired column is an object. Let's transform that to a datetime object and add a feature for just the year the artwork was acquired.

In [8]:
art_works['DateAcquired'] = pd.to_datetime(art_works.DateAcquired)
art_works['YearAcquired'] = art_works.DateAcquired.dt.year
art_works['YearAcquired'].dtype

dtype('int64')

### Data Cleaning: Part 2

In [9]:
# Remove multiple nationalities, genders, and artists.
art_works.loc[art_works['Gender'].str.contains('\) \('), 'Gender'] = '\(multiple_persons\)'
art_works.loc[art_works['Nationality'].str.contains('\) \('), 'Nationality'] = '\(multiple_nationalities\)'
art_works.loc[art_works['Artist'].str.contains(','), 'Artist'] = 'Multiple_Artists'


In [10]:
# Convert dates to start date, cutting down number of distinct examples.
art_works['Date'] = pd.Series(art_works.Date.str.extract(
    '([0-9]{4})', expand=False))[:-1]

In [11]:
# Final column drops and NA drop. 
## Can't use "NA" values in Neural Networks and Percetron models
X = art_works.drop(['Department', 'DateAcquired', 'Artist', 'Nationality', 'Date'], 1)

In [12]:
# Create dummies separately.
artists = pd.get_dummies(art_works.Artist)
nationalities = pd.get_dummies(art_works.Nationality)
dates = pd.get_dummies(art_works.Date)

In [13]:
# Concat with other variables, but artists slows this wayyyyy down so we'll keep it out for now
X = pd.get_dummies(X, sparse=True)
X = pd.concat([X, nationalities, dates], axis=1)

Y = art_works.Department

## Build the Model

In [14]:
# Alright! We've done our prep, let's build the model.
# Neural networks are hugely computationally intensive.
# This may take several minutes to run.

# Import the model.
from sklearn.neural_network import MLPClassifier

# Establish and fit the model, with a single, 1000 perceptron layer.
mlp = MLPClassifier(hidden_layer_sizes=(1000,))
mlp.fit(X, Y)

  "pandas.DataFrame with sparse columns found."


MLPClassifier(hidden_layer_sizes=(1000,))

In [15]:
# Evaluate by Score
mlp.score(X, Y)

  "pandas.DataFrame with sparse columns found."


0.8102508479561729

In [16]:
Y.value_counts()/len(Y)

Drawings & Prints        0.622800
Photography              0.225837
Architecture & Design    0.113383
Painting & Sculpture     0.033578
Media and Performance    0.004403
Name: Department, dtype: float64

In [18]:
from sklearn.model_selection import cross_val_score
cross_val_score(mlp, X, Y, cv=5)

  "pandas.DataFrame with sparse columns found."
  "pandas.DataFrame with sparse columns found."
  "pandas.DataFrame with sparse columns found."
  "pandas.DataFrame with sparse columns found."
  "pandas.DataFrame with sparse columns found."
  "pandas.DataFrame with sparse columns found."
  "pandas.DataFrame with sparse columns found."
  "pandas.DataFrame with sparse columns found."
  "pandas.DataFrame with sparse columns found."
  "pandas.DataFrame with sparse columns found."


array([0.64929454, 0.6228054 , 0.6228054 , 0.6228054 , 0.6228054 ])

In [19]:
# Your code here. Experiment with hidden layers to build your own model.
    # We will try 3 layers
# Adjust parameter and fit the model, with a single, 100 perceptron layer, 4 perception and 10 perceptron. 
    ## Using a tuple (100,4,10)
mlp_2 = MLPClassifier(hidden_layer_sizes=(100,4,10))
mlp_2.fit(X, Y)

  "pandas.DataFrame with sparse columns found."


MLPClassifier(hidden_layer_sizes=(100, 4, 10))

In [20]:
# Evaluate second iteration by Score
mlp_2.score(X, Y)

  "pandas.DataFrame with sparse columns found."


0.781213519500694