Now take your Keras skills and go build another neural network. Pick your data set, but it should be of an abstract type, possibly even nonnumeric, and use Keras to make five implementations of your network. Compare them in both computational complexity and accuracy and, given that tradeoff, decide which one you like best.

Your dataset should be sufficiently large for a neural network to perform well (samples should really be in the thousands here). Try to pick something that takes advantage of neural networks’ ability to do both feature extraction and supervised learning, so don’t pick something with an easy-to-consume list of features already generated for you (though neural networks can still be useful in those contexts).

Note that if you want to use an unprocessed image dataset, scikit-image is a useful package for converting to importable numerics.

In [1]:
import tensorflow as tf
import keras
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Import various components for model building
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.layers import LSTM, Input, TimeDistributed
from keras.models import Model
from keras.optimizers import RMSprop

# Import the backend
from keras import backend as K

Using TensorFlow backend.


In [2]:
# Load the two halves of the wine-review data set
wine1 = pd.read_csv('winemag-data_first.csv')
wine2 = pd.read_csv('winemag-data-second.csv')

# Stack both frames row-wise and renumber the index from 0.
# DataFrame.append is deprecated and was removed in pandas 2.0, so pd.concat
# is used instead. sort=True keeps the sorted column order that the old
# append call produced when the two files' columns are not aligned, and it
# silences the FutureWarning about the changing sort default.
wine_merge = pd.concat([wine1, wine2], ignore_index=True, sort=True)

# Preview the first rows of the combined frame
wine_merge.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,0,US,This tremendous 100% varietal wine hails from ...,Martha's Vineyard,96,235.0,California,Napa Valley,Napa,,,,Cabernet Sauvignon,Heitz
1,1,Spain,"Ripe aromas of fig, blackberry and cassis are ...",Carodorum Selección Especial Reserva,96,110.0,Northern Spain,Toro,,,,,Tinta de Toro,Bodega Carmen Rodríguez
2,2,US,Mac Watson honors the memory of a wine once ma...,Special Selected Late Harvest,96,90.0,California,Knights Valley,Sonoma,,,,Sauvignon Blanc,Macauley
3,3,US,"This spent 20 months in 30% new French oak, an...",Reserve,96,65.0,Oregon,Willamette Valley,Willamette Valley,,,,Pinot Noir,Ponzi
4,4,France,"This is the top wine from La Bégude, named aft...",La Brûlade,95,66.0,Provence,Bandol,,,,,Provence red blend,Domaine de la Bégude


In [3]:
# Show the column labels of the merged frame (used to pick which columns to drop)
wine_merge.columns

Index(['Unnamed: 0', 'country', 'description', 'designation', 'points',
       'price', 'province', 'region_1', 'region_2', 'taster_name',
       'taster_twitter_handle', 'title', 'variety', 'winery'],
      dtype='object')

In [None]:
# Columns that are not used further on in this notebook
unused_columns = [
    'Unnamed: 0',
    'description',
    'region_2',
    'taster_name',
    'taster_twitter_handle',
    'title',
]

# Drop those columns, then discard every row that still has a missing value
wine = wine_merge.drop(columns=unused_columns).dropna()

# Summarise dtypes and non-null counts of the cleaned frame
wine.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 147459 entries, 0 to 280900
Data columns (total 8 columns):
country        147459 non-null object
designation    147459 non-null object
points         147459 non-null int64
price          147459 non-null float64
province       147459 non-null object
region_1       147459 non-null object
variety        147459 non-null object
winery         147459 non-null object
dtypes: float64(1), int64(1), object(6)
memory usage: 10.1+ MB


In [None]:
# ONLY RUN ONCE: this cell overwrites the string columns of `wine` with
# integer codes. Re-running it would refit the encoders on those integers
# and lose the mapping back to the original string labels.
from sklearn.preprocessing import LabelEncoder

col = ['country','variety','winery','region_1','province','designation']

# Keep one fitted encoder per column. Refitting a single shared encoder for
# every column only retains the classes of the last column, which makes it
# impossible to decode e.g. the country codes later on. With this dict,
# label_encoders['country'].inverse_transform(codes) recovers the names.
label_encoders = {}
for column_name in col:
    encoder = LabelEncoder()
    wine[column_name] = encoder.fit_transform(wine[column_name])
    label_encoders[column_name] = encoder

# `le` stays bound to the encoder of the last column in `col`, which is the
# state the single shared encoder ended up in before this change.
le = label_encoders[col[-1]]

wine.head()

Unnamed: 0,country,designation,points,price,province,region_1,variety,winery
0,6,20879,96,235.0,8,715,47,5608
1,5,5276,96,110.0,39,1042,437,1016
2,6,30259,96,90.0,8,513,372,6988
3,6,26636,96,65.0,43,1190,322,8603
4,3,17075,95,66.0,47,60,335,4104


In [None]:
#from sklearn.preprocessing import OneHotEncoder
#col = ['variety','winery','region_1','province','designation']
#ohe = OneHotEncoder()
#wine[col] = wine[col].apply(lambda x: ohe.fit_transform(x.values.reshape(1,-1))).values
#ohe.fit(wine['country'].values.reshape(1,-1))
#ohe.transform(wine['country'].values.reshape(1,-1)).toarray()[0]

In [None]:
# Count the rows per value of the 'country' column to see how balanced the values are
wine['country'].value_counts()

6    76619
4    25395
3    21777
5    11706
0     6720
1     4923
2      319
Name: country, dtype: int64

In [None]:
# Specify the data 
#X=wine.drop(['country'],axis=1)

# Specify the target labels and flatten the array 
#y=to_categorical(pd.get_dummies(wine['country']))

# Split the data up in train and test sets
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1337)

In [None]:
# Features: every column except the target
X = wine.drop(columns=['country'])

# Target: the integer country codes expanded to a one-hot matrix
# (the plain integer labels would be wine['country'])
y = to_categorical(wine['country'])

# Hold out 33% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=1337,
)

In [None]:
# Take the layer widths from the data instead of hard-coding them, so the
# network stays consistent with the training arrays if the feature columns
# or the number of target classes change.
n_features = X_train.shape[1]
n_classes = y_train.shape[1]  # one column per class in the one-hot target

# Initialize the constructor
model = Sequential()

# First hidden layer; input_dim declares the width of each input row
model.add(Dense(100, activation='relu', input_dim=n_features))

# Second hidden layer
model.add(Dense(100, activation='relu'))

# Output layer: softmax over the target classes
model.add(Dense(n_classes, activation='softmax'))

# Print the layer-by-layer parameter counts
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 100)               800       
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_3 (Dense)              (None, 7)                 707       
Total params: 11,607
Trainable params: 11,607
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Configure training: RMSprop with its default settings, cross-entropy loss
# for the one-hot target, and accuracy reported alongside the loss
model.compile(
    optimizer=RMSprop(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on the training split for 10 epochs in batches of 512 rows,
# printing a progress bar for every epoch
model.fit(
    x=X_train,
    y=y_train,
    batch_size=512,
    epochs=10,
    verbose=1,
)

Instructions for updating:
Use tf.cast instead.
Epoch 1/10
