### Package와 데이터 

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Column labels supplied explicitly to read_csv via `names`.
column_names = [
    'Sex',
    'Length',
    'Diameter',
    'Height',
    'Whole weight',
    'Shucked weight',
    'Viscera weight',
    'Shell weight',
    'Rings',
]
df = pd.read_csv("data/abalone.csv", names=column_names)
df.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


# Sklearn 활용

### Data를 train과 test로 분할

In [3]:
# Convert the frame to a plain array: column 0 (Sex) is the label,
# every remaining column is a feature.
data = np.array(df)
nvar = data.shape[1]  # total number of columns
datay = data[:, 0]
datax = data[:, 1:]

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
train_x, test_x, train_y, test_y = train_test_split(
    datax,
    datay,
    test_size=0.3,
    random_state=500,
)

### 변수 정규화 실행

In [5]:
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training split only, then apply that same fitted
# scaling to both the training and the test features.
scaler = MinMaxScaler().fit(train_x)
train_x, test_x = scaler.transform(train_x), scaler.transform(test_x)

### SKlearn을 이용해서 Classifier A 생성 후 예측

In [6]:
from sklearn.neural_network import MLPClassifier

# Classifier A: a multi-layer perceptron with four hidden layers.
# NOTE(review): no random_state is passed, so the fitted weights (and the
# accuracy reported below) can differ from run to run.
hidden_layers = (1000, 1500, 2000, 1000)
clf_a = MLPClassifier(hidden_layer_sizes=hidden_layers)
clf_a.fit(train_x, train_y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(1000, 1500, 2000, 1000),
              learning_rate='constant', learning_rate_init=0.001, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [7]:
# Predict a class label for every row of the scaled test features with Classifier A.
predict_a = clf_a.predict(test_x)

### sklearn을 활용한 Classifier의 accuracy

In [8]:
from sklearn.metrics import accuracy_score

# Fraction of test rows whose predicted label equals the true label.
accuracy_score(y_true=test_y, y_pred=predict_a)

0.5486443381180224

# Keras 활용

In [9]:
import tensorflow as tf
import keras
from keras import layers, models, optimizers
from keras.utils import to_categorical

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


### 딥러닝을 위한 데이터 정제 One Hot Encoder 활용

In [10]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the categorical Sex column so it can be used as a multi-column target.
ohe = OneHotEncoder()
X = ohe.fit_transform(df.Sex.values.reshape(-1,1)).toarray()

# Build the column labels from the categories the encoder actually learned, so the
# labels always match the encoded columns' order and count instead of relying on a
# hard-coded ["Sex_F", "Sex_I", "Sex_M"] list.
sex_columns = ["Sex_" + str(category) for category in ohe.categories_[0]]

# Reuse df's index so the concat below aligns row-for-row even if df is not on a
# default RangeIndex.
dfOneHot = pd.DataFrame(X, columns = sex_columns, index = df.index)

# Put the indicator columns first and drop the original string column.
# NOTE: this rebinds `df`, so the cell is meant to run once per kernel session;
# re-running it fails because the Sex column no longer exists.
df = pd.concat([dfOneHot, df.drop(columns = 'Sex')], axis = 1)
df.head()

Unnamed: 0,Sex_F,Sex_I,Sex_M,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,0.0,0.0,1.0,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,0.0,0.0,1.0,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,1.0,0.0,0.0,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,0.0,0.0,1.0,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,0.0,1.0,0.0,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


### Data를 train과 test로 분할

In [11]:
# The first three columns are the one-hot Sex indicators (the target);
# the remaining columns are the features.
data = np.array(df)
nvar = data.shape[1]  # total number of columns
datay, datax = data[:, :3], data[:, 3:]

In [12]:
from sklearn.model_selection import train_test_split

# Same 70/30 split and seed as used for the scikit-learn classifier.
train_x, test_x, train_y, test_y = train_test_split(
    datax,
    datay,
    test_size=0.3,
    random_state=500,
)

### 변수 정규화 실행

In [13]:
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training split only, then apply that same fitted
# scaling to both the training and the test features.
scaler = MinMaxScaler().fit(train_x)
train_x, test_x = scaler.transform(train_x), scaler.transform(test_x)

### Keras를 이용하여 Classifier B 생성

In [14]:
# Size the network from the data instead of hard-coding it, so this cell keeps
# working if the number of feature columns or target classes changes.
input_shape = train_x.shape[1:]           # (n_features,)
n_classes = train_y.shape[1]              # one output unit per one-hot target column
hidden_units = (1000, 1500, 2000, 1000)   # same layer widths as Classifier A

# Classifier B: fully connected ReLU layers followed by a softmax output layer.
mlp_model = models.Sequential()
mlp_model.add(layers.Dense(units = hidden_units[0], activation = 'relu', input_shape=input_shape))
for width in hidden_units[1:]:
    mlp_model.add(layers.Dense(units = width, activation = 'relu'))
mlp_model.add(layers.Dense(units = n_classes, activation = 'softmax'))

# Categorical cross-entropy matches the one-hot encoded targets.
mlp_model.compile(optimizer='Adam', loss = 'categorical_crossentropy', metrics=['accuracy'])

Instructions for updating:
Colocations handled automatically by placer.


In [15]:
# Print the layer-by-layer output shapes and parameter counts of Classifier B.
mlp_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 1000)              9000      
_________________________________________________________________
dense_2 (Dense)              (None, 1500)              1501500   
_________________________________________________________________
dense_3 (Dense)              (None, 2000)              3002000   
_________________________________________________________________
dense_4 (Dense)              (None, 1000)              2001000   
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 3003      
Total params: 6,516,503
Trainable params: 6,516,503
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Train Classifier B for 50 epochs with mini-batches of 512 rows; `history` keeps
# the per-epoch metrics returned by fit().
# Keras documents validation_data as a tuple (x_val, y_val); a list is not part of
# the documented contract, so a tuple is passed here.
# NOTE(review): the held-out test split is also used as the validation data —
# confirm that this is intended.
history = mlp_model.fit(train_x, train_y, validation_data = (test_x, test_y), batch_size=512, epochs=50)

Instructions for updating:
Use tf.cast instead.
Train on 2923 samples, validate on 1254 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
