# MNIST (FFNN 사용)

In [21]:
import warnings
warnings.filterwarnings(action='ignore')
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [22]:
# 128장의 이미지를 random하게 공급 (중복은?!)
batch_x, batch_y = mnist.train.next_batch(128)
print(batch_x.shape) # flatten된 상태 (784)
learning_rate = 0.1 # hyper parameter
num_steps = 500 # epoch 세대
batch_size = 128 # mini batch 사이즈 : 속도를 고속, 지역해를 극복하기 위해서
display_step = 100 # 시각화

(128, 784)


In [23]:
n_hidden_1 = 256
n_hidden_2 = 256
num_input = 784
num_classes = 10
X = tf.placeholder("float", [None, num_input]) # 128 X 784
Y = tf.placeholder("float", [None, num_classes])

In [24]:
weights = {
    'h1' : tf.Variable(tf.random_normal([num_input, n_hidden_1])),
    'h2' : tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out' : tf.Variable(tf.random_normal([n_hidden_2, num_classes]))
}
biases = {
    'b1' : tf.Variable(tf.random_normal([n_hidden_1])),
    'b2' : tf.Variable(tf.random_normal([n_hidden_2])),
    'out' : tf.Variable(tf.random_normal([num_classes]))
}

In [27]:
def neural_net(x):
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer

In [30]:
logits = neural_net(X)
# 원핫 인코드된 값 * log(확률값)
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y))
# 최적화기 : 기울기, 학습율
# adagrad : learning rate 처음에는 크게 -> 점점 작게
# momentum + propgrad : adaptive 적응적으로
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
train_op = optimizer.minimize(loss_op)
# 가장 큰 값의 인덱스를 구함 (one-hot-encoding) => target
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(Y,1))
# 데이터 형변환
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
init = tf.global_variables_initializer()

In [32]:
with tf.Session() as sess: # GPU 네트워크 연결
    sess.run(init)
    for step in range(1, num_steps+1):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        sess.run(train_op, feed_dict={X:batch_x, Y:batch_y})
        if step % display_step == 0 or step == 1:
            loss, acc = sess.run([loss_op, accuracy],
                                feed_dict={X:batch_x, Y:batch_y})
            print("Step "+ str(step) + ", Minibatch Loss = "+ "{:.4f}".format(loss) +
                 ", Training Accuracy= " + "{:.3f}".format(acc))
    print("테스트 정확도:", sess.run(accuracy, feed_dict={X:mnist.test.images, Y:mnist.test.labels}))

Step 1, Minibatch Loss = 13108.4561, Training Accuracy= 0.344
Step 100, Minibatch Loss = 268.1597, Training Accuracy= 0.883
Step 200, Minibatch Loss = 174.8662, Training Accuracy= 0.852
Step 300, Minibatch Loss = 70.8949, Training Accuracy= 0.875
Step 400, Minibatch Loss = 30.5856, Training Accuracy= 0.875
Step 500, Minibatch Loss = 52.2661, Training Accuracy= 0.859
테스트 정확도: 0.8559


# 예측

In [33]:
from sklearn.datasets import load_boston
import pandas as pd
import numpy as np
import tensorflow as tf
boston = load_boston()
df = pd.DataFrame(boston.data, columns=boston.feature_names)
df['const'] = np.ones(df.shape[0])
df.tail()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,const
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,1.0
502,0.04527,0.0,11.93,0.0,0.573,6.12,76.7,2.2875,1.0,273.0,21.0,396.9,9.08,1.0
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.9,5.64,1.0
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,1.0
505,0.04741,0.0,11.93,0.0,0.573,6.03,80.8,2.505,1.0,273.0,21.0,396.9,7.88,1.0


In [34]:
n, m = df.shape
X = tf.placeholder(tf.float64, shape=(n,m))
y = tf.placeholder(tf.float64, shape=(n,1))
XT = tf.transpose(X)

In [36]:
# 행렬식
# 거듭제곱 : 상관계수 506 (data point) * 14 (변수)
# 14X506, 506X14 => 14X14 (상관행렬) => 역행렬
# 14X14, 14X506 => 14X506
# 14X506, 506X1 => 14X1
w = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)
y_pred = tf.matmul(X, w) # 회귀식 (506X1)
with tf.Session() as sess:
    y_pred_ = sess.run(y_pred, feed_dict={X:df.values, y:boston.target.reshape(-1,1)})
print("예측한 집값 :", y_pred_[19], "실제 집값 :", boston.target[19])

예측한 집값 : [18.40613603] 실제 집값 : 18.2


# KNN

In [37]:
import tensorflow as tf
sess = tf.Session()

val = [0,2,3]
val = tf.tile(val, [5])
print("홑일 때", sess.run(val))

홑일 때 [0 2 3 0 2 3 0 2 3 0 2 3 0 2 3]


In [39]:
val = [[1,1,1],[2,2,2],[3,3,3]]
val = tf.tile(val, [5,2])
print("2차원일때", sess.run(val))

# tile은 차원을 확대하지는 않음 (차수만 늘어남)

2차원일때 [[1 1 1 1 1 1]
 [2 2 2 2 2 2]
 [3 3 3 3 3 3]
 [1 1 1 1 1 1]
 [2 2 2 2 2 2]
 [3 3 3 3 3 3]
 [1 1 1 1 1 1]
 [2 2 2 2 2 2]
 [3 3 3 3 3 3]
 [1 1 1 1 1 1]
 [2 2 2 2 2 2]
 [3 3 3 3 3 3]
 [1 1 1 1 1 1]
 [2 2 2 2 2 2]
 [3 3 3 3 3 3]]


In [40]:
input_vecs = [[1.,2.],[2.,1.],[-2.,-1.]]
tiled_vecs = tf.tile(input_vecs, [1,3])
print(sess.run(tiled_vecs))
tiled_vecs = tf.tile(input_vecs, [2,3])
print(sess.run(tiled_vecs))

[[ 1.  2.  1.  2.  1.  2.]
 [ 2.  1.  2.  1.  2.  1.]
 [-2. -1. -2. -1. -2. -1.]]
[[ 1.  2.  1.  2.  1.  2.]
 [ 2.  1.  2.  1.  2.  1.]
 [-2. -1. -2. -1. -2. -1.]
 [ 1.  2.  1.  2.  1.  2.]
 [ 2.  1.  2.  1.  2.  1.]
 [-2. -1. -2. -1. -2. -1.]]


In [None]:
# K means : K가 결정 => 중심이 발생
- 각 데이터의 중심값을 빼서 거리값을 계산

In [42]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn import datasets
from sklearn.preprocessing import scale
from tensorflow.python.framework import ops
ops.reset_default_graph()
sess = tf.Session()
iris = datasets.load_iris() # 4개 변수, target

In [46]:
num_pts = len(iris.data) # 행수
num_feats = len(iris.data[0]) # 열수
k = 3 # 군집수
generations = 25 # epoch
data_points = tf.Variable(iris.data)
cluster_labels = tf.Variable(tf.zeros([num_pts], dtype=tf.int64))
rand_starts = np.array([iris.data[np.random.choice(len(iris.data))] for _ in range(k)])
centroids = tf.Variable(rand_starts)
centroid_matrix = tf.reshape(tf.tile(centroids, [num_pts, 1]),
                            [num_pts, k, num_feats])
point_matrix = tf.reshape(tf.tile(data_points, [1,k]),
                         [num_pts, k, num_feats])
distances = tf.reduce_sum(tf.square(point_matrix - centroid_matrix), axis=2)
centroid_group = tf.argmin(distances, 1)

In [47]:
def data_group_avg(group_ids, data):
    # 라벨 번호로 값의 합계를 냄
    sum_total = tf.unsorted_segment_sum(data, group_ids, 3)
    num_total = tf.unsorted_segment_sum(tf.ones_like(data), group_ids, 3)
    avg_by_group = sum_total/num_total
    return (avg_by_group)
means = data_group_avg(centroid_group, data_points)
update = tf.group(centroids.assign(means), cluster_labels.assign(centroid_group))
init = tf.global_variables_initializer()
sess.run(init)

In [48]:
for i in range(generations):
    print('Calculating gen {}, out of {}'.format(i, generations))
    _, centroid_group_count = sess.run([update, centroid_group])
    group_count = []
    for ix in range(k):
        group_count.append(np.sum(centroid_group_count==ix))
    print("Group counts: {}".format(group_count))
[centers, assignments] = sess.run([centroids, cluster_labels])

Calculating gen 0, out of 25
Group counts: [55, 50, 45]
Calculating gen 1, out of 25
Group counts: [57, 50, 43]
Calculating gen 2, out of 25
Group counts: [60, 50, 40]
Calculating gen 3, out of 25
Group counts: [61, 50, 39]
Calculating gen 4, out of 25
Group counts: [61, 50, 39]
Calculating gen 5, out of 25
Group counts: [61, 50, 39]
Calculating gen 6, out of 25
Group counts: [61, 50, 39]
Calculating gen 7, out of 25
Group counts: [61, 50, 39]
Calculating gen 8, out of 25
Group counts: [61, 50, 39]
Calculating gen 9, out of 25
Group counts: [61, 50, 39]
Calculating gen 10, out of 25
Group counts: [61, 50, 39]
Calculating gen 11, out of 25
Group counts: [61, 50, 39]
Calculating gen 12, out of 25
Group counts: [61, 50, 39]
Calculating gen 13, out of 25
Group counts: [61, 50, 39]
Calculating gen 14, out of 25
Group counts: [61, 50, 39]
Calculating gen 15, out of 25
Group counts: [61, 50, 39]
Calculating gen 16, out of 25
Group counts: [61, 50, 39]
Calculating gen 17, out of 25
Group count

In [51]:
def most_common(my_list):
    return(max(set(my_list), key=my_list.count))
label0 = most_common(list(assignments[0:50]))
label1 = most_common(list(assignments[50:100]))
label2 = most_common(list(assignments[100:150]))
group0_count = np.sum(assignments[0:50]==label0)
group1_count = np.sum(assignments[50:100]==label1)
group2_count = np.sum(assignments[100:150]==label2)
accuracy = (group0_count + group1_count + group2_count)/150.
print('Accuracy: {:.2}'.format(accuracy))

Accuracy: 0.89
