In [1]:
import tensorflow as tf
# Differentiate y = 2x + 3 with respect to x at x = 10 by recording the
# forward computation on a GradientTape.
x = tf.Variable(10.0)
with tf.GradientTape() as tape:
  y = 2 * x + 3
dy_dx = tape.gradient(y, x)
dy_dx.numpy()

2.0

In [2]:
# Differentiate y = x^2 with respect to x at x = 10 (analytically 2x).
x = tf.Variable(10.0)
with tf.GradientTape() as tape:
  y = x ** 2
dy_dx = tape.gradient(y, x)
dy_dx.numpy()

20.0

MNIST 데이터를 이용해서 TensorFlow의 라이브러리 레이어를 적용한 모델을 만들고 학습

In [3]:
import tensorflow as tf

In [4]:
# Load MNIST, divide the pixel values by 255.0, and one-hot encode the labels
# into 10 classes. The data is reloaded on every run, so re-executing this
# cell never one-hot encodes labels that are already encoded.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train_sacled, x_test_sacled = x_train / 255.0, x_test / 255.0
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, num_classes=10)
    for labels in (y_train, y_test)
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [5]:
# Sanity check: shapes of the scaled test images and the one-hot training labels.
x_test_sacled.shape, y_train.shape

((10000, 28, 28), (60000, 10))

In [6]:
# Functional-API MLP: (28, 28) input -> Flatten -> Dense(512, relu) -> Dense(10, softmax).
inputs = tf.keras.Input(shape=(28, 28))
x = tf.keras.layers.Flatten()(inputs)
x = tf.keras.layers.Dense(units=512, activation="relu")(x)
outputs = tf.keras.layers.Dense(units=10, activation="softmax")(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [7]:
# Print the per-layer output shapes and parameter counts of the Keras model.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 28, 28)]          0         
                                                                 
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 512)               401920    
                                                                 
 dense_1 (Dense)             (None, 10)                5130      
                                                                 
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________


In [8]:
# Confirm the shape of the scaled training images before fitting.
x_train_sacled.shape

(60000, 28, 28)

In [9]:
# Categorical cross-entropy is used because y_train was one-hot encoded with
# to_categorical when the data was loaded.
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.categorical_crossentropy,
    metrics=["accuracy"],
)
model.fit(x_train_sacled, y_train, epochs=5, batch_size=128)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f1af62ae8f0>

위 모델을 Keras 레이어 없이 코드로 직접 구현해 보기

In [10]:
class NativeDense:
  """Minimal fully connected layer computing activation(inputs @ W + b).

  Args:
    input_size: number of input features (rows of W).
    output_size: number of output units (columns of W, length of b).
    activation: callable applied to the affine result.
  """
  def __init__(self,input_size,output_size,activation):
    # W starts as uniform noise drawn between 0 and 0.1; b starts at zero.
    kernel_init = tf.random.uniform((input_size, output_size), minval=0, maxval=1e-1)
    bias_init = tf.zeros((output_size,))
    self.activation = activation
    self.W = tf.Variable(kernel_init)
    self.b = tf.Variable(bias_init)
  def __call__(self,inputs):
    """Run the forward pass for a batch of inputs."""
    affine = tf.matmul(inputs, self.W) + self.b
    return self.activation(affine)
  @property
  def weight(self):
    """The layer's variables as the list [W, b]."""
    return [self.W, self.b]

In [11]:
class NativeSequential:
  """Chain of layers applied one after another.

  Each layer must be callable and expose a `weight` attribute that is an
  iterable of its variables.
  """
  def __init__(self, layers):
    self.layers = layers
  def __call__(self, inputs):
    """Feed `inputs` through every layer in order and return the final output."""
    output = inputs
    for stage in self.layers:
      output = stage(output)
    return output
  @property
  def weights(self):
    """Flat list of every layer's variables, in layer order."""
    return [variable for stage in self.layers for variable in stage.weight]

In [43]:
# Rebuild the 784 -> 512 -> 10 network from the hand-written layers.
# Each NativeDense contributes W and b, so the model exposes 4 variables.
nd1 = NativeDense(28 * 28, 512, tf.nn.relu)
nd2 = NativeDense(512, 10, tf.nn.softmax)
model = NativeSequential(layers=[nd1, nd2])
len(model.weights)

4

미니배치

In [44]:
import math
class BatchGenerator:
  """Hands out consecutive, non-shuffled mini-batches of images and labels.

  Args:
    images: sliceable sequence of samples; its length sets `num_batches`.
    labels: sliceable sequence aligned with `images`.
    batch_size: number of samples per batch (the last batch may be shorter).
  """
  def __init__(self,images, labels, batch_size=128):
    self.images, self.labels = images, labels
    self.batch_size = batch_size
    self.index = 0
    # Round up so a trailing partial batch is still counted.
    self.num_batches = math.ceil(len(images) / batch_size)
  def next(self):
    """Return the next (images, labels) slice pair and advance the cursor."""
    window = slice(self.index, self.index + self.batch_size)
    batch = self.images[window], self.labels[window]
    self.index = window.stop
    return batch

훈련 스텝

In [45]:
lr = 1e-3
def update_weights(gradients, weights):
  """Apply one plain gradient-descent step in place: w <- w - lr * g.

  Args:
    gradients: iterable of gradients, aligned with `weights`.
    weights: iterable of variables exposing `assign_sub`.
  """
  for grad, variable in zip(gradients, weights):
    step = grad * lr
    variable.assign_sub(step)

In [46]:
def one_traning_step(model, images_batch, labels_batch):
  """Run one optimisation step on a single mini-batch.

  The forward pass and the loss are recorded on a GradientTape, the gradients
  of the mean loss with respect to `model.weights` are computed, and the
  weights are updated through `update_weights`.

  Args:
    model: callable exposing a `weights` list of variables.
    images_batch: batch of inputs fed to the model.
    labels_batch: labels passed to sparse categorical cross-entropy
      (integer class ids at the call site in this notebook).

  Returns:
    The mean loss over the batch.
  """
  loss_fn = tf.keras.losses.sparse_categorical_crossentropy
  with tf.GradientTape() as tape:
    mean_loss = tf.reduce_mean(loss_fn(labels_batch, model(images_batch)))
  trainable = model.weights
  update_weights(tape.gradient(mean_loss, trainable), trainable)
  return mean_loss

In [47]:
# Alternative optimizer: tf.keras.optimizers.SGD(learning_rate=1e-3)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
def update_weights(gradients,weights):
    """Apply gradients through the module-level Keras optimizer.

    This redefines `update_weights`, replacing the earlier manual
    `w.assign_sub(g*lr)` version, so later training steps use Adam.
    """
    grads_and_vars = zip(gradients, weights)
    optimizer.apply_gradients(grads_and_vars)

전체 훈련 루프

In [48]:
def fit(model, images,labels,epochs,batch_size=128):
  """Train `model` for `epochs` passes over the data in mini-batches.

  Args:
    model: model passed through to `one_traning_step`.
    images: training inputs, sliced sequentially into batches.
    labels: training labels aligned with `images`.
    epochs: number of full passes over the data.
    batch_size: samples per mini-batch (default 128).

  The loss is printed for every 100th batch of each epoch.
  """
  for epoch_counter in range(epochs):
    print(f"epoch {epoch_counter+1}")
    # Forward the caller's batch_size; it used to be dropped here, so every
    # run silently fell back to BatchGenerator's own default of 128.
    batch_generator = BatchGenerator(images, labels, batch_size=batch_size)
    for batch_counter in range(batch_generator.num_batches):
      images_batch, labels_batch = batch_generator.next()
      loss = one_traning_step(model, images_batch, labels_batch)
      if batch_counter % 100 == 0:
        print(f"{batch_counter}번째 손실값 : {loss:.2f}")

In [49]:
# Reload MNIST, flatten each 28x28 image into 784 features, and divide by 255
# as float32. Labels stay as loaded because one_traning_step uses the sparse
# categorical cross-entropy loss.
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()
train_images = train_images.reshape(-1, 28 * 28).astype("float32") / 255
test_images = test_images.reshape(-1, 28 * 28).astype("float32") / 255
fit(model, train_images, train_labels, epochs=10, batch_size=128)

epoch 1
0번째 손실값 : 5.03
100번째 손실값 : 0.46
200번째 손실값 : 0.31
300번째 손실값 : 0.30
400번째 손실값 : 0.52
epoch 2
0번째 손실값 : 0.30
100번째 손실값 : 0.23
200번째 손실값 : 0.25
300번째 손실값 : 0.25
400번째 손실값 : 0.50
epoch 3
0번째 손실값 : 0.18
100번째 손실값 : 0.20
200번째 손실값 : 0.24
300번째 손실값 : 0.25
400번째 손실값 : 0.46
epoch 4
0번째 손실값 : 0.17
100번째 손실값 : 0.20
200번째 손실값 : 0.24
300번째 손실값 : 0.26
400번째 손실값 : 0.35
epoch 5
0번째 손실값 : 0.18
100번째 손실값 : 0.18
200번째 손실값 : 0.25
300번째 손실값 : 0.23
400번째 손실값 : 0.31
epoch 6
0번째 손실값 : 0.18
100번째 손실값 : 0.18
200번째 손실값 : 0.24
300번째 손실값 : 0.20
400번째 손실값 : 0.26
epoch 7
0번째 손실값 : 0.17
100번째 손실값 : 0.17
200번째 손실값 : 0.22
300번째 손실값 : 0.18
400번째 손실값 : 0.20
epoch 8
0번째 손실값 : 0.15
100번째 손실값 : 0.14
200번째 손실값 : 0.18
300번째 손실값 : 0.15
400번째 손실값 : 0.16
epoch 9
0번째 손실값 : 0.13
100번째 손실값 : 0.11
200번째 손실값 : 0.13
300번째 손실값 : 0.11
400번째 손실값 : 0.14
epoch 10
0번째 손실값 : 0.10
100번째 손실값 : 0.08
200번째 손실값 : 0.09
300번째 손실값 : 0.09
400번째 손실값 : 0.12


모델 평가

In [52]:
import numpy as np

# Predict on the whole test set, take the highest-scoring class per sample,
# and report the fraction that matches the true labels.
predictions = model(test_images).numpy()
predicted_labels = predictions.argmax(axis=1)
maches = predicted_labels == test_labels
print(f"정확도 : {maches.mean():.2f}")

정확도 : 0.96
