### 텐서플로 1.x 버전의 그래프 코드를 2.x 에서 실행

In [1]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
in_a = tf.placeholder(dtype=tf.float32, shape=(2))
in_a

Instructions for updating:
non-resource variables are not supported in the long term


<tf.Tensor 'Placeholder:0' shape=(2,) dtype=float32>

In [2]:
def model(x):
    with tf.variable_scope("matmul"):
        w = tf.get_variable('w', initializer=tf.ones(shape=(2,2)))
        b = tf.get_variable('b', initializer=tf.zeros(shape=(2)))
        return x * w + b
out_a = model(in_a)
out_a

<tf.Tensor 'matmul/add:0' shape=(2, 2) dtype=float32>

In [3]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    outs = sess.run([out_a],
                   feed_dict={in_a: [1,0]})
    writer = tf.summary.FileWriter('./logs/example', sess.graph)

In [18]:
outs, writer

([array([[1., 0.],
         [1., 0.]], dtype=float32)],
 <tensorflow.python.summary.writer.writer.FileWriter at 0x7faa7dad3b50>)

### 텐서플로 2.x의 이해

#### 오토그래프 생성

In [6]:
import tensorflow as tf
def linear_layer(x):
    return 3 * x + 2
@tf.function
def simple_nn(x):
    return tr.nn.relu(linear_layer(x))
def simple_function(x):
    return 3*x

In [7]:
# 자동 생성된 코드의 내부 관찰
print(tf.autograph.to_code(simple_nn.python_function, experimental_optional_features=None))

def tf__simple_nn(x):
    do_return = False
    retval_ = ag__.UndefinedReturnValue()
    with ag__.FunctionScope('simple_nn', 'fscope', ag__.ConversionOptions(recursive=True, user_requested=True, optional_features=(), internal_convert_user_code=True)) as fscope:
        try:
            do_return = True
            retval_ = fscope.mark_return_value(ag__.converted_call(tr.nn.relu, (ag__.converted_call(linear_layer, (x,), None, fscope),), None, fscope))
        except:
            do_return = False
            raise
    (do_return,)
    return ag__.retval(retval_)



In [10]:
simple_nn

<tensorflow.python.eager.def_function.Function at 0x7f9398e79210>

In [11]:
simple_function

<function __main__.simple_function(x)>

#### 오토그래프 실행타임(어노테이션 vs none어노테이션)

In [16]:
import tensorflow as tf
import timeit

cell = tf.keras.layers.LSTMCell(100)

@tf.function  # 어노테이션
def fn(input, state):
    return cell(input, state)

input = tf.zeros([100,100])
state = [tf.zeros([100,100])] * 2

# 워밍업
cell(input, state)
fn(input, state)

graph_time = timeit.timeit(lambda: cell(input, state), number=100)
auto_graph_time = timeit.timeit(lambda: fn(input, state), number=100)
print('graph_time:', graph_time)
print('auto_graph_time:', auto_graph_time)

graph_time: 0.48972320099983335
auto_graph_time: 0.08006397599979209


## 순차적 API 모델의 예

<img src="model.png" width=300>

## 함수적 API 모델의 예

In [22]:
import tensorflow as tf

def build_model():
    # 가변 길이 정수의 시퀀스 1
    text_input_a = tf.keras.Input(shape=(None,), dtype='int32')
    
    # 가변 길이 정수의 시퀀스 2
    text_input_b = tf.keras.Input(shape=(None,), dtype='int32')
    
    # 1000개의 고유 단어를 128차원(128개의 컬럼) 벡터에 매핑해서 임베딩
    shared_embedding = tf.keras.layers.Embedding(1000, 128)
    
    # 양쪽 입력을 인코딩하고자 동일한 계층 재사용
    encoded_input_a = shared_embedding(text_input_a)
    encoded_input_b = shared_embedding(text_input_b)
    
    # 최종적으로 2개의 로지스틱 예측
    prediction_a = tf.keras.layers.Dense(1, activation='sigmoid', name='prediction_a')(encoded_input_a)
    prediction_b = tf.keras.layers.Dense(1, activation='sigmoid', name='prediction_b')(encoded_input_b)
    
    # 이 모델은 2개의 입력과 2개의 출력을 가진다.
    # 가운데는 공유 모델이 있다.
    model = tf.keras.Model(inputs=[text_input_a, text_input_b],
                          outputs=[prediction_a, prediction_b])
    tf.keras.utils.plot_model(model, to_file='shared_model.png')

In [23]:
build_model()

<img src='model/shared_model.png' width=500>

## 모델 서브클래싱의 예

In [2]:
from tensorflow.keras import layers
class Mylayer(layers.Layer):
    # __init__ : 선택적으로 이 계층에서 사용할 모든 하위 계층을 정의하는데 사용한다.
    # 모델을 선언할 떄의 생성자(constructor)다.
    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(Mylayer, self).__init__(**kwargs)
    
    # build : 계층의 가중치를 생성할 때 사용한다.
    # add_weight()로 가중치를 추가 할 수 있다.
    def build(self, input_shape):
        # 이 계층의 훈련 가능한 가중치 변수 생성
        self.kernel = self.add_weight(name='kernel',
                                     shape=(input_shape[1], self.output_dim),
                                     initializer='uniform',
                                     trainable=True)
    
    # call : 순방향 전달을 정의한다.
    # 계층이 호출되고 함수 형식으로 체인되는 곳이다.
    def call(self, inputs):
        # 곱셈 수행과 반환
        return tf.matmul(inputs, self.kernel)
    
    # 선택적으로 get_config()를 사용해 계층을 직렬화(serialize)할 수 있고
    # from_config()를 사용하면 역직렬화(deserialize)할 수 있다.

### 콜백 코드 작성 예

In [None]:
callbacks = [
    # 텐서보드 로그를 './logs' 디렉터리에 작성
    tf.keras.callbacks.Tensorboard(log_dir='./logs')
]
model.fit(data, labels, batch_size=BATCH_SIZE, epochs=EPOCH,
         callbacks=callbacks, validation_data=(val_data, val_labels))

## 모델과 가중치 저장

In [None]:
model.save_weights('./my_model') # 가중치를 텐서플로 체크포인트(checkpoint) 파일로 저장
model.load_weights(file_path) # 모델 상태 복원

## 가중치 이외 모델 json형식으로 저장

In [None]:
json_string = model.to_json() # 저장
model = tf.keras.models.model_from_json(json_string) # 복원

In [None]:
model_json = model.to_json()
with open('model/6.Deeopening_CNN_model_1', 'w') as json_file:
    json_file.write(model_json)
model.save_weights('model/6.Deeopening_CNN_model_1_weights.h5')

path = ''
model = tf.keras.models.model_from_json(open(path).read())
m.load_weights('model/6.Deeopening_CNN_model_1_weights.h5')

## 가중치 이외 모델 YAML로 직렬화

In [None]:
yaml_string = model.to_yaml() # 저장
model = tf.keras.models.model_from_yaml(yaml_string) # 복원

## 모델을 가중치와 최적화 매개변수와 함께 저장

In [None]:
model.save('my_model.h5') # 저장
model = tf.keras.models.load_model('mt_model.h5') # 복원

In [36]:
import tensorflow as tf
import tensorflow_datasets as tfds

# 등록된 데이터셋 보기
builders = tfds.list_builders()
print(builders)

['abstract_reasoning', 'aeslc', 'aflw2k3d', 'amazon_us_reviews', 'arc', 'bair_robot_pushing_small', 'beans', 'big_patent', 'bigearthnet', 'billsum', 'binarized_mnist', 'binary_alpha_digits', 'blimp', 'c4', 'caltech101', 'caltech_birds2010', 'caltech_birds2011', 'cars196', 'cassava', 'cats_vs_dogs', 'celeb_a', 'celeb_a_hq', 'cfq', 'chexpert', 'cifar10', 'cifar100', 'cifar10_1', 'cifar10_corrupted', 'citrus_leaves', 'cityscapes', 'civil_comments', 'clevr', 'cmaterdb', 'cnn_dailymail', 'coco', 'coil100', 'colorectal_histology', 'colorectal_histology_large', 'common_voice', 'cos_e', 'crema_d', 'curated_breast_imaging_ddsm', 'cycle_gan', 'deep_weeds', 'definite_pronoun_resolution', 'dementiabank', 'diabetic_retinopathy_detection', 'div2k', 'dmlab', 'downsampled_imagenet', 'dsprites', 'dtd', 'duke_ultrasound', 'emnist', 'eraser_multi_rc', 'esnli', 'eurosat', 'fashion_mnist', 'flic', 'flores', 'food101', 'forest_fires', 'gap', 'geirhos_conflict_stimuli', 'german_credit_numeric', 'gigaword', '

In [37]:
data, info = tfds.load('mnist', with_info=True)
train_data, test_data = data['train'], data['test']

print(info)

[1mDownloading and preparing dataset mnist/3.0.1 (download: 11.06 MiB, generated: 21.00 MiB, total: 32.06 MiB) to /Users/HumanRevolution/tensorflow_datasets/mnist/3.0.1...[0m


local data directory. If you'd instead prefer to read directly from our public
GCS bucket (recommended if you're running on GCP), you can instead pass
`try_gcs=True` to `tfds.load` or set `data_dir=gs://tfds-data/datasets`.



HBox(children=(FloatProgress(value=0.0, description='Dl Completed...', max=4.0, style=ProgressStyle(descriptio…



[1mDataset mnist downloaded and prepared to /Users/HumanRevolution/tensorflow_datasets/mnist/3.0.1. Subsequent calls will reuse this data.[0m
tfds.core.DatasetInfo(
    name='mnist',
    version=3.0.1,
    description='The MNIST database of handwritten digits.',
    homepage='http://yann.lecun.com/exdb/mnist/',
    features=FeaturesDict({
        'image': Image(shape=(28, 28, 1), dtype=tf.uint8),
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=10),
    }),
    total_num_examples=70000,
    splits={
        'test': 10000,
        'train': 60000,
    },
    supervised_keys=('image', 'label'),
    citation="""@article{lecun2010mnist,
      title={MNIST handwritten digit database},
      author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
      journal={ATT Labs [Online]. Available: http://yann. lecun. com/exdb/mnist},
      volume={2},
      year={2010}
    }""",
    redistribution_info=,
)



### numpy 배열로 데이터셋을 작성

In [44]:
## 데이터셋은 다운로드하거나 섞거나 배치시키거나 생성자에서 분할할 수 있다.
import tensorflow as tf
import numpy as np

num_items = 100
num_list = np.arange(num_items)
print(num_list)
# numpy 배열로 데이터셋 생성
num_list_dataset = tf.data.Dataset.from_tensor_slices(num_list)
print(num_list_dataset)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]
<TensorSliceDataset shapes: (), types: tf.int64>


In [49]:
datasets, info = tfds.load('imdb_reviews', with_info=True, as_supervised=True)
datasets, info

({'test': <PrefetchDataset shapes: ((), ()), types: (tf.string, tf.int64)>,
  'train': <PrefetchDataset shapes: ((), ()), types: (tf.string, tf.int64)>,
  'unsupervised': <PrefetchDataset shapes: ((), ()), types: (tf.string, tf.int64)>},
 tfds.core.DatasetInfo(
     name='imdb_reviews',
     version=1.0.0,
     description='Large Movie Review Dataset.
 This is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets. We provide a set of 25,000 highly polar movie reviews for training, and 25,000 for testing. There is additional unlabeled data for use as well.',
     homepage='http://ai.stanford.edu/~amaas/data/sentiment/',
     features=FeaturesDict({
         'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=2),
         'text': Text(shape=(), dtype=tf.string),
     }),
     total_num_examples=100000,
     splits={
         'test': 25000,
         'train': 25000,
         'unsupervised': 50000,
     },
     supervised_key

In [50]:
train_dataset = datasets['train']
train_dataset = train_dataset.batch(5).shuffle(50).take(2)

for data in train_dataset:
    print(data)

RuntimeError: __iter__() is only supported inside of tf.function or when eager execution is enabled.

In [35]:
import tensorflow as tf
import tensorflow_datasets as tfds
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
np.random.seed(42)

df = pd.read_csv('../project/modeling_2nd/visualization/processedData.csv', encoding='cp949')
X = df.drop('transaction_real_price', axis=1)
Y = df.transaction_real_price.values
train_x,test_x,train_y,test_y = train_test_split(X,Y,test_size=0.2, random_state=42)

train_x.shape,train_y.shape,test_x.shape,test_y.shape

my_feature_columns = []
for key in train_x.keys():
    my_feature_columns.append(tf.feature_column.numeric_column(key=key))

# 각 은닉층에 10개의 노드가 있으며, 2개의 은닉층을 가진 DNN구축
classifier = tf.estimator.DNNClassifier(
    feature_columns = my_feature_columns,
    hidden_units = [10,10],  # 각 10개의 노드를 가진 두 은닉층
    n_classes = 3  # 모델은 3개 부류중에서 선택해야 한다.
)

import tensorflow as tf
import tensorflow_datasets as tfds

BUFFER_SIZE = 10000
BATCH_SIZE = 64

def input_fn(mode):
    datasets, info = tfds.load(name='mnist',
                                with_info=True,
                                as_supervised=True)
    mnist_dataset = (datasets['train'] if mode == tf.estimator.ModeKeys.TRAIN else
                   datasets['test'])

    def scale(image, label):
        image = tf.cast(image, tf.float32)
        image /= 255
        return image, label

    return mnist_dataset.map(scale).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

test = input_fn('test')
train = input_fn(tf.estimator.ModeKeys.TRAIN)

print(test)
print(train)

tf.estimator.train_and_evaluate(
    classifier,
    train_spec = tf.estimator.TrainSpec(input_fn=input_fn),
    eval_spec = tf.estimator.EvalSpec(input_fn=input_fn)
)

In [15]:
import tensorflow as tf

x = tf.constant(4.0)
with tf.GradientTape(persistent=True) as g:
    g.watch(x)
    y = x**2
    z = y**2
dz_dx = g.gradient(z, x)  # 256.0 (4*x^3 as x = 4)
dy_dx = g.gradient(y, x)  # 8.0
print(dz_dx)
print(dy_dx)
del g  # 테이프에 대한 참조 제거

tf.Tensor(256.0, shape=(), dtype=float32)
tf.Tensor(8.0, shape=(), dtype=float32)


In [17]:
import tensorflow as tf
import numpy as np
from tensorflow import keras as k
# n_train_examples = 1024*1024
# n_features = 10
# size_batches = 256

# 반 개구간 [0.0, 1.0)에서 10개의 랜덤 floats
x = np.random.random((1024*1024, 10))
y = np.random.randint(2, size=(1024*1024, 1))
x = tf.dtypes.cast(x, tf.float32)
print(x)
dataset = tf.data.Dataset.from_tensor_slices((x, y))
dataset = dataset.shuffle(buffer_size=1024*1024).batch(256)

tf.Tensor(
[[0.53553987 0.18084869 0.87275076 ... 0.17509083 0.37267074 0.54288733]
 [0.34120852 0.42946884 0.7360805  ... 0.39392975 0.22204018 0.9809915 ]
 [0.60030746 0.23963866 0.6672207  ... 0.8861854  0.82371837 0.13119906]
 ...
 [0.50404215 0.31159058 0.08733454 ... 0.5346817  0.8925573  0.6928494 ]
 [0.8850832  0.62569654 0.5277377  ... 0.9119927  0.1660605  0.9879514 ]
 [0.5085873  0.26216918 0.7740833  ... 0.3462564  0.63982284 0.76666874]], shape=(1048576, 10), dtype=float32)


In [18]:
x.shape, y.shape

(TensorShape([1048576, 10]), (1048576, 1))

In [19]:
dataset

<BatchDataset shapes: ((None, 10), (None, 1)), types: (tf.float32, tf.int64)>

In [23]:
# 분산전략
distribution = tf.distribute.MirroredStrategy()

# 이 코드 부분이 다중 GPU로 분산된다.
with distribution.scope():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(16, activation='relu', input_shape=(10,)))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    optimizer = tf.keras.optimizers.SGD(.2)
    model.compile(loss='binary_crossentropy', optimizer=optimizer)
    model.summary()
    
# 평소대로 최적화하지만 실제로 GPU를 사용하고 있다.
model.fit(dataset, epochs=5, steps_per_epoch=10)





INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 16)                176       
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 17        
Total params: 193
Trainable params: 193
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fe5a7c9ab90>

In [1]:
%load_ext tensorboard
%tensorboard --logdir logs

Reusing TensorBoard on port 6009 (pid 59118), started 2:24:09 ago. (Use '!kill 59118' to kill it.)