<a href="https://colab.research.google.com/github/kavehkarimadini/ARC_101_Tensorflow_Zero2Hero/blob/main/Chapter_02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, preprocessing
import tensorflow_datasets as tfds

In [9]:
# Hyperparameters shared by the data-loading, model-building and training cells.
max_len = 200          # length every review sequence is padded/truncated to
n_words = 10_000       # vocabulary size (IMDB loader + Embedding input dim)
dim_embedding = 256    # size of each learned word vector
EPOCHS = 5             # passes over the training set
BATCH_SIZE = 500       # samples per batch for fit/evaluate
def load_data(num_words=None, maxlen=None):
  """Load the Keras IMDB review dataset and pad every review to a fixed length.

  Args:
    num_words: Vocabulary size handed to the IMDB loader. When None, the
      notebook-level `n_words` is used.
    maxlen: Length each sequence is padded (or truncated) to. When None, the
      notebook-level `max_len` is used.

  Returns:
    `((X_train, y_train), (X_test, y_test))`, where `X_*` are the padded
    sequences and `y_*` are the labels exactly as returned by the loader.
  """
  if num_words is None:
    num_words = n_words
  if maxlen is None:
    maxlen = max_len
  # Reach the loader through `tf.keras.datasets` rather than the bare
  # `datasets` name: a later cell in this notebook rebinds `datasets` to the
  # dict returned by `tfds.load`, which would make a re-run of this function
  # fail with an AttributeError.
  (X_train, y_train), (X_test, y_test) = tf.keras.datasets.imdb.load_data(
      num_words=num_words)
  # Pad sequences so that every review has exactly `maxlen` entries.
  X_train = preprocessing.sequence.pad_sequences(X_train, maxlen=maxlen)
  X_test = preprocessing.sequence.pad_sequences(X_test, maxlen=maxlen)
  return (X_train, y_train), (X_test, y_test)

In [10]:
def build_model():
  """Build the (uncompiled) sentiment classifier.

  Architecture: Input -> Embedding -> Dropout(0.3) -> GlobalMaxPooling1D ->
  Dense(128, relu) -> Dropout(0.5) -> Dense(1, sigmoid).

  Returns:
    A `Sequential` model that takes an integer matrix of shape
    (batch, max_len) and outputs one sigmoid value per sample.
  """
  model = models.Sequential()
  # Declare the input shape with an explicit Input layer instead of the
  # Embedding `input_length` argument, which Keras 3 deprecates and ignores.
  # With the shape known up front, the model is built immediately and
  # `model.summary()` can report output shapes before training.
  model.add(layers.Input(shape=(max_len,)))
  # Embedding layer: maps each word index to a dim_embedding-sized vector,
  # producing (batch, max_len, dim_embedding). Every index in the input must
  # be smaller than n_words (the vocabulary size).
  model.add(layers.Embedding(n_words, dim_embedding))
  model.add(layers.Dropout(0.3))
  # Take the maximum over the sequence axis for each of the dim_embedding
  # features, collapsing the output to (batch, dim_embedding).
  model.add(layers.GlobalMaxPooling1D())
  model.add(layers.Dense(128, activation='relu'))
  model.add(layers.Dropout(0.5))
  # Single sigmoid unit for the binary sentiment label.
  model.add(layers.Dense(1, activation='sigmoid'))
  return model

In [11]:
# Fetch the padded IMDB splits once; the training and evaluation cells reuse
# these arrays.
train_split, test_split = load_data()
X_train, y_train = train_split
X_test, y_test = test_split


In [12]:
# Instantiate the network and attach the training configuration: Adam
# optimizer, binary cross-entropy loss, and accuracy as the reported metric.
model = build_model()
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)
model.summary()

In [13]:
# Train for EPOCHS passes in batches of BATCH_SIZE. The test split is passed
# as validation data, so the val_* figures in the log are computed on the same
# arrays that the evaluation cell uses.
score = model.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_test, y_test),
)

Epoch 1/5
50/50 ━━━━━━━━━━━━━━━━━━━━ 37s 716ms/step - accuracy: 0.5825 - loss: 0.6779 - val_accuracy: 0.8103 - val_loss: 0.5528
Epoch 2/5
50/50 ━━━━━━━━━━━━━━━━━━━━ 40s 686ms/step - accuracy: 0.8194 - loss: 0.4575 - val_accuracy: 0.8552 - val_loss: 0.3486
Epoch 3/5
50/50 ━━━━━━━━━━━━━━━━━━━━ 40s 676ms/step - accuracy: 0.8815 - loss: 0.2934 - val_accuracy: 0.8718 - val_loss: 0.3091
Epoch 4/5
50/50 ━━━━━━━━━━━━━━━━━━━━ 32s 646ms/step - accuracy: 0.9138 - loss: 0.2225 - val_accuracy: 0.8766 - val_loss: 0.2941
Epoch 5/5
50/50 ━━━━━━━━━━━━━━━━━━━━ 42s 671ms/step - accuracy: 0.9382 - loss: 0.1761 - val_accuracy: 0.8754 - val_loss: 0.2886


In [14]:
# Print the layer-by-layer summary of the current `model` again (this cell
# sits after the training cell).
model.summary()

In [15]:
# `evaluate` returns the loss followed by the compiled metrics; the model was
# compiled with a single metric (accuracy), so `score` has two entries.
score = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
test_loss, test_accuracy = score
print("\nTest score:", test_loss)
print('Test accuracy:', test_accuracy)

[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 88ms/step - accuracy: 0.8751 - loss: 0.2895

Test score: 0.2885791063308716
Test accuracy: 0.8754400014877319


In [16]:
# Round-trip the trained model through disk: write it out, then rebind `model`
# to the copy read back from the same file.
MODEL_PATH = 'my_model.h5'
model.save(MODEL_PATH)
model = tf.keras.models.load_model(MODEL_PATH)



In [17]:
# See all registered datasets
# `tfds.list_builders()` returns the names of the dataset builders known to
# TensorFlow Datasets; printing the whole list produces a very long line.
builders = tfds.list_builders()
print(builders)

['abstract_reasoning', 'accentdb', 'aeslc', 'aflw2k3d', 'ag_news_subset', 'ai2_arc', 'ai2_arc_with_ir', 'aloha_mobile', 'amazon_us_reviews', 'anli', 'answer_equivalence', 'arc', 'asqa', 'asset', 'assin2', 'asu_table_top_converted_externally_to_rlds', 'austin_buds_dataset_converted_externally_to_rlds', 'austin_sailor_dataset_converted_externally_to_rlds', 'austin_sirius_dataset_converted_externally_to_rlds', 'bair_robot_pushing_small', 'bc_z', 'bccd', 'beans', 'bee_dataset', 'beir', 'berkeley_autolab_ur5', 'berkeley_cable_routing', 'berkeley_fanuc_manipulation', 'berkeley_gnm_cory_hall', 'berkeley_gnm_recon', 'berkeley_gnm_sac_son', 'berkeley_mvp_converted_externally_to_rlds', 'berkeley_rpt_converted_externally_to_rlds', 'big_patent', 'bigearthnet', 'billsum', 'binarized_mnist', 'binary_alpha_digits', 'ble_wind_field', 'blimp', 'booksum', 'bool_q', 'bot_adversarial_dialogue', 'bridge', 'bridge_data_msr', 'bucc', 'c4', 'c4_wsrs', 'caltech101', 'caltech_birds2010', 'caltech_birds2011', 'c

In [18]:
# Fetch MNIST through TFDS by name. `with_info=True` makes `tfds.load` return
# a (splits, DatasetInfo) pair instead of the splits alone.
data, info = tfds.load("mnist", with_info=True)
# Pull the individual splits out of the returned mapping.
train_data = data['train']
test_data = data['test']
print(info)

Downloading and preparing dataset 11.06 MiB (download: 11.06 MiB, generated: 21.00 MiB, total: 32.06 MiB) to /root/tensorflow_datasets/mnist/3.0.1...


Dl Completed...:   0%|          | 0/5 [00:00<?, ? file/s]

Dataset mnist downloaded and prepared to /root/tensorflow_datasets/mnist/3.0.1. Subsequent calls will reuse this data.
tfds.core.DatasetInfo(
    name='mnist',
    full_name='mnist/3.0.1',
    description="""
    The MNIST database of handwritten digits.
    """,
    homepage='http://yann.lecun.com/exdb/mnist/',
    data_dir='/root/tensorflow_datasets/mnist/incomplete.QPPPMO_3.0.1/',
    file_format=tfrecord,
    download_size=11.06 MiB,
    dataset_size=21.00 MiB,
    features=FeaturesDict({
        'image': Image(shape=(28, 28, 1), dtype=uint8),
        'label': ClassLabel(shape=(), dtype=int64, num_classes=10),
    }),
    supervised_keys=('image', 'label'),
    disable_shuffling=False,
    splits={
        'test': <SplitInfo num_examples=10000, num_shards=1>,
        'train': <SplitInfo num_examples=60000, num_shards=1>,
    },
    citation="""@article{lecun2010mnist,
      title={MNIST handwritten digit database},
      author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
    

In [19]:
import tensorflow as tf
import numpy as np
# Number of elements in the toy dataset.
num_items = 100
# 1-D array holding the integers 0 .. num_items - 1.
num_list = np.arange(num_items)
# create the dataset from numpy array
# (slices along the first axis, so each array entry becomes one element)
num_list_dataset = tf.data.Dataset.from_tensor_slices(num_list)

In [20]:
# Load the raw-text IMDB reviews from TFDS as (text, label) pairs.
# NOTE: this rebinds the notebook-level name `datasets`, which the imports
# cell bound to the `tensorflow.keras.datasets` module; any code that expects
# the module under that name has to run before this cell.
datasets, info = tfds.load(
    'imdb_reviews',
    as_supervised=True,
    with_info=True,
)
# Group reviews into batches of 5, shuffle those batches with a buffer of 50,
# and keep only the first two batches for display.
train_dataset = datasets['train'].batch(5).shuffle(50).take(2)
for data in train_dataset:
    print(data)

Downloading and preparing dataset 80.23 MiB (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.MRR10M_1.0.0/imdb_reviews-train.tfrecor…

Generating test examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.MRR10M_1.0.0/imdb_reviews-test.tfrecord…

Generating unsupervised examples...:   0%|          | 0/50000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.MRR10M_1.0.0/imdb_reviews-unsupervised.…

Dataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.
(<tf.Tensor: shape=(5,), dtype=string, numpy=
array([b'Although it\'s most certainly politically incorrect to be entertained by a drunk, there\'s such a charm to Dudley Moore\'s portrayal of lovable lush, Arthur Bach one can\'t help but feel for this unique and wonderful character. How can you not be entertained by that infectious laugh and giggle and utter silliness. Although I\'m not really a Liza Minnelli fan, she was really excellent as Linda Marolla and I couldn\'t picture anyone else in that role. Sir John Gielgud was the heart of the film and deserved his Oscar. The rest of the cast also excellent and that great tune "Arthur\'s Theme", wow. Truly this was one of the Best Comedies of the 1980s. Great films get better with each viewing and that is the case with "Arthur."',
       b'Farewell Friend aka Adieu L\'Ami/Honour Among Thieves isn\