In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator, pad_sequences, skipgrams, make_sampling_table
from tensorflow.keras.preprocessing.text import text_to_word_sequence, one_hot, hashing_trick, Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [2]:
series = np.array([i for i in range(10)])
print(series)

[0 1 2 3 4 5 6 7 8 9]


In [3]:
generator = TimeseriesGenerator(data=series,
                               targets=series,
                               length=5,
                               batch_size=1,
                               shuffle=False,
                               reverse=False)

In [4]:
print('Samples: %d' % len(generator))

Samples: 5


In [5]:
for i in range(len(generator)):
    x, y = generator[i]
    print('%s => %s' % (x, y))

[[0 1 2 3 4]] => [5]
[[1 2 3 4 5]] => [6]
[[2 3 4 5 6]] => [7]
[[3 4 5 6 7]] => [8]
[[4 5 6 7 8]] => [9]


In [6]:
model = Sequential()
model.add(Dense(10, activation='relu', input_dim=5))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

In [7]:
model.fit(generator, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f7f85640ba8>

## Padding sequences

In [8]:
sentences = [["What", "do", "you", "like", "?"],
             ["I", "like", "basket-ball", "!"],
             ["And", "you", "?"],
             ["I", "like", "coconut", "and", "apple"]]

In [9]:
text_set = set(np.concatenate(sentences))
vocab_to_int = dict(zip(text_set, range(len(text_set))))
int_to_vocab = {vocab_to_int[word]:word for word in vocab_to_int.keys()}

In [10]:
encoded_sentences = []
for sentence in sentences:
    encoded_sentence = [vocab_to_int[word] for word in sentence]
    encoded_sentences.append(encoded_sentence)
encoded_sentences

[[9, 0, 10, 2, 5], [1, 2, 8, 11], [6, 10, 5], [1, 2, 7, 3, 4]]

In [11]:
pad_sequences(encoded_sentences)

array([[ 9,  0, 10,  2,  5],
       [ 0,  1,  2,  8, 11],
       [ 0,  0,  6, 10,  5],
       [ 1,  2,  7,  3,  4]], dtype=int32)

In [12]:
pad_sequences(encoded_sentences, maxlen=7)

array([[ 0,  0,  9,  0, 10,  2,  5],
       [ 0,  0,  0,  1,  2,  8, 11],
       [ 0,  0,  0,  0,  6, 10,  5],
       [ 0,  0,  1,  2,  7,  3,  4]], dtype=int32)

In [13]:
pad_sequences(encoded_sentences, maxlen=3)

array([[10,  2,  5],
       [ 2,  8, 11],
       [ 6, 10,  5],
       [ 7,  3,  4]], dtype=int32)

In [14]:
pad_sequences(encoded_sentences, maxlen=3, truncating='post')

array([[ 9,  0, 10],
       [ 1,  2,  8],
       [ 6, 10,  5],
       [ 1,  2,  7]], dtype=int32)

## Skip-grams

In [15]:
sentence = "I like coconut and apple"
encoded_sentence = [vocab_to_int[word] for word in sentence.split()]
vocabulary_size = len(encoded_sentence)
pairs, labels = skipgrams(encoded_sentence, 
                          vocabulary_size, 
                          window_size=1,
                          negative_samples=0)

In [16]:
for i in range(len(pairs)):
    print("({:s} , {:s} ) -> {:d}".format(
          int_to_vocab[pairs[i][0]], 
          int_to_vocab[pairs[i][1]], 
          labels[i]))

(and , apple ) -> 1
(I , like ) -> 1
(like , coconut ) -> 1
(coconut , and ) -> 1
(coconut , like ) -> 1
(and , coconut ) -> 1
(apple , and ) -> 1
(like , I ) -> 1


## Text preprocessing

In [17]:
sentence = "I like coconut , I like apple"
text_to_word_sequence(sentence, lower=False)

['I', 'like', 'coconut', 'I', 'like', 'apple']

In [18]:
text_to_word_sequence(sentence, lower=True, filters=[])

['i', 'like', 'coconut', ',', 'i', 'like', 'apple']

### Tokenizer

In [19]:
sentences = [["What", "do", "you", "like", "?"],
            ["I", "like", "basket-ball", "!"],
            ["And", "you", "?"],
            ["I", "like", "coconut", "and", "apple"]]

In [20]:
t = Tokenizer()

t.fit_on_texts(sentences)

In [21]:
print(t.word_counts)

OrderedDict([('what', 1), ('do', 1), ('you', 2), ('like', 3), ('?', 2), ('i', 2), ('basket-ball', 1), ('!', 1), ('and', 2), ('coconut', 1), ('apple', 1)])


In [22]:
print(t.document_count)

4


In [23]:
print(t.word_index)

{'like': 1, 'you': 2, '?': 3, 'i': 4, 'and': 5, 'what': 6, 'do': 7, 'basket-ball': 8, '!': 9, 'coconut': 10, 'apple': 11}


In [24]:
print(t.word_docs)

defaultdict(<class 'int'>, {'do': 1, 'what': 1, 'like': 3, '?': 2, 'you': 2, 'i': 2, '!': 1, 'basket-ball': 1, 'and': 2, 'apple': 1, 'coconut': 1})


In [25]:
t.texts_to_matrix(sentences, mode='binary')

array([[0., 1., 1., 1., 0., 0., 1., 1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 1., 0., 0., 0., 1., 1., 0., 0.],
       [0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 1., 1.]])

In [26]:
t.texts_to_matrix(sentences, mode='count')

array([[0., 1., 1., 1., 0., 0., 1., 1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 1., 0., 0., 0., 1., 1., 0., 0.],
       [0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 1., 1.]])

### Image preprocessing

In [27]:
# load CIFAR10 Dataset
(x_cifar10_train, y_cifar10_train), (x_cifar10_test, y_cifar10_test) = tf.keras.datasets.cifar10.load_data()

In [28]:
datagen = tf.keras.preprocessing.image.ImageDataGenerator(rotation_range=15,
                                                         width_shift_range=3,
                                                         height_shift_range=3,
                                                         horizontal_flip=True)

In [29]:
it = datagen.flow(x_cifar10_train, y_cifar10_train, batch_size=32)

In [30]:
model = tf.keras.models.Sequential([tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding="same", activation="relu",
                                                          input_shape=[32, 32, 3]),
                                   tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding="same", activation="relu"),
                                   tf.keras.layers.MaxPool2D(pool_size=2),
                                   tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding="same", activation="relu"),
                                   tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding="same", activation="relu"),
                                   tf.keras.layers.MaxPool2D(pool_size=2),
                                   tf.keras.layers.Flatten(),
                                   tf.keras.layers.Dense(128, activation="relu"),
                                   tf.keras.layers.Dense(10, activation="softmax")])

In [31]:
model.compile(loss="sparse_categorical_crossentropy", optimizer=tf.keras.optimizers.SGD(lr=0.01),
             metrics=["accuracy"])

In [32]:
!pip install SciPy

You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [33]:
history = model.fit(it, epochs=10,
                   steps_per_epoch=len(x_cifar10_train)/32,
                   validation_data=(x_cifar10_test, y_cifar10_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
