In [20]:
from keras import Input
from keras.layers import Dense, Embedding, LSTM, concatenate, Conv1D, MaxPooling1D, GlobalMaxPooling1D, Conv2D, AveragePooling2D, add, MaxPooling2D
from keras.models import Sequential, Model
from keras.applications import Xception
import numpy as np

In [15]:
# Baseline classifier built with the Sequential API: 64 input features,
# two 32-unit ReLU hidden layers and a 10-way softmax output.
model = Sequential()
model.add(Dense(32, activation='relu', input_shape=(64,)))
model.add(Dense(32, activation='relu'))
model.add(Dense(10, activation='softmax'))

In [3]:
# The same 64 -> 32 -> 32 -> 10 network written with the functional API:
# each layer is called on a tensor, and the Model is defined by the tensors
# at its two ends.
input_tensor = Input(shape=(64,))
x = Dense(units=32, activation='relu')(input_tensor)
x = Dense(units=32, activation='relu')(x)
output_tensor = Dense(units=10, activation='softmax')(x)
model = Model(inputs=input_tensor, outputs=output_tensor)
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 64)]              0         
                                                                 
 dense_3 (Dense)             (None, 32)                2080      
                                                                 
 dense_4 (Dense)             (None, 32)                1056      
                                                                 
 dense_5 (Dense)             (None, 10)                330       
                                                                 
Total params: 3,466
Trainable params: 3,466
Non-trainable params: 0
_________________________________________________________________


## Multiple input models
A network that takes a question and a reference text as inputs and outputs an answer to that question.

In [4]:
# Vocabulary sizes for the two inputs and for the answer classes.
text_vocabulary_size = 10_000
question_vocabulary_size = 10_000
answer_vocabulary_size = 500

# Reference-text branch: variable-length sequence of token ids, embedded in
# 64 dimensions and summarised by an LSTM into a 32-d vector.
text_input = Input(shape=(None,), dtype='int32', name='text')
embedded_text = Embedding(input_dim=text_vocabulary_size, output_dim=64)(text_input)
encoded_text = LSTM(units=32)(embedded_text)

# Question branch: same idea with a smaller embedding (32) and LSTM (16).
question_input = Input(shape=(None,), dtype='int32', name='question')
embedded_question = Embedding(input_dim=question_vocabulary_size, output_dim=32)(question_input)
encoded_question = LSTM(units=16)(embedded_question)

# Join both encodings along the feature axis and classify over the answers.
concatenated = concatenate([encoded_text, encoded_question], axis=-1)
answer = Dense(units=answer_vocabulary_size, activation='softmax')(concatenated)

# The inputs can also be passed as a dictionary instead of a list.
model = Model(inputs=[text_input, question_input], outputs=answer)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 text (InputLayer)              [(None, None)]       0           []                               
                                                                                                  
 question (InputLayer)          [(None, None)]       0           []                               
                                                                                                  
 embedding (Embedding)          (None, None, 64)     640000      ['text[0][0]']                   
                                                                                                  
 embedding_1 (Embedding)        (None, None, 32)     320000      ['question[0][0]']               
                                                                                            

In [5]:
# Number of synthetic samples and the length of each generated token sequence.
num_samples, max_len = 1000, 100

In [6]:
# Random token ids stand in for real text/question data (ids start at 1).
text = np.random.randint(1, text_vocabulary_size, size=(num_samples, max_len))
question = np.random.randint(1, question_vocabulary_size, size=(num_samples, max_len))

# categorical_crossentropy expects every target row to be a one-hot
# (probability) vector. Drawing each column independently from {0, 1} would
# produce multi-hot rows, so pick a single answer class per sample and
# one-hot encode it.
answer_ids = np.random.randint(0, answer_vocabulary_size, size=num_samples)
answers = np.zeros((num_samples, answer_vocabulary_size), dtype='float32')
answers[np.arange(num_samples), answer_ids] = 1.0

model.fit([text, question], answers, epochs=10, batch_size=128, verbose=0)
# Equivalent call with the inputs keyed by their Input names:
# model.fit({'text': text, 'question': question}, answers, epochs=10, batch_size=128)

<keras.callbacks.History at 0x7fe4b0204190>

## Multiple output models
A network that takes a social media post as input and predicts the age, income, and gender of its author.

In [20]:
vocabulary_size = 50000
num_income_groups = 10

# Shared trunk: embed the token ids, then a stack of 1D convolutions with
# pooling, reduced to a single 128-d feature vector per post.
posts_input = Input(shape=(None,), dtype='int32', name='posts')
embedded_posts = Embedding(vocabulary_size, 256)(posts_input)
x = Conv1D(128, 5, activation='relu')(embedded_posts)
x = MaxPooling1D(5)(x)
x = Conv1D(256, 5, activation='relu')(x)
x = Conv1D(256, 5, activation='relu')(x)
x = MaxPooling1D(5)(x)
x = Conv1D(256, 5, activation='relu')(x)
x = Conv1D(256, 5, activation='relu')(x)
x = GlobalMaxPooling1D()(x)
x = Dense(128, activation='relu')(x)

# Three heads on top of the shared features: a linear regression output for
# age, a softmax over income groups and a sigmoid for gender.
age_pred = Dense(1, name='age')(x)
income_pred = Dense(num_income_groups, activation='softmax', name='income')(x)
gender_pred = Dense(1, activation='sigmoid', name='gender')(x)

model = Model(posts_input, [age_pred, income_pred, gender_pred])

# Losses and metrics are keyed by output-layer name so they cannot be
# mismatched by position. Accuracy is only meaningful for the two
# classification heads; the age head is a regression, so it reports MSE
# alongside its MAE loss instead.
model.compile(
    optimizer='adam',
    loss={
        'age': 'mae',
        'income': 'sparse_categorical_crossentropy',
        'gender': 'binary_crossentropy',
    },
    metrics={
        'age': ['mse'],
        'income': ['accuracy'],
        'gender': ['accuracy'],
    },
)
model.summary()

Model: "model_6"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 posts (InputLayer)             [(None, None)]       0           []                               
                                                                                                  
 embedding_4 (Embedding)        (None, None, 256)    12800000    ['posts[0][0]']                  
                                                                                                  
 conv1d_10 (Conv1D)             (None, None, 128)    163968      ['embedding_4[0][0]']            
                                                                                                  
 max_pooling1d_4 (MaxPooling1D)  (None, None, 128)   0           ['conv1d_10[0][0]']              
                                                                                            

In [8]:
# Synthetic stand-in data: 500-token posts with ids drawn from [1, vocabulary_size).
posts = np.random.randint(1, vocabulary_size, size=(num_samples, 500))
target_age = np.random.randint(18, 99, size=num_samples)
# Integer class labels cover the full range 0 .. num_income_groups - 1 so every
# income class can occur and the range follows the model's output size.
target_income = np.random.randint(0, num_income_groups, size=num_samples)
target_gender = np.random.randint(0, 2, size=num_samples)

# Targets are listed in the same order as the model outputs (age, income,
# gender); a dictionary keyed by output name works as well.
model.fit(posts, [target_age, target_income, target_gender], batch_size=128, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7fe44bf56410>

## Directed acyclic graphs
Inception architecture example

In [10]:
# Inception-style module: four parallel branches read the same input, each
# applies exactly one stride-2 operation, and their 128-channel outputs are
# concatenated along the last axis.
# NOTE(review): Input's `shape` excludes the batch axis, so (None, 28, 28)
# reads as height=None, width=28, channels=28 under the default channels-last
# layout -- confirm this is intended rather than e.g. (28, 28, channels).
input_tensor = Input(shape=(None, 28, 28))

# Branch A: a single strided 1x1 convolution.
branch_a = Conv2D(filters=128, kernel_size=1, strides=2, activation='relu')(input_tensor)

# Branch B: 1x1 -> strided 3x3 -> 1x1.
branch_b = Conv2D(filters=128, kernel_size=1, activation='relu')(input_tensor)
branch_b = Conv2D(filters=128, kernel_size=3, strides=2, padding='same', activation='relu')(branch_b)
branch_b = Conv2D(filters=128, kernel_size=1, activation='relu')(branch_b)

# Branch C: strided average pooling followed by a 1x1 convolution.
branch_c = AveragePooling2D(pool_size=3, strides=2, padding='same')(input_tensor)
branch_c = Conv2D(filters=128, kernel_size=1, activation='relu')(branch_c)

# Branch D: 1x1 -> 3x3 -> 1x1 -> strided 3x3 -> 1x1.
branch_d = Conv2D(filters=128, kernel_size=1, activation='relu')(input_tensor)
branch_d = Conv2D(filters=128, kernel_size=3, padding='same', activation='relu')(branch_d)
branch_d = Conv2D(filters=128, kernel_size=1, activation='relu')(branch_d)
branch_d = Conv2D(filters=128, kernel_size=3, strides=2, padding='same', activation='relu')(branch_d)
branch_d = Conv2D(filters=128, kernel_size=1, activation='relu')(branch_d)

output = concatenate([branch_a, branch_b, branch_c, branch_d], axis=-1)

model = Model(inputs=input_tensor, outputs=output)

## Residual connections

In [15]:
# Identity residual connection: three same-padded 3x3 convolutions with 128
# filters leave the feature-map shape equal to the input's, so the input can
# be added back unchanged.
x = Input(shape=(32, 32, 128))
y = Conv2D(filters=128, kernel_size=3, padding='same', activation='relu')(x)
y = Conv2D(filters=128, kernel_size=3, padding='same', activation='relu')(y)
y = Conv2D(filters=128, kernel_size=3, padding='same', activation='relu')(y)

y = add([y, x])

In [13]:
# Residual connection when the main path changes shape: the convolutions
# widen the input to 128 channels and the pooling halves the spatial size.
x = Input(shape=(32, 32, 3))
y = Conv2D(filters=128, kernel_size=3, padding='same', activation='relu')(x)
y = Conv2D(filters=128, kernel_size=3, padding='same', activation='relu')(y)
y = MaxPooling2D(pool_size=2, strides=2)(y)

# A strided 1x1 convolution projects the input to the same shape as `y`
# so that the two tensors can be summed.
residual = Conv2D(filters=128, kernel_size=1, strides=2, padding='same')(x)

y = add([y, residual])

## Shared layers
Semantic similarity example

In [19]:
# One LSTM instance is created once and called on both inputs, so the two
# branches share the same layer (and therefore the same weights).
lstm = LSTM(units=32)

left_input = Input(shape=(None, 128))
left_output = lstm(left_input)

right_input = Input(shape=(None, 128))
right_output = lstm(right_input)

# Combine both encodings and predict a single sigmoid score.
merged = concatenate([left_output, right_output], axis=-1)
predictions = Dense(units=1, activation='sigmoid')(merged)

model = Model(inputs=[left_input, right_input], outputs=predictions)

# Training data must follow the order of the model inputs (left, then right):
# model.fit([left_data, right_data], targets)

## Models as layers

In [21]:
# A whole model can be called like a layer: the same Xception base (no
# pretrained weights, no classification top) is applied to both image inputs.
xception_base = Xception(include_top=False, weights=None)

left_input, right_input = (Input(shape=(250, 250, 3)) for _ in range(2))

# NOTE(review): `rigth_features` is a misspelling of "right"; the name is kept
# unchanged because code outside this cell may refer to it.
left_features, rigth_features = (
    xception_base(image_input) for image_input in (left_input, right_input)
)

merged_features = concatenate([left_features, rigth_features], axis=-1)