In [112]:
import pandas as pd
import numpy as np
from struct import unpack
from base64 import b64decode
import string

In [113]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Activation, Dropout, concatenate, BatchNormalization, Multiply, Add
from keras.callbacks import EarlyStopping
from keras.utils.np_utils import to_categorical

In [114]:
IMG_LEN = 1024
TXT_LEN = 300
N_CLASSES = 50

In [115]:
filename = "/Users/dmitry/Downloads/topics_dataset.json"
df = pd.read_json(filename, lines=True)

In [116]:
from functools import partial

def unpck(l, x):
    return unpack('%df' % l, b64decode(x.encode('utf-8')))

unpck_img = partial(unpck, IMG_LEN)
unpck_txt = partial(unpck, TXT_LEN)

In [117]:
df_q = df.sample(frac=0.25)
x_img_q = np.stack(df_q['x1'].map(unpck_img), axis=0)
x_txt_q = np.stack(df_q['x2'].map(unpck_txt), axis=0)
y_q = to_categorical(np.array(df_q['y1']), N_CLASSES)

In [118]:
from sklearn.preprocessing import MinMaxScaler, normalize

norm_x_img_q = normalize(x_img_q)
# img_scaler = MinMaxScaler()
# img_scaler.fit(norm_x_img_q)
# scaled_x_img_q = img_scaler.transform(norm_x_img_q)

norm_x_txt_q = normalize(x_txt_q)
# txt_scaler = MinMaxScaler()
# txt_scaler.fit(norm_x_txt_q)
# scaled_x_txt_q = txt_scaler.transform(norm_x_txt_q)


In [119]:
from sklearn.model_selection import train_test_split
x_img_train, x_img_test, x_txt_train, x_txt_test, y_train, y_test = train_test_split(norm_x_img_q, norm_x_txt_q, y_q, test_size=0.2, random_state=42)

In [120]:
es = EarlyStopping(monitor='val_accuracy', mode='max', min_delta=0.001, patience=3)

def get_model_1():
    inp_img = Input(shape=(1024,))
    inp_txt = Input(shape=(300,))
    
    x_img = Dense(64, activation='relu')(inp_img)
    x_img = Dropout(0.25)(x_img)
    x_img = Dense(64, activation='relu')(x_img)

    x_txt = Dense(64, activation='relu')(inp_txt)
    x_txt = Dropout(0.25)(x_txt)
    x_txt = Dense(64, activation='relu')(x_txt)

    x = concatenate([x_img, x_txt])
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.25)(x)
    x = Dense(128, activation='relu')(x)
    out = Dense(50, activation='softmax')(x)

    model = Model(inputs=[inp_img, inp_txt], outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [121]:
model_1 = get_model_1()
model_1.fit([x_img_train, x_txt_train], y_train, epochs=10, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1127d8450>

In [143]:
def get_model_mult():
    inp_img = Input(shape=(1024,))
    inp_txt = Input(shape=(300,))
    
    x_img = Dense(64, activation='relu')(inp_img)
    x_img = Dropout(0.25)(x_img)
    x_img = Dense(64, activation='relu')(x_img)

    x_txt = Dense(64, activation='relu')(inp_txt)
    x_txt = Dropout(0.25)(x_txt)
    x_txt = Dense(64, activation='relu')(x_txt)

    x = Multiply()([x_img, x_txt])
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.25)(x)
    x = Dense(128, activation='relu')(x)
    out = Dense(50, activation='softmax')(x)

    model = Model(inputs=[inp_img, inp_txt], outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [146]:
model_mult = get_model_mult()
model_mult.fit([x_img_train, x_txt_train], y_train, epochs=20, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x13bb9f250>

In [147]:
def get_model_add():
    inp_img = Input(shape=(1024,))
    inp_txt = Input(shape=(300,))
    
    x_img = Dense(64, activation='relu')(inp_img)
    x_img = Dropout(0.25)(x_img)
    x_img = Dense(64, activation='relu')(x_img)

    x_txt = Dense(64, activation='relu')(inp_txt)
    x_txt = Dropout(0.25)(x_txt)
    x_txt = Dense(64, activation='relu')(x_txt)

    x = Add()([x_img, x_txt])
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.25)(x)
    x = Dense(128, activation='relu')(x)
    out = Dense(50, activation='softmax')(x)

    model = Model(inputs=[inp_img, inp_txt], outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [148]:
model_add = get_model_add()
model_add.fit([x_img_train, x_txt_train], y_train, epochs=20, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1656c9a90>

In [150]:
def get_model_mix():
    inp_img = Input(shape=(1024,))
    inp_txt = Input(shape=(300,))
    
    x_img = Dense(64, activation='relu')(inp_img)
    x_img = Dropout(0.25)(x_img)
    x_img = Dense(64, activation='relu')(x_img)

    x_txt = Dense(64, activation='relu')(inp_txt)
    x_txt = Dropout(0.25)(x_txt)
    x_txt = Dense(64, activation='relu')(x_txt)

    add = Add()([x_img, x_txt])
    add = Dense(128, activation='relu')(add)
    add = Dropout(0.25)(add)
    
    mult = Multiply()([x_img, x_txt])
    mult = Dense(128, activation='relu')(mult)
    mult = Dropout(0.25)(mult)
    
    mix = concatenate([add, mult])
    mix = Dense(128, activation='relu')(mix)
    mix = Dropout(0.25)(mix)

    out = Dense(50, activation='softmax')(mix)

    model = Model(inputs=[inp_img, inp_txt], outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [152]:
def get_model_wide_mix():
    inp_img = Input(shape=(1024,))
    inp_txt = Input(shape=(300,))
    
    x_img = Dense(1024, activation='relu')(inp_img)
    x_img = Dropout(0.25)(x_img)
    x_img = Dense(512, activation='relu')(x_img)

    x_txt = Dense(512, activation='relu')(inp_txt)
    x_txt = Dropout(0.25)(x_txt)
    x_txt = Dense(512, activation='relu')(x_txt)

    add = Add()([x_img, x_txt])
    add = Dense(512, activation='relu')(add)
    add = Dropout(0.25)(add)
    
    mult = Multiply()([x_img, x_txt])
    mult = Dense(512, activation='relu')(mult)
    mult = Dropout(0.25)(mult)
    
    mix = concatenate([add, mult])
    mix = Dense(512, activation='relu')(mix)
    mix = Dropout(0.25)(mix)

    out = Dense(50, activation='softmax')(mix)

    model = Model(inputs=[inp_img, inp_txt], outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [151]:
model_mix = get_model_mix()
model_mix.fit([x_img_train, x_txt_train], y_train, epochs=20, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x18082f190>

In [153]:
model_wide_mix = get_model_wide_mix()
model_wide_mix.fit([x_img_train, x_txt_train], y_train, epochs=20, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20

KeyboardInterrupt: 

In [156]:
def get_model_long_horns():
    inp_img = Input(shape=(1024,))
    inp_txt = Input(shape=(300,))
    
    x_img = Dense(1024, activation='relu')(inp_img)
    x_img = Dropout(0.25)(x_img)
    x_img = Dense(512, activation='relu')(x_img)
    x_img = Dropout(0.25)(x_img)
    x_img = Dense(512)(x_img)

    x_txt = Dense(512, activation='relu')(inp_txt)
    x_txt = Dropout(0.25)(x_txt)
    x_txt = Dense(512, activation='relu')(x_txt)
    x_txt = Dropout(0.25)(x_txt)
    x_txt = Dense(512, activation='relu')(x_txt)

    add = Add()([x_img, x_txt])
    add = Dense(512, activation='relu')(add)
    add = Dropout(0.25)(add)
    
    mult = Multiply()([x_img, x_txt])
    mult = Dense(512, activation='relu')(mult)
    mult = Dropout(0.25)(mult)
    
    mix = concatenate([add, mult])
    mix = Dense(512, activation='relu')(mix)
    mix = Dropout(0.25)(mix)

    out = Dense(50, activation='softmax')(mix)

    model = Model(inputs=[inp_img, inp_txt], outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [157]:
model_long_horns = get_model_long_horns()
model_long_horns.fit([x_img_train, x_txt_train], y_train, epochs=8, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<tensorflow.python.keras.callbacks.History at 0x18aa5ed10>

In [158]:
def get_model_highway_horns():
    inp_img = Input(shape=(1024,))
    inp_txt = Input(shape=(300,))
    
    x_img = Highway()(inp_img)
    x_img = Dropout(0.25)(x_img)
    x_img = Dense(512)(x_img)

    x_txt = Highway()(inp_txt)
    x_txt = Dropout(0.25)(x_txt)
    x_txt = Dense(512, activation='relu')(x_txt)

    add = Add()([x_img, x_txt])
    add = Dense(512, activation='relu')(add)
    add = Dropout(0.25)(add)
    
    mult = Multiply()([x_img, x_txt])
    mult = Dense(512, activation='relu')(mult)
    mult = Dropout(0.25)(mult)
    
    mix = concatenate([add, mult])
    mix = Dense(512, activation='relu')(mix)
    mix = Dropout(0.25)(mix)

    out = Dense(50, activation='softmax')(mix)

    model = Model(inputs=[inp_img, inp_txt], outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [160]:
model_highway_horns = get_model_highway_horns()
model_highway_horns.fit([x_img_train, x_txt_train], y_train, epochs=10, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x19d166890>

In [141]:
def get_model_img_1():
    inp = Input(shape=(IMG_LEN,))
    
    x = Dense(1024, activation='relu')(inp)
    x = Dropout(0.25)(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.25)(x)
    out = Dense(50, activation='softmax')(x)

    model = Model(inputs=inp, outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [142]:
model_img_1 = get_model_img_1()
model_img_1.fit(x_img_train, y_train, epochs=10, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x16f9ea490>

In [139]:
def get_model_txt_1():
    inp = Input(shape=(TXT_LEN,))
        
        
    x1 = Dense(512, activation='relu')(inp)
    x1 = Dropout(0.25)(x1)
    
    x2 = Dense(512, activation='relu')(x1)
    x2 = Dropout(0.5)(x2)
    
    x = concatenate([x1, x2])
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.25)(x)
    out = Dense(50, activation='softmax')(x)

    model = Model(inputs=inp, outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [140]:
model_txt_1 = get_model_txt_1()
model_txt_1.fit(x_txt_train, y_train, epochs=10, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x13b022dd0>

In [7]:
def get_model_2():
    x_img = Dense(128, activation='relu')(inp_img)
    x_img = Dropout(0.25)(x_img)
    x_img = Dense(128, activation='tanh')(x_img)

    x_txt = Dense(128, activation='relu')(inp_txt)
    x_txt = Dropout(0.25)(x_txt)
    x_txt = Dense(128, activation='tanh')(x_txt)

    x = concatenate([x_img, x_txt])
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.25)(x)
    x = Dense(256, activation='tanh')(x)
    x = Dropout(0.1)(x)
    x = Dense(256, activation='relu')(x)
    out = Dense(50, activation='softmax')(x)

    model = Model(inputs=[inp_img, inp_txt], outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [172]:
model = get_model_2()
model.fit([x_img, x_txt], y, epochs=10, validation_split=0.1)

Train on 9000 samples, validate on 1000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x14fe712d0>

In [115]:
model.evaluate([x_img_test, x_txt_test], y_test, verbose=0)

[0.04970714685320854, 0.98471624]

upgrades:  
1) bigger width of first layers  
2) add dropout before concatenation  
3) set dropout rate to 0.5  
4) only relu activations  
5) adadelt - decrease acc significantly  
6) like in resnet 2.0 use batch normalization (bet that will not help) . 


In [56]:
es = EarlyStopping(monitor='val_accuracy', mode='max', min_delta=0.001, patience=3)

def get_model_3():
    inp_img = Input(shape=(1024,))
    inp_txt = Input(shape=(300,))
    
    x_img = Dense(1024, activation='relu')(inp_img)
    x_img = Dropout(0.5)(x_img)
    x_img = BatchNormalization()(x_img)
    x_img = Dense(512, activation='relu')(x_img)
    x_img = Dropout(0.5)(x_img)

    x_txt = Dense(512, activation='relu')(inp_txt)
    x_txt = Dropout(0.5)(x_txt)
    x_txt = BatchNormalization()(x_txt)
    x_txt = Dense(256, activation='relu')(x_txt)
    x_txt = Dropout(0.5)(x_txt)

    x = concatenate([x_img, x_txt])
    x = Dense(256, activation='relu')(x)
    relu = Dense(256, activation='relu')(x)
    sigmoid = Dense(256, activation='sigmoid')(x)
    mult_1 = Multiply()([x, sigmoid])
    minus = Lambda(lambda x: 1.0 - x)(sigmoid)
    mult_2 = Multiply()([minus, relu])
    x = Add()([mult_2, mult_1])

#     x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = concatenate([x_img, x_txt, x]) # smth residual
#     x = Dense(256, activation='relu')(x)
    out = Dense(50, activation='softmax')(x)

    model = Model(inputs=[inp_img, inp_txt], outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

upgrades:  
1) remove early stopping  
2) epochs = 40

In [58]:
model_3 = get_model_3()
model_3.fit([x_img_train, x_txt_train], y_train, epochs=40, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40

KeyboardInterrupt: 

In [None]:
model_3 = get_model_3()
model_3.fit([x_img_train, x_txt_train], y_train, epochs=40, validation_split=0.1)

In [62]:
def get_model_more_highways():
    inp_img = Input(shape=(1024,))
    inp_txt = Input(shape=(300,))
    
    x_img = Dense(1024, activation='relu')(inp_img)
    x_img = Dropout(0.5)(x_img)
    x_img = BatchNormalization()(x_img)
    x_img = Dense(512, activation='relu')(x_img)
    x_img = Dropout(0.5)(x_img)

    relu = Dense(512, activation='relu')(x_img)
    sigmoid = Dense(512, activation='sigmoid')(x_img)
    mult_1 = Multiply()([x_img, sigmoid])
    minus = Lambda(lambda x: 1.0 - x)(sigmoid)
    mult_2 = Multiply()([minus, relu])
    x_img = Add()([mult_2, mult_1])
    x_img = Dropout(0.5)(x_img)
                                         
    x_txt = Dense(512, activation='relu')(inp_txt)
    x_txt = Dropout(0.5)(x_txt)
    x_txt = BatchNormalization()(x_txt)
    x_txt = Dense(256, activation='relu')(x_txt)
    x_txt = Dropout(0.5)(x_txt)

    relu = Dense(256, activation='relu')(x_txt)
    sigmoid = Dense(256, activation='sigmoid')(x_txt)
    mult_1 = Multiply()([x_txt, sigmoid])
    minus = Lambda(lambda x: 1.0 - x)(sigmoid)
    mult_2 = Multiply()([minus, relu])
    x_txt = Add()([mult_2, mult_1])
    x_txt = Dropout(0.5)(x_txt)
                                         
    x = concatenate([x_img, x_txt])
    x = Dense(256, activation='relu')(x)
    relu = Dense(256, activation='relu')(x)
    sigmoid = Dense(256, activation='sigmoid')(x)
    mult_1 = Multiply()([x, sigmoid])
    minus = Lambda(lambda x: 1.0 - x)(sigmoid)
    mult_2 = Multiply()([minus, relu])
    x = Add()([mult_2, mult_1])

#     x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = concatenate([x_img, x_txt, x]) # smth residual
#     x = Dense(256, activation='relu')(x)
    out = Dense(50, activation='softmax')(x)

    model = Model(inputs=[inp_img, inp_txt], outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [63]:
model_4 = get_model_more_highways()
model_4.fit([x_img_train, x_txt_train], y_train, epochs=40, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<tensorflow.python.keras.callbacks.History at 0x14ea45a90>

In [64]:
def get_model_5():
    inp_img = Input(shape=(1024,))
    inp_txt = Input(shape=(300,))
    
    x_img = Dense(1024, activation='relu')(inp_img)
    x_img = Dropout(0.5)(x_img)
    x_img = BatchNormalization()(x_img)
    x_img = Dense(512, activation='relu')(x_img)
    x_img = Dropout(0.5)(x_img)

    relu = Dense(512, activation='relu')(x_img)
    sigmoid = Dense(512, activation='sigmoid')(x_img)
    mult_1 = Multiply()([x_img, sigmoid])
    minus = Lambda(lambda x: 1.0 - x)(sigmoid)
    mult_2 = Multiply()([minus, relu])
    x_img = Add()([mult_2, mult_1])
    x_img = Dropout(0.5)(x_img)
                                         
    x_txt = Dense(512, activation='relu')(inp_txt)
    x_txt = Dropout(0.5)(x_txt)
    x_txt = BatchNormalization()(x_txt)
    x_txt = Dense(256, activation='relu')(x_txt)
    x_txt = Dropout(0.5)(x_txt)

    relu = Dense(256, activation='relu')(x_txt)
    sigmoid = Dense(256, activation='sigmoid')(x_txt)
    mult_1 = Multiply()([x_txt, sigmoid])
    minus = Lambda(lambda x: 1.0 - x)(sigmoid)
    mult_2 = Multiply()([minus, relu])
    x_txt = Add()([mult_2, mult_1])
    x_txt = Dropout(0.5)(x_txt)
                                         
    x = concatenate([x_img, x_txt])
    x = Dense(256, activation='relu')(x)
    relu = Dense(256, activation='relu')(x)
    sigmoid = Dense(256, activation='sigmoid')(x)
    mult_1 = Multiply()([x, sigmoid])
    minus = Lambda(lambda x: 1.0 - x)(sigmoid)
    mult_2 = Multiply()([minus, relu])
    x = Add()([mult_2, mult_1])

#     x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
#     x = Dense(256, activation='relu')(x)
    out = Dense(50, activation='softmax')(x)

    model = Model(inputs=[inp_img, inp_txt], outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [65]:
model_5 = get_model_5()
model_5.fit([x_img_train, x_txt_train], y_train, epochs=30, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30

KeyboardInterrupt: 

In [82]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import warnings

from tensorflow.keras.layers import Layer, InputSpec
from tensorflow.keras import backend as K
from tensorflow.python.keras.utils import conv_utils
from tensorflow.python.keras.utils.generic_utils import to_list
from  tensorflow.keras import regularizers 
from tensorflow.keras import constraints
from tensorflow.keras import activations
from tensorflow.keras import initializers

class Highway(Layer):
    """Densely connected highway network.
    Highway layers are a natural extension of LSTMs to feedforward networks.
    # Arguments
        init: name of initialization function for the weights of the layer
            (see [initializations](../initializations.md)),
            or alternatively, Theano function to use for weights
            initialization. This parameter is only relevant
            if you don't pass a `weights` argument.
        activation: name of activation function to use
            (see [activations](../activations.md)),
            or alternatively, elementwise Theano function.
            If you don't specify anything, no activation is applied
            (ie. "linear" activation: a(x) = x).
        weights: list of Numpy arrays to set as initial weights.
            The list should have 2 elements, of shape `(input_dim, output_dim)`
            and (output_dim,) for weights and biases respectively.
        W_regularizer: instance of [WeightRegularizer](../regularizers.md)
            (eg. L1 or L2 regularization), applied to the main weights matrix.
        b_regularizer: instance of [WeightRegularizer](../regularizers.md),
            applied to the bias.
        activity_regularizer: instance of [ActivityRegularizer](../regularizers.md),
            applied to the network output.
        W_constraint: instance of the [constraints](../constraints.md) module
            (eg. maxnorm, nonneg), applied to the main weights matrix.
        b_constraint: instance of the [constraints](../constraints.md) module,
            applied to the bias.
        bias: whether to include a bias
            (i.e. make the layer affine rather than linear).
        input_dim: dimensionality of the input (integer). This argument
            (or alternatively, the keyword argument `input_shape`)
            is required when using this layer as the first layer in a model.
    # Input shape
        2D tensor with shape: `(nb_samples, input_dim)`.
    # Output shape
        2D tensor with shape: `(nb_samples, input_dim)`.
    # References
        - [Highway Networks](http://arxiv.org/abs/1505.00387v2)
    """

    def __init__(self,
                 init='glorot_uniform',
                 activation=None,
                 weights=None,
                 W_regularizer=None,
                 b_regularizer=None,
                 activity_regularizer=None,
                 W_constraint=None,
                 b_constraint=None,
                 bias=True,
                 input_dim=None,
                 **kwargs):
        
        if 'transform_bias' in kwargs:
            kwargs.pop('transform_bias')
            warnings.warn('`transform_bias` argument is deprecated and '
                          'has been removed.')
        self.init = initializers.get(init)
        self.activation = activations.get(activation)

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        self.initial_weights = weights
        self.input_spec = InputSpec(ndim=2)

        self.input_dim = input_dim
        if self.input_dim:
            kwargs['input_shape'] = (self.input_dim,)
        super(Highway, self).__init__(**kwargs)

    def build(self, input_shape):
        input_dim = input_shape[1]
        self.input_spec = InputSpec(dtype=K.floatx(),
                                    shape=(None, input_dim))

        self.W = self.add_weight(shape=(input_dim, input_dim),
                                 initializer=self.init,
                                 name='W',
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        self.W_carry = self.add_weight(shape=(input_dim, input_dim),
                                       initializer=self.init,
                                       name='W_carry')
        if self.bias:
            self.b = self.add_weight(shape=(input_dim,),
                                     initializer='zero',
                                     name='b',
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
            self.b_carry = self.add_weight(shape=(input_dim,),
                                           initializer='one',
                                           name='b_carry')
        else:
            self.b_carry = None

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
        self.built = True

    def call(self, x):
        y = K.dot(x, self.W_carry)
        if self.bias:
            y += self.b_carry
        transform_weight = activations.sigmoid(y)
        y = K.dot(x, self.W)
        if self.bias:
            y += self.b
        act = self.activation(y)
        act *= transform_weight
        output = act + (1 - transform_weight) * x
        return output

    def get_config(self):
        config = {'init': initializers.serialize(self.init),
                  'activation': activations.serialize(self.activation),
                  'W_regularizer': regularizers.serialize(self.W_regularizer),
                  'b_regularizer': regularizers.serialize(self.b_regularizer),
                  'activity_regularizer':
                      regularizers.serialize(self.activity_regularizer),
                  'W_constraint': constraints.serialize(self.W_constraint),
                  'b_constraint': constraints.serialize(self.b_constraint),
                  'bias': self.bias,
                  'input_dim': self.input_dim}
        base_config = super(Highway, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [83]:
def get_model_6():
    inp_img = Input(shape=(1024,))
    inp_txt = Input(shape=(300,))
    
    x_img = Dense(1024, activation='relu')(inp_img)
    x_img = Dropout(0.5)(x_img)
    x_img = BatchNormalization()(x_img)
    x_img = Dense(512, activation='relu')(x_img)
    x_img = Dropout(0.5)(x_img)

    x_txt = Dense(512, activation='relu')(inp_txt)
    x_txt = Dropout(0.5)(x_txt)
    x_txt = BatchNormalization()(x_txt)
    x_txt = Dense(256, activation='relu')(x_txt)
    x_txt = Dropout(0.5)(x_txt)

    x = concatenate([x_img, x_txt])
    x = Dense(256, activation='relu')(x)
    x = Highway()(x)
    
#     x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
#     x = concatenate([x_img, x_txt, x]) # smth residual
#     x = Dense(256, activation='relu')(x)
    out = Dense(50, activation='softmax')(x)

    model = Model(inputs=[inp_img, inp_txt], outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [84]:
model_6 = get_model_6()
model_6.fit([x_img_train, x_txt_train], y_train, epochs=30, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x15c1e0450>

In [87]:
def get_model_7():
    inp_img = Input(shape=(1024,))
    inp_txt = Input(shape=(300,))
    
    x_img = Dense(1024, activation='relu')(inp_img)
    x_img = Dropout(0.5)(x_img)
    x_img = Highway()(x_img)
    x_img = Dropout(0.5)(x_img)

    x_txt = Dense(512, activation='relu')(inp_txt)
    x_txt = Dropout(0.5)(x_txt)
    x_txt = Highway()(x_txt)
    x_txt = Dropout(0.5)(x_txt)

    x = concatenate([x_img, x_txt])
    x = Dense(256, activation='relu')(x)
    x = Highway()(x)
    
    x = Dropout(0.5)(x)
#     x = concatenate([x_img, x_txt, x]) # smth residual
#     x = Dense(256, activation='relu')(x)
    out = Dense(50, activation='softmax')(x)

    model = Model(inputs=[inp_img, inp_txt], outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [88]:
model_7 = get_model_7()
model_7.fit([x_img_train, x_txt_train], y_train, epochs=30, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30

KeyboardInterrupt: 

In [89]:
def get_model_8():
    inp_img = Input(shape=(1024,))
    inp_txt = Input(shape=(300,))
    
    x_img = Dense(1024, activation='relu')(inp_img)
    x_img = Dropout(0.5)(x_img)
    x_img = Highway()(x_img)
    x_img = Dropout(0.5)(x_img)

    x_txt = Dense(512, activation='relu')(inp_txt)
    x_txt = Dropout(0.5)(x_txt)
    x_txt = Highway()(x_txt)
    x_txt = Dropout(0.5)(x_txt)

    x = concatenate([x_img, x_txt])
    x = Dense(256, activation='relu')(x)
    x = Highway()(x)
    
    x = Dropout(0.5)(x)
    x = concatenate([x_img, x_txt, x]) # smth residual
    x = Dense(256, activation='relu')(x)
    out = Dense(50, activation='softmax')(x)

    model = Model(inputs=[inp_img, inp_txt], outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [90]:
model_8 = get_model_8()
model_8.fit([x_img_train, x_txt_train], y_train, epochs=30, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30

KeyboardInterrupt: 

In [95]:
def get_model_9():
    inp_img = Input(shape=(1024,))
    inp_txt = Input(shape=(300,))
    
    x_img = Dense(1024, activation='relu')(inp_img)
    x_img = Dropout(0.5)(x_img)
    x_img = Highway()(x_img)
    x_img = Dropout(0.5)(x_img)

    x_txt = Dense(512, activation='relu')(inp_txt)
    x_txt = Dropout(0.5)(x_txt)
    x_txt = Highway()(x_txt)
    x_txt = Dropout(0.5)(x_txt)

    x = concatenate([x_img, x_txt])
    x = Dense(256)(x)
    x = Highway()(x)
    x1 = Dropout(0.5)(x)
    
    x = concatenate([x_img, x_txt, x1]) # smth residual
    x = Dense(256)(x)
    x = Highway()(x)
    x = Dropout(0.5)(x)
    
    out = Dense(50, activation='softmax')(x)

    model = Model(inputs=[inp_img, inp_txt], outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [96]:
model_9 = get_model_9()
model_9.fit([x_img_train, x_txt_train], y_train, epochs=30, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x18cc43150>

In [102]:
print(x_img_test[6])

[0.01431737 0.02877704 0.16622615 ... 0.92601949 0.55370623 1.17222333]


In [110]:
x_img_train, x_img_test, x_txt_train, x_txt_test, y_train, y_test = train_test_split(norm_x_img_q, norm_x_txt_q, y_q, test_size=0.2, random_state=42)

In [111]:
model_3 = get_model_3()
model_3.fit([x_img_train, x_txt_train], y_train, epochs=30, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x13f1e8490>