In [7]:
import pandas as pd
import numpy as np
from struct import unpack
from base64 import b64decode
import string

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Activation, Dropout, concatenate, BatchNormalization, Multiply, Add
from keras.callbacks import EarlyStopping
from keras.utils.np_utils import to_categorical

from functools import partial
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense, Activation, Multiply, Add, Lambda, Layer
from tensorflow.keras.initializers import Constant
from sklearn.model_selection import train_test_split

In [3]:
filename = "/Users/dmitry/Downloads/topics_dataset.json"
df = pd.read_json(filename, lines=True)

In [4]:
IMG_LEN = 1024
TXT_LEN = 300
N_CLASSES = 50

In [5]:
def unpck(l, x):
    return unpack('%df' % l, b64decode(x.encode('utf-8')))

unpck_img = partial(unpck, IMG_LEN)
unpck_txt = partial(unpck, TXT_LEN)

In [6]:
df_q = df.sample(frac=0.25)
x_img_q = np.stack(df_q['x1'].map(unpck_img), axis=0)
x_txt_q = np.stack(df_q['x2'].map(unpck_txt), axis=0)
y_q = to_categorical(np.array(df_q['y1']), N_CLASSES)

In [8]:
x_img_train, x_img_test, x_txt_train, x_txt_test, y_train, y_test = train_test_split(x_img_q, x_txt_q, y_q, test_size=0.2, random_state=42)

In [19]:
def get_simple_autoencoder():
    inp_img = Input(shape=(IMG_LEN,))
    inp_txt = Input(shape=(TXT_LEN,))

    concat = concatenate([inp_img, inp_txt])
    encoded = Dense(512, activation='relu')(concat)
    
    out_img = Dense(IMG_LEN)(encoded)
    out_txt = Dense(TXT_LEN)(encoded)
    
    autoencoder = Model(inputs=[inp_img, inp_txt], outputs=[out_img, out_txt])
    autoencoder.compile(optimizer='adadelta', loss='mse')
    
    autoencoder.fit([x_img_train, x_txt_train], [x_img_train, x_txt_train], epochs=30, validation_split=0.1)
    return Model(inputs=[inp_img, inp_txt], outputs=encoded)

In [20]:
encoder = get_simple_autoencoder()

Train on 38221 samples, validate on 4247 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [35]:
def get_autoencoder_1():
    inp_img = Input(shape=(IMG_LEN,))
    inp_txt = Input(shape=(TXT_LEN,))

    concat = concatenate([inp_img, inp_txt])
    encoded = Dense(512, activation='relu')(concat)
    
    out_img = Dense(IMG_LEN, activation='sigmoid', name='img')(encoded)
    out_txt = Dense(TXT_LEN, activation='sigmoid', name='txt')(encoded)
    
    autoencoder = Model(inputs=[inp_img, inp_txt], outputs=[out_img, out_txt])
    autoencoder.compile(optimizer='adadelta', loss='mse', loss_weights={'img':1, 'txt':0.2})
    
    autoencoder.fit([x_img_train, x_txt_train], [x_img_train, x_txt_train], epochs=30, validation_split=0.1)
    return Model(inputs=[inp_img, inp_txt], outputs=encoded)

In [36]:
encoder_1 = get_autoencoder_1()

Train on 38221 samples, validate on 4247 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [21]:
print(encoder.summary())

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_11 (InputLayer)           [(None, 1024)]       0                                            
__________________________________________________________________________________________________
input_12 (InputLayer)           [(None, 300)]        0                                            
__________________________________________________________________________________________________
concatenate_4 (Concatenate)     (None, 1324)         0           input_11[0][0]                   
                                                                 input_12[0][0]                   
__________________________________________________________________________________________________
dense_11 (Dense)                (None, 512)          678400      concatenate_4[0][0]        

In [24]:
train_e = encoder.predict([x_img_train, x_txt_train])
test_e = encoder.predict([x_img_test, x_txt_test])

In [28]:
np.shape(train_e)

(42468, 512)

In [31]:
def get_model():
    inp = Input(shape=(512,))
    x = Dense(512, activation='relu')(inp)
    x = Dropout(0.5)(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    out = Dense(50, activation='softmax')(x)
    
    model = Model(inputs=inp, outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [32]:
model = get_model()
model.fit(train_e, y_train, epochs=20, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x13a06b790>

In [38]:
train_e_1 = encoder_1.predict([x_img_train, x_txt_train])
model_1 = get_model()
model_1.fit(train_e_1, y_train, epochs=20, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x14ef5e0d0>

In [44]:
def get_autoencoder_2():
    inp_img = Input(shape=(IMG_LEN,))
    inp_txt = Input(shape=(TXT_LEN,))

    concat = concatenate([inp_img, inp_txt])
    hidden_1 = Dense(512, activation='relu')(concat)
    
    hidden_2 = Dropout(0.5)(hidden_1)
    hidden_2 = Dense(512, activation='relu')(hidden_2)
    
    hidden_3 = Dropout(0.5)(hidden_2)
    hidden_3 = Dense(512, activation='relu')(hidden_3)
    
    out_img = Dense(IMG_LEN, activation='sigmoid', name='img')(hidden_3)
    out_txt = Dense(TXT_LEN, activation='sigmoid', name='txt')(hidden_3)
    
    autoencoder = Model(inputs=[inp_img, inp_txt], outputs=[out_img, out_txt])
    autoencoder.compile(optimizer='adadelta', loss='mse', loss_weights={'img':1, 'txt':0.2})
    
    autoencoder.fit([x_img_train, x_txt_train], [x_img_train, x_txt_train], epochs=30, validation_split=0.1)
    return Model(inputs=[inp_img, inp_txt], outputs=hidden_2)

In [45]:
encoder_2 = get_autoencoder_2()

Train on 38221 samples, validate on 4247 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [46]:
train_e_2 = encoder_2.predict([x_img_train, x_txt_train])
model_2 = get_model()
model_2.fit(train_e_2, y_train, epochs=20, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x13de8d150>

In [43]:
print(encoder_2.summary())

Model: "model_13"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_23 (InputLayer)           [(None, 1024)]       0                                            
__________________________________________________________________________________________________
input_24 (InputLayer)           [(None, 300)]        0                                            
__________________________________________________________________________________________________
concatenate_8 (Concatenate)     (None, 1324)         0           input_23[0][0]                   
                                                                 input_24[0][0]                   
__________________________________________________________________________________________________
dense_31 (Dense)                (None, 512)          678400      concatenate_8[0][0]       

In [59]:
def get_autoencoder_2():
    inp_img = Input(shape=(IMG_LEN,))
    encoded_img = Dense(1024)(inp_img)
    encoded_img = Dropout(0.5)(encoded_img)
    
    inp_txt = Input(shape=(TXT_LEN,))
    encoded_txt = Dense(512)(inp_txt)
    encoded_txt = Dropout(0.5)(encoded_txt)
    
    concat = concatenate([inp_img, inp_txt])
    encoded = Dense(512, activation='relu')(concat)
    
    decoded_img = Dense(1024)(encoded)
    decoded_img = Dropout(0.5)(decoded_img)
    out_img = Dense(IMG_LEN, activation='sigmoid', name='img')(decoded_img)
    
    decoded_txt = Dense(512)(encoded)
    decoded_txt = Dropout(0.5)(decoded_txt)
    out_txt = Dense(TXT_LEN, activation='sigmoid', name='txt')(decoded_img)
    
    autoencoder = Model(inputs=[inp_img, inp_txt], outputs=[out_img, out_txt])
    autoencoder.compile(optimizer='adadelta', loss='mse', loss_weights={'img':1, 'txt':0.2})
    
    autoencoder.fit([x_img_train, x_txt_train], [x_img_train, x_txt_train], epochs=30, validation_split=0.1)
    return Model(inputs=[inp_img, inp_txt], outputs=encoded)

In [60]:
encoder_2 = get_autoencoder_2()

Train on 38221 samples, validate on 4247 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [61]:
train_e_2 = encoder_2.predict([x_img_train, x_txt_train])
model_2 = get_model()
model_2.fit(train_e_2, y_train, epochs=20, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1524ba650>

In [68]:
model_12 = get_model()
model_12.fit(train_e_2, y_train, epochs=20, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x157192f10>

In [82]:
max(x_txt_train[500])

0.1938919574022293

отскалить данные (текст меньше картинки в 10 раз)

на выход декодера relu/prelu

еще раз попробовать удлинить модель

Проверить данные по отдельности вместе с highway и residual

leaky relu

использовать tensorboard

In [84]:
def get_autoencoder_4():
    inp_img = Input(shape=(IMG_LEN,))
    inp_txt = Input(shape=(TXT_LEN,))

    concat = concatenate([inp_img, inp_txt])
    encoded = Dense(512, activation='relu')(concat)
    
    out_img = Dense(IMG_LEN, activation='sigmoid', name='img')(encoded)
    out_txt = Dense(TXT_LEN, activation='sigmoid', name='txt')(encoded)
    
    autoencoder = Model(inputs=[inp_img, inp_txt], outputs=[out_img, out_txt])
    autoencoder.compile(optimizer='adadelta', loss='mse', loss_weights={'img':2, 'txt':0.2})
    
    autoencoder.fit([x_img_train, x_txt_train], [x_img_train, x_txt_train], epochs=30, validation_split=0.1)
    return Model(inputs=[inp_img, inp_txt], outputs=encoded)

In [85]:
encoder_4 = get_autoencoder_4()

Train on 38221 samples, validate on 4247 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [125]:
def get_relu_encoder():
    inp_img = Input(shape=(IMG_LEN,))
    inp_txt = Input(shape=(TXT_LEN,))
    
    encoded = concatenate([inp_img, inp_txt])
    encoded = Dense(512, activation='relu')(inp_img)
    
    out_img = Dense(IMG_LEN, activation='relu', name='img')(encoded)
    out_txt = Dense(TXT_LEN, activation='sigmoid', name='txt')(encoded)
    
    autoencoder = Model(inputs=[inp_img, inp_txt], outputs=[out_img, out_txt])
    autoencoder.compile(optimizer='adadelta', loss='mse')
    
    autoencoder.fit([x_img_train, x_txt_train], [x_img_train, x_txt_train], epochs=10, validation_split=0.1)
    return Model(inputs=[inp_img, inp_txt], outputs=encoded)

In [135]:
def get_relu_encoder_1():
    inp_img = Input(shape=(IMG_LEN,))
    encoded_img = Dense(1024, activation='relu')(inp_img)
#     encoded_img = Dropout(0.5)(encoded_img)
    
    inp_txt = Input(shape=(TXT_LEN,))
    encoded_txt = Dense(512, activation='relu')(inp_txt)
#     encoded_txt = Dropout(0.5)(encoded_txt)
    
    encoded = concatenate([encoded_img, encoded_txt])
    encoded = Dense(512, activation='relu')(inp_img)
    
    decoded_img = Dense(1024, activation='relu')(encoded)
#     decoded_img = Dropout(0.5)(decoded_img)
    
    decoded_txt = Dense(512, activation='relu')(encoded)
#     decoded_txt = Dropout(0.5)(decoded_txt)
    
    out_img = Dense(IMG_LEN, activation='relu', name='img')(decoded_img)
    out_txt = Dense(TXT_LEN, activation='sigmoid', name='txt')(decoded_txt)
    
    autoencoder = Model(inputs=[inp_img, inp_txt], outputs=[out_img, out_txt])
    autoencoder.compile(optimizer='adadelta', loss='mse')
    
    autoencoder.fit([x_img_train, x_txt_train], [x_img_train, x_txt_train], epochs=3, validation_split=0.1)
    return Model(inputs=[inp_img, inp_txt], outputs=encoded)

In [126]:
img_encoder = get_relu_encoder()

Train on 38221 samples, validate on 4247 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [136]:
relu_encoder_1 = get_relu_encoder_1()

Train on 38221 samples, validate on 4247 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [113]:
from sklearn.preprocessing import MinMaxScaler, normalize

norm_x_img_q = normalize(x_img_q)
img_scaler = MinMaxScaler()
img_scaler.fit(norm_x_img_q)
scaled_x_img_q = img_scaler.transform(norm_x_img_q)

norm_x_txt_q = normalize(x_txt_q)
txt_scaler = MinMaxScaler()
txt_scaler.fit(norm_x_txt_q)
scaled_x_txt_q = txt_scaler.transform(norm_x_txt_q)


In [114]:
x_img_train, x_img_test, x_txt_train, x_txt_test, y_train, y_test = train_test_split(scaled_x_img_q, scaled_x_txt_q, y_q, test_size=0.2, random_state=42)

In [115]:
print(max([max(e) for e in x_img_train]))

1.0


In [137]:
train_e_relu = img_encoder.predict([x_img_train, x_txt_train])
model = get_model()
model.fit(train_e_relu, y_train, epochs=20, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x15cd82c50>

In [138]:
def get_model_13():
    inp = Input(shape=(512,))
    x = Dense(512, activation='relu')(inp)
    x = Dropout(0.5)(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    out = Dense(50, activation='softmax')(x)
    
    model = Model(inputs=inp, outputs=out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [139]:
model = get_model_13()
model.fit(train_e_relu, y_train, epochs=20, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1561db650>

In [142]:
def get_combined_model(encoder, model):
    inp_img = Input(shape=(IMG_LEN,))
    inp_txt = Input(shape=(TXT_LEN,))
    
    output_e = encoder([inp_img, inp_txt])
    output_m = model(output_e)
    
    return Model([inp_img, inp_txt], output_m)

In [147]:
combined_model = get_combined_model(img_encoder, model)
combined_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [148]:
combined_model.evaluate([x_img_test, x_txt_test], y_test, verbose=0)

[2.2489103235699446, 0.403598]

In [150]:
combined_model.fit([x_img_train, x_txt_train], y_train, epochs=20, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x15b678ed0>

In [151]:
train_e_relu_1 = relu_encoder_1.predict([x_img_train, x_txt_train])
model = get_model()
model.fit(train_e_relu_1, y_train, epochs=20, validation_split=0.1)

Train on 38221 samples, validate on 4247 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x15a1dd6d0>