In [1]:
import numpy as np
import pandas as pd
from gensim.models import word2vec
import nltk
from nltk import word_tokenize
from nltk.corpus import stopwords
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding, Dropout, Flatten, GlobalAveragePooling1D, GlobalMaxPooling1D, MaxPool1D, Conv1D, MaxPooling1D, Conv2D, MaxPool2D
from keras.layers import LSTM, GRU
from keras.layers import TimeDistributed
from keras.layers import Bidirectional
from keras.optimizers import RMSprop, Adagrad, Adam
from keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
from keras import callbacks
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import TruncatedSVD

from collections import Counter
import tqdm
from tqdm import tqdm_notebook
from functools import reduce
import re

Using TensorFlow backend.


In [2]:
df = pd.concat([
    pd.read_csv('train.csv',index_col='id'),
    pd.read_csv('test.csv',index_col='id')
    ],axis=0)
df['part'] = df['author'].isnull().apply(lambda x: 'test' if x else 'train')
print('Количество примеров:',len(df),', доля обучающей выборки:',"{0:.2f}%".format(100*np.mean(df['part']=='train')))
df.sample(5)

Количество примеров: 27971 , доля обучающей выборки: 70.00%


Unnamed: 0_level_0,author,text,part
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
id08785,,Here and there were slimy objects of puzzling ...,test
id14131,HPL,I won't say that all this is wholly true in bo...,train
id00330,,"They did not know that beauty lies in harmony,...",test
id22659,,"I made up my mind, of course, that the box and...",test
id22757,MWS,"Fortunately, as I spoke my native language, Mr...",train


In [3]:
def CleanData(df):
    c = Counter()

    for ind, row in tqdm_notebook(df.iterrows(), total = df.shape[0], desc = 'Build punctuation dict'):
        c += Counter(re.sub('\w+','',row.text))

    dct = dict(c)
    dct.pop(' ')

    for i in dct.keys():
        dct[i]=''

    for index, row in tqdm_notebook(df.iterrows(), total = df.shape[0], desc = 'Clean texts'):
        df.loc[index,'clean_text'] = reduce(lambda x, y: x.replace(y, dct[y]), dct, row.text.lower())

    return df

In [4]:
data = CleanData(df)







In [5]:
train_df = data[data.part=='train']
test_df = data[data.part=='test']

In [6]:
train_df.head(2)

Unnamed: 0_level_0,author,text,part,clean_text
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
id26305,EAP,"This process, however, afforded me no means of...",train,this process however afforded me no means of a...
id17569,HPL,It never once occurred to me that the fumbling...,train,it never once occurred to me that the fumbling...


## Preprocessing

In [7]:
data_eap = data[data.author=='EAP']
data_hpl = data[data.author=='HPL']
data_mws = data[data.author=='MWS']

for i_df, df in enumerate([data_eap, data_hpl, data_mws]):
    c = Counter()
    for ind, row in tqdm_notebook(df.iterrows(), total = df.shape[0], desc = 'Build word dicts'):
        c += Counter(row.clean_text.split(' '))
    
    if i_df == 0:
        dict_eap = dict(c)
    elif i_df == 1:
        dict_hpl = dict(c)
    else:
        dict_mws = dict(c)
        
#list(train_df[train_df.author=='EAP'].text.values)










In [8]:
list_eap_flt = list({key: dict_eap[key] for key in dict_eap.keys() if dict_eap[key]<3}.keys())
list_hpl_flt = list({key: dict_hpl[key] for key in dict_hpl.keys() if dict_hpl[key]<3}.keys())
list_mws_flt = list({key: dict_mws[key] for key in dict_mws.keys() if dict_mws[key]<3}.keys())

In [9]:
all_words_dict = dict(data['clean_text'].str.split(expand=True).unstack().value_counts())
delete_list = list({key: all_words_dict[key] for key in all_words_dict.keys() if all_words_dict[key]<3}.keys())

In [10]:
stop_words = stopwords.words('english')
for ind, row in tqdm_notebook(data.iterrows(), total = data.shape[0], desc = 'Clean_texts'):
    data.loc[ind,'clean_text_filtered'] = ' '.join([x for x in nltk.word_tokenize(row.clean_text) if (x not in stop_words and x not in delete_list)])




In [11]:
data.head(2)

Unnamed: 0_level_0,author,text,part,clean_text,clean_text_filtered
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
id26305,EAP,"This process, however, afforded me no means of...",train,this process however afforded me no means of a...,process however afforded means ascertaining di...
id17569,HPL,It never once occurred to me that the fumbling...,train,it never once occurred to me that the fumbling...,never occurred fumbling might mere mistake


In [12]:
def preprocess(text):
    text = text.replace("' ", " ' ")
    signs = set(',.:;"?!')
    prods = set(text) & signs
    if not prods:
        return text

    for sign in prods:
        text = text.replace(sign, ' {} '.format(sign) )
    return text

In [13]:
def create_docs(df, n_gram_max=2):
    def add_ngram(q, n_gram_max):
            ngrams = []
            for n in range(2, n_gram_max+1):
                for w_index in range(len(q)-n+1):
                    ngrams.append('--'.join(q[w_index:w_index+n]))
            return q + ngrams
        
    docs = []
    for doc in df.text:
        doc = preprocess(doc).split()
        docs.append(' '.join(add_ngram(doc, n_gram_max)))
    
    return docs

In [14]:
docs = create_docs(data)

## TF-IDF

In [6]:
tfidf_vec = TfidfVectorizer(stop_words='english', ngram_range=(1,3))
full_tfidf = tfidf_vec.fit_transform(train_df['text'].values.tolist() + test_df['text'].values.tolist())
train_tfidf = tfidf_vec.transform(train_df['text'].values.tolist())
test_tfidf = tfidf_vec.transform(test_df['text'].values.tolist())

In [7]:
n_comp = 3
svd_obj = TruncatedSVD(n_components=n_comp, algorithm='arpack')
svd_obj.fit(full_tfidf)
train_svd = pd.DataFrame(svd_obj.transform(train_tfidf))
test_svd = pd.DataFrame(svd_obj.transform(test_tfidf))

In [8]:
train_svd.head(2)

Unnamed: 0,0,1,2
0,0.025243,-0.01215,0.001335
1,0.009239,-0.003991,0.00101


## NN

In [15]:
df = data

In [16]:
tkn = Tokenizer(lower=True)
#tkn.fit_on_texts(df.clean_text_filtered)
tkn.fit_on_texts(docs)
df['tokens'] = tkn.texts_to_sequences(docs)
max_text_len = max(df.tokens.apply(len))
fix_text_len = 256
print('Максимальная длина текста:',max_text_len,' слов')
df['tokens'] = list(sequence.pad_sequences(df['tokens'].values, maxlen=fix_text_len))
df.sample(2)

Максимальная длина текста: 2582  слов


Unnamed: 0_level_0,author,text,part,clean_text,clean_text_filtered,tokens
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
id05403,EAP,I groaned in anguish at the pitiable spectacle.,train,i groaned in anguish at the pitiable spectacle,groaned anguish pitiable spectacle,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
id07154,EAP,"Another was a hickory, much larger than the el...",train,another was a hickory much larger than the elm...,another hickory much larger elm altogether muc...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [17]:
test_x = df[df['part']=='test']['tokens'].apply(list).tolist()
train = df[df['part']=='train']

In [18]:
train_x, valid_x, train_y, valid_y = train_test_split(train['tokens'].apply(list).tolist(),pd.get_dummies(train['author']), test_size=0.2)

In [19]:
num_features = len(data['clean_text_filtered'].str.split(expand=True).unstack().value_counts())
num_features

14435

In [47]:
input_dim = 29308+1 #np.max(len(docs))+1
print(input_dim)
embedding_dims = 32

29309


In [48]:
from keras_tqdm import TQDMNotebookCallback

batch_size = 16
epochs = 25
#num_features = 14435

print('Build model...')
model = Sequential()
model.add(Embedding(input_dim=input_dim, output_dim=embedding_dims))
model.add(GlobalAveragePooling1D())
model.add(Dense(3, activation='softmax'))
#model = Sequential()
#model.add(Embedding(num_features+1, 50))
#model.add(LSTM(60,return_sequences=True,kernel_initializer='he_normal'))
#model.add(LSTM(60))#, dropout=0.3, recurrent_dropout=0.1,kernel_initializer='he_normal'))
#model.add(Dropout(0.2))
#model.add(Dense(30, activation='relu'))
#model.add(Dense(3, activation='softmax'))

'''model = Sequential()
model.add(Embedding(num_features+1, 30))
#model.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu')) # new
#model.add(MaxPooling1D(pool_size=2)) # new
model.add(LSTM(100, return_sequences=True)) # new
model.add(GlobalMaxPooling1D())
#model.add(LSTM(60,return_sequences=True,kernel_initializer='he_normal'))
#model.add(LSTM(60))#, dropout=0.3, recurrent_dropout=0.1,kernel_initializer='he_normal'))
##model.add(GlobalAveragePooling1D())
model.add(Dense(3, activation='softmax'))'''

'''
model = Sequential()
model.add(Conv2D(filters = 32, kernel_size = (5,5), padding = 'Valid', activation ='relu', input_shape = (28,28,1)))
model.add(Conv2D(filters = 32, kernel_size = (3,3), padding = 'Same',  activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.2))

model.add(Conv2D(filters = 64, kernel_size = (3,3), padding = 'Same', activation ='relu'))
model.add(Conv2D(filters = 64, kernel_size = (3,3), padding = 'Same', activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(Dropout(0.2))

model.add(Flatten())
model.add(Dense(512, activation = "relu"))
model.add(Dropout(0.5))
model.add(Dense(10, activation = "softmax"))

model.compile(loss='categorical_crossentropy', optimizer = Adam(lr=1e-3), metrics=["accuracy"])
'''

'''
model = Sequential()
model.add(Conv1D(filters = 32, kernel_size = (1,5), padding = 'Valid', activation ='relu', input_shape = (28,28)))
model.add(Conv1D(filters = 32, kernel_size = 3, padding = 'Same',  activation ='relu'))
model.add(MaxPool1D(pool_size=2))
model.add(Dropout(0.2))

model.add(Conv1D(filters = 64, kernel_size = 3, padding = 'Same', activation ='relu'))
model.add(Conv1D(filters = 64, kernel_size = 3, padding = 'Same', activation ='relu'))
model.add(MaxPool1D(pool_size=2, strides=4))
model.add(Dropout(0.2))

model.add(Flatten())
model.add(Dense(512, activation = "relu"))
model.add(Dropout(0.5))
model.add(Dense(10, activation = "softmax"))
'''

###model = Sequential()
###model.add(Embedding(max_features+1, 10 ,mask_zero=True))
#model.add(Bidirectional(LSTM(100, return_sequences=True),input_shape=(5, 10)))
#model.add(Dropout(0.8))
###model.add(GRU(30))
###model.add(Dense(20))
###model.add(Dropout(0.8))
###model.add(Dense(3, activation = 'softmax'))

# try using different optimizers and different optimizer configs

#lr = 0.01

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              #optimizer=Adagrad(lr=lr),
              #optimizer=RMSprop(),
              metrics=['categorical_accuracy'])

model.summary()

filepath="weights-improvement-{epoch:02d}-{val_loss:.4f}.hdf5"
checkpoint = callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

print('Train...')
early_stop = callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=2, verbose=0, mode='auto')
reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1, min_lr=0.001, mode='auto')
model.fit(
    train_x,
    train_y.values,
    batch_size=batch_size,
    validation_data=(valid_x, valid_y.values),
    verbose=0,
    epochs=epochs,
    #callbacks=[TQDMNotebookCallback()]
    callbacks=[checkpoint, TQDMNotebookCallback(), reduce_lr]
)


Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_10 (Embedding)     (None, None, 32)          937888    
_________________________________________________________________
global_average_pooling1d_10  (None, 32)                0         
_________________________________________________________________
dense_10 (Dense)             (None, 3)                 99        
Total params: 937,987.0
Trainable params: 937,987.0
Non-trainable params: 0.0
_________________________________________________________________
Train...


Epoch 00000: val_loss improved from inf to 0.91960, saving model to weights-improvement-00-0.9196.hdf5


Epoch 00001: val_loss improved from 0.91960 to 0.69488, saving model to weights-improvement-01-0.6949.hdf5


Epoch 00002: val_loss improved from 0.69488 to 0.57287, saving model to weights-improvement-02-0.5729.hdf5


Epoch 00003: val_loss improved from 0.57287 to 0.50461, saving model to weights-improvement-03-0.5046.hdf5


Epoch 00004: val_loss improved from 0.50461 to 0.45659, saving model to weights-improvement-04-0.4566.hdf5


Epoch 00005: val_loss improved from 0.45659 to 0.43113, saving model to weights-improvement-05-0.4311.hdf5


Epoch 00006: val_loss improved from 0.43113 to 0.41720, saving model to weights-improvement-06-0.4172.hdf5


Epoch 00007: val_loss improved from 0.41720 to 0.39800, saving model to weights-improvement-07-0.3980.hdf5


Epoch 00008: val_loss improved from 0.39800 to 0.39279, saving model to weights-improvement-08-0.3928.hdf5


Epoch 00009: val_loss improved from 0.39279 to 0.38858, saving model to weights-improvement-09-0.3886.hdf5


Epoch 00010: val_loss did not improve


Epoch 00011: val_loss did not improve


Epoch 00012: val_loss did not improve


Epoch 00013: val_loss did not improve


Epoch 00014: val_loss did not improve


Epoch 00015: val_loss did not improve


Epoch 00016: val_loss did not improve


Epoch 00017: val_loss did not improve


Epoch 00018: val_loss did not improve


Epoch 00019: val_loss did not improve


Epoch 00020: val_loss did not improve


Epoch 00021: val_loss did not improve


Epoch 00022: val_loss did not improve


Epoch 00023: val_loss did not improve


Epoch 00024: val_loss did not improve


<keras.callbacks.History at 0x1f55a34cbe0>

In [67]:
def LearnModel(epochs):
    model.fit(
        train_x,
        train_y.values,
        batch_size=batch_size,
        validation_data=(valid_x, valid_y.values),
        verbose=2,
        epochs=epochs,
        callbacks=[checkpoint, TQDMNotebookCallback(), reduce_lr]
    )

In [70]:
LearnModel(10)

Train on 15663 samples, validate on 3916 samples
Epoch 1/10
Epoch 00000: val_loss did not improve
124s - loss: 0.0672 - categorical_accuracy: 0.9824 - val_loss: 0.4518 - val_categorical_accuracy: 0.8437
Epoch 2/10


KeyboardInterrupt: 

In [71]:
model.load_weights("weights-improvement-20-0.3881.hdf5")
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              #optimizer=Adagrad(lr=lr),
              #optimizer=RMSprop(),
              metrics=['categorical_accuracy'])

In [72]:
score, acc = model.evaluate(valid_x, valid_y.values,batch_size=batch_size)



In [73]:
print('Test score:', score)
print('Test accuracy:', acc)

Test score: 0.388136694118
Test accuracy: 0.844228804842


In [74]:
cols = list(train_y.columns)
print(cols)
test = df[df['part']=='test']#
test_x = test['tokens'].apply(list).tolist()
pred = model.predict_proba(test_x,verbose=False)
for i,e in enumerate(cols):
    test[e] = pred[:,i]
test.head(2)

['EAP', 'HPL', 'MWS']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


Unnamed: 0_level_0,author,text,part,clean_text,clean_text_filtered,tokens,EAP,HPL,MWS
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
id02310,,"Still, as I urged our leaving Ireland with suc...",test,still as i urged our leaving ireland with such...,still urged leaving ireland inquietude impatie...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.113054,0.026434,0.860512
id24541,,"If a fire wanted fanning, it could readily be ...",test,if a fire wanted fanning it could readily be f...,fire wanted could readily fanned newspaper gov...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.993533,0.006456,1.1e-05


In [75]:
test[train_y.columns].to_csv('rnn_adam_161117_01.csv')

## XGBoost

In [9]:
author_mapping_dict = {'EAP':0, 'HPL':1, 'MWS':2}
train_y = train_df['author'].map(author_mapping_dict)

In [10]:
train_y.values.shape

(19579,)

In [11]:
train_df.head(3)

Unnamed: 0_level_0,author,text,part
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
id26305,EAP,This process however afforded me no means of a...,train
id17569,HPL,It never once occurred to me that the fumbling...,train
id11008,EAP,In his left hand was a gold snuff box from whi...,train


In [12]:
train_svd.values.shape

(19579, 3)

In [13]:
X_train, X_valid, y_train, y_valid = train_test_split(train_svd.values,
                                                     train_y,
                                                     test_size = 0.2)

In [14]:
set(y_valid)

{0, 1, 2}

In [None]:
import xgboost as xgb

params ={
        'eta':0.01,
        'n_estimators': 1612,
        'max_depth': 10,
        'min_child_weight': 1,
        'colsample_bytree': 0.3,
        'num_class': 3,
        #'ntree_limit': 6,
        'num_parallel_tree': 3,
        'objective': 'multi:softprob',
        'eval_metric': 'mlogloss'
        }

do_validation = True

if do_validation:
    print('1')
    dtrain = xgb.DMatrix(X_train, label = y_train)
    print('2')
    dvalid = xgb.DMatrix(X_valid, label = y_valid)
    print('3')
    watchlist = [(dtrain, 'train'), (dvalid, 'train(test)')]
    print('4')
else:
    print('5')
    dtrain = xgb.DMatrix(train_svd.values, label = train_y.values)
    print('6')
    watchlist = [(dtrain, 'train')]
    print('7')

%time xgb_model = xgb.train(params = params, dtrain = dtrain, evals=watchlist, num_boost_round = params['n_estimators'], verbose_eval=100, early_stopping_rounds=100)

dtest1 = xgb.DMatrix(test_svd)
#preds = xgb_model.predict(dtest)

test_df['prediction'] = xgb_model.predict(dtest1,ntree_limit=xgb_model.best_iteration)



1
2
3
4


## Capsule NN

In [118]:
import numpy as np
np.random.seed(666)
import pandas as pd
from sklearn.cross_validation import train_test_split

In [119]:
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
sample = pd.read_csv("sample_submission.csv")
sample.head(2)

Unnamed: 0,id,EAP,HPL,MWS
0,id02310,0.403494,0.287808,0.308698
1,id24541,0.403494,0.287808,0.308698


In [120]:
train["EAP"] = (train.author=="EAP")*1
train["HPL"] = (train.author=="HPL")*1
train["MWS"] = (train.author=="MWS")*1
train.drop("author", 1, inplace=True)
target_vars = ["EAP", "HPL", "MWS"]
train.head(2)

Unnamed: 0,id,text,EAP,HPL,MWS
0,id26305,"This process, however, afforded me no means of...",1,0,0
1,id17569,It never once occurred to me that the fumbling...,0,1,0


In [121]:
import nltk.stem as stm
import re
stemmer = stm.SnowballStemmer("english")
train["stem_text"] = train.text.apply(lambda x: (" ").join([stemmer.stem(z) for z in re.sub("[^a-zA-Z0-9]"," ", x).split(" ")]))
test["stem_text"] = test.text.apply(lambda x: (" ").join([stemmer.stem(z) for z in re.sub("[^a-zA-Z0-9]"," ", x).split(" ")]))
train.head(3)

Unnamed: 0,id,text,EAP,HPL,MWS,stem_text
0,id26305,"This process, however, afforded me no means of...",1,0,0,this process howev afford me no mean of asce...
1,id17569,It never once occurred to me that the fumbling...,0,1,0,it never onc occur to me that the fumbl might ...
2,id11008,"In his left hand was a gold snuff box, from wh...",1,0,0,in his left hand was a gold snuff box from wh...


In [122]:
from keras.preprocessing.text import Tokenizer
tok_raw = Tokenizer()
tok_raw.fit_on_texts(train.text.str.lower())
tok_stem = Tokenizer()
tok_stem.fit_on_texts(train.stem_text)
train["seq_text_stem"] = tok_stem.texts_to_sequences(train.stem_text)
test["seq_text_stem"] = tok_stem.texts_to_sequences(test.stem_text)
train.head(3)

Unnamed: 0,id,text,EAP,HPL,MWS,stem_text,seq_text_stem
0,id26305,"This process, however, afforded me no means of...",1,0,0,this process howev afford me no mean of asce...,"[27, 1895, 162, 743, 22, 37, 201, 2, 1687, 1, ..."
1,id17569,It never once occurred to me that the fumbling...,0,1,0,it never onc occur to me that the fumbl might ...,"[10, 99, 138, 681, 4, 22, 9, 1, 3806, 85, 23, ..."
2,id11008,"In his left hand was a gold snuff box, from wh...",1,0,0,in his left hand was a gold snuff box from wh...,"[7, 15, 164, 122, 8, 6, 943, 4296, 642, 24, 18..."


In [123]:
from keras.preprocessing.sequence import pad_sequences
def get_keras_data(dataset, maxlen=20):
    X = {
        "stem_input": pad_sequences(dataset.seq_text_stem, maxlen=maxlen)
    }
    return X


maxlen = 60
dtrain, dvalid = train_test_split(train, random_state=123, train_size=0.85)
X_train = get_keras_data(dtrain, maxlen)
y_train = np.array(dtrain[target_vars])
X_valid = get_keras_data(dvalid, maxlen)
y_valid = np.array(dvalid[target_vars])
X_test = get_keras_data(test, maxlen)

n_stem_seq = np.max( [np.max(X_valid["stem_input"]), np.max(X_train["stem_input"])])+1

In [128]:
import keras.backend as K
import tensorflow as tf
from keras import initializers, layers

class CapsuleLayer(layers.Layer):
    """
    The capsule layer. It is similar to Dense layer. Dense layer has `in_num` inputs, each is a scalar, the output of the 
    neuron from the former layer, and it has `out_num` output neurons. CapsuleLayer just expand the output of the neuron
    from scalar to vector. So its input shape = [None, input_num_capsule, input_dim_vector] and output shape = \
    [None, num_capsule, dim_vector]. For Dense Layer, input_dim_vector = dim_vector = 1.
    
    :param num_capsule: number of capsules in this layer
    :param dim_vector: dimension of the output vectors of the capsules in this layer
    :param num_routings: number of iterations for the routing algorithm
    """
    def __init__(self, num_capsule, dim_vector, num_routing=3,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 **kwargs):
        super(CapsuleLayer, self).__init__(**kwargs)
        self.num_capsule = num_capsule
        self.dim_vector = dim_vector
        self.num_routing = num_routing
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)

    def build(self, input_shape):
        assert len(input_shape) >= 3, "The input Tensor should have shape=[None, input_num_capsule, input_dim_vector]"
        self.input_num_capsule = input_shape[1]
        self.input_dim_vector = input_shape[2]

        # Transform matrix
        self.W = self.add_weight(shape=[self.input_num_capsule, self.num_capsule, self.input_dim_vector, self.dim_vector],
                                 initializer=self.kernel_initializer,
                                 name='W')

        # Coupling coefficient. The redundant dimensions are just to facilitate subsequent matrix calculation.
        self.bias = self.add_weight(shape=[1, self.input_num_capsule, self.num_capsule, 1, 1],
                                    initializer=self.bias_initializer,
                                    name='bias',
                                    trainable=False)
        self.built = True

    def call(self, inputs, training=None):
        # inputs.shape=[None, input_num_capsule, input_dim_vector]
        # Expand dims to [None, input_num_capsule, 1, 1, input_dim_vector]
        inputs_expand = K.expand_dims(K.expand_dims(inputs, 2), 2)

        # Replicate num_capsule dimension to prepare being multiplied by W
        # Now it has shape = [None, input_num_capsule, num_capsule, 1, input_dim_vector]
        inputs_tiled = K.tile(inputs_expand, [1, 1, self.num_capsule, 1, 1])

        """  
        # Compute `inputs * W` by expanding the first dim of W. More time-consuming and need batch_size.
        # Now W has shape  = [batch_size, input_num_capsule, num_capsule, input_dim_vector, dim_vector]
        w_tiled = K.tile(K.expand_dims(self.W, 0), [self.batch_size, 1, 1, 1, 1])
        
        # Transformed vectors, inputs_hat.shape = [None, input_num_capsule, num_capsule, 1, dim_vector]
        inputs_hat = K.batch_dot(inputs_tiled, w_tiled, [4, 3])
        """
        # Compute `inputs * W` by scanning inputs_tiled on dimension 0. This is faster but requires Tensorflow.
        # inputs_hat.shape = [None, input_num_capsule, num_capsule, 1, dim_vector]
        inputs_hat = tf.scan(lambda ac, x: K.batch_dot(x, self.W, [3, 2]),
                             elems=inputs_tiled,
                             initializer=K.zeros([self.input_num_capsule, self.num_capsule, 1, self.dim_vector]))
        """
        # Routing algorithm V1. Use tf.while_loop in a dynamic way.
        def body(i, b, outputs):
            c = tf.nn.softmax(self.bias, dim=2)  # dim=2 is the num_capsule dimension
            outputs = squash(K.sum(c * inputs_hat, 1, keepdims=True))
            b = b + K.sum(inputs_hat * outputs, -1, keepdims=True)
            return [i-1, b, outputs]
        cond = lambda i, b, inputs_hat: i > 0
        loop_vars = [K.constant(self.num_routing), self.bias, K.sum(inputs_hat, 1, keepdims=True)]
        _, _, outputs = tf.while_loop(cond, body, loop_vars)
        """
        # Routing algorithm V2. Use iteration. V2 and V1 both work without much difference on performance
        assert self.num_routing > 0, 'The num_routing should be > 0.'
        for i in range(self.num_routing):
            c = tf.nn.softmax(self.bias, dim=2)  # dim=2 is the num_capsule dimension
            # outputs.shape=[None, 1, num_capsule, 1, dim_vector]
            outputs = squash(K.sum(c * inputs_hat, 1, keepdims=True))

            # last iteration needs not compute bias which will not be passed to the graph any more anyway.
            if i != self.num_routing - 1:
                # self.bias = K.update_add(self.bias, K.sum(inputs_hat * outputs, [0, -1], keepdims=True))
                self.bias += K.sum(inputs_hat * outputs, -1, keepdims=True)
            # tf.summary.histogram('BigBee', self.bias)  # for debugging
        return K.reshape(outputs, [-1, self.num_capsule, self.dim_vector])
    
class Mask(layers.Layer):
    """
    Mask a Tensor with shape=[None, d1, d2] by the max value in axis=1.
    Output shape: [None, d2]
    """
    def call(self, inputs, **kwargs):
        # use true label to select target capsule, shape=[batch_size, num_capsule]
        if type(inputs) is list:  # true label is provided with shape = [batch_size, n_classes], i.e. one-hot code.
            assert len(inputs) == 2
            inputs, mask = inputs
        else:  # if no true label, mask by the max length of vectors of capsules
            x = inputs
            # Enlarge the range of values in x to make max(new_x)=1 and others < 0
            x = (x - K.max(x, 1, True)) / K.epsilon() + 1
            mask = K.clip(x, 0, 1)  # the max value in x clipped to 1 and other to 0

        # masked inputs, shape = [batch_size, dim_vector]
        inputs_masked = K.batch_dot(inputs, mask, [1, 1])
        return inputs_masked

    def compute_output_shape(self, input_shape):
        if type(input_shape[0]) is tuple:  # true label provided
            return tuple([None, input_shape[0][-1]])
        else:
            return tuple([None, input_shape[-1]])

In [136]:
from keras.layers import Dense, Dropout, Embedding
from keras.layers import Flatten, Input, SpatialDropout1D, Reshape
from keras.models import Model
from keras.optimizers import Adam 

def get_model():
    embed_dim = 50
    dropout_rate = 0.9
    emb_dropout_rate = 0.9
   
    input_text = Input(shape=[maxlen], name="stem_input")
    
    emb_lstm = SpatialDropout1D(emb_dropout_rate) (Embedding(n_stem_seq, embed_dim
                                                ,input_length = maxlen
                                                               ) (input_text))
    dense = Dropout(dropout_rate) (Dense(1024) (Flatten() (emb_lstm)))
    dense = Reshape((128, 8)) (dense)
    dense = Flatten() (Mask()(CapsuleLayer(128, 8))(dense))
    
    output = Dense(3, activation="softmax")(dense)

    model = Model([input_text], output)

    optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    return model

model = get_model()
model.summary()

AttributeError: 'CapsuleLayer' object has no attribute 'get_shape'

In [132]:
#model = get_model()
model.fit(X_train, y_train, epochs=27
          , validation_data=[X_valid, y_valid]
         , batch_size=1024)

ValueError: No data provided for "conv1d_12_input". Need data for each key in: ['conv1d_12_input']

In [127]:
from sklearn.metrics import log_loss

preds_train = model.predict(X_train)
preds_valid = model.predict(X_valid)

print(log_loss(y_train, preds_train))
print(log_loss(y_valid, preds_valid))

ValueError: No data provided for "conv1d_12_input". Need data for each key in: ['conv1d_12_input']

In [None]:
preds = pd.DataFrame(model.predict(X_test), columns=target_vars)
submission = pd.concat([test["id"],preds], 1)
submission.to_csv("CapsuleNN_submission_181117.csv", index=False)
submission.head()