In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, MaxPooling1D, Conv1D, GlobalMaxPooling1D, Dropout
from tensorflow.keras import utils
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.callbacks import ModelCheckpoint
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 

In [3]:
# Load the spreadsheet used as training data; later cells read its
# 'text' and 'cat' columns.
train = pd.read_excel('razmetka_new.xlsx')

In [3]:
descriptions = train['text']
categories = train['cat']

# Length, in whitespace-separated tokens, of the longest description.
# `default=0` preserves the result of the original explicit loop when the
# column is empty, and the generator avoids leaking loop temporaries into
# the notebook namespace.
max_words = max((len(desc.split()) for desc in descriptions), default=0)
print(max_words)

39


In [4]:
# Fixed sequence length used when padding the inputs and sizing the
# Embedding layer; the longest description reported by the recorded output
# above is 39 words.
max_review_len = 40
# Vocabulary cap shared by the Tokenizer and the Embedding layer's input size.
num_words = 275

In [5]:
# Targets come from the 'cat' column, raw input strings from 'text'.
y_train, text = train['cat'], train['text']

In [6]:
# Word-level tokenizer whose vocabulary is capped by `num_words`
# (Keras keeps only the most frequent `num_words - 1` words).
tokenizer = Tokenizer(num_words=num_words)

In [7]:
# Build the tokenizer's word index from the contents of the 'text' column.
tokenizer.fit_on_texts(text)

In [8]:
# Turn each text into a list of integer word indices using the fitted tokenizer.
sequences = tokenizer.texts_to_sequences(text)

In [9]:
# Count the entries in the tokenizer's word index and print the count, to
# compare against the `num_words` cap chosen earlier.
total_words = len(tokenizer.word_index)
print(total_words)

272


In [10]:
# Bring every sequence to exactly `max_review_len` entries so the inputs form
# a rectangular array. With the Keras defaults used here, zero-padding and
# truncation are both applied at the front of a sequence.
x_train = pad_sequences(sequences, maxlen=max_review_len)

In [11]:
# Convolutional text classifier declared as one ordered layer list:
# embedding -> 1D convolution -> global max pooling -> dense head with a
# single sigmoid output unit.
model = Sequential([
    Embedding(num_words, 128, input_length=max_review_len),
    Conv1D(300, 8, padding='valid', activation='relu'),
    GlobalMaxPooling1D(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])

In [12]:
# Binary cross-entropy matches the single sigmoid output unit; accuracy is
# tracked so the checkpoint callback can monitor 'val_accuracy'.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [13]:
# File that receives the best model seen during training.
model_save_path = 'best_model.h5'

# Save only when validation accuracy improves, logging each save.
checkpoint_callback = ModelCheckpoint(
    filepath=model_save_path,
    save_best_only=True,
    monitor='val_accuracy',
    verbose=1,
)

In [14]:
# Train for 6 epochs, checkpointing on validation accuracy.
# NOTE(review): Keras takes the `validation_split` fraction from the *end* of
# the provided arrays before any shuffling, so the validation set is the last
# 10% of rows in file order -- confirm the spreadsheet is not sorted by label.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=6,
    validation_split=0.1,
    callbacks=[checkpoint_callback],
)

Epoch 1/6
Epoch 1: val_accuracy improved from -inf to 0.40000, saving model to best_model.h5
Epoch 2/6
Epoch 2: val_accuracy improved from 0.40000 to 0.60000, saving model to best_model.h5
Epoch 3/6
Epoch 3: val_accuracy improved from 0.60000 to 0.80000, saving model to best_model.h5
Epoch 4/6
Epoch 4: val_accuracy improved from 0.80000 to 1.00000, saving model to best_model.h5
Epoch 5/6
Epoch 5: val_accuracy did not improve from 1.00000
Epoch 6/6
Epoch 6: val_accuracy did not improve from 1.00000


In [15]:
# Restore the weights stored at `model_save_path`, the file the checkpoint
# callback writes whenever validation accuracy improves, so the model holds
# the best epoch's weights rather than the final epoch's.
model.load_weights(model_save_path)