In [0]:
import os

import numpy as np

# For DataFrame object
import pandas as pd

# Neural Network
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop

# Text Vectorizing
from keras.preprocessing.text import Tokenizer, text_to_word_sequence

# Train-test-split
from sklearn.model_selection import train_test_split

# History visualization
%matplotlib inline
import matplotlib.pyplot as plt

# Normalize
from sklearn.preprocessing import normalize

In [0]:
# Read the CSV into a DataFrame and preview its first rows.
# NOTE(review): the location looks like a Colab Google Drive mount — confirm before running elsewhere.
path = '/content/drive/My Drive/val.csv'
df = pd.read_csv(filepath_or_buffer=path)
df.head(n=5)

In [0]:
def delete_new_line_symbols(text):
    """Return ``text`` with every line break replaced by a single space.

    Handles Unix (``\\n``), Windows (``\\r\\n``) and bare ``\\r`` line endings,
    so no stray carriage return stays attached to a word.

    Parameters
    ----------
    text : str or missing value
        The raw text. ``None`` and float NaN (what pandas uses for empty
        CSV cells) are treated as an empty string; any other non-string
        value is converted with ``str()``.

    Returns
    -------
    str
        The text on a single line.
    """
    if not isinstance(text, str):
        # NaN is the only float that is not equal to itself.
        if text is None or (isinstance(text, float) and text != text):
            return ''
        text = str(text)
    # Replace '\r\n' first so a Windows line ending becomes one space, not two.
    return text.replace('\r\n', ' ').replace('\r', ' ').replace('\n', ' ')

In [0]:
# Columns removed from the frame before the text is vectorized.
columns = ['region', 'city', 'price', 'title', 'subcategory', 'category', 'datetime_submitted']
# Rebind instead of mutating in place and skip columns that are already gone,
# so re-running this cell does not raise KeyError.
df = df.drop(columns=columns, errors='ignore')

In [0]:
# Preview the first five rows of the current frame.
df.head(n=5)

In [0]:
# Run the line-break cleanup over every description, then preview the result.
df['description'] = df['description'].map(delete_new_line_symbols)
df.head(n=5)

In [0]:
# Labels: the 'is_bad' column cast to unsigned 8-bit integers, copied into a NumPy array.
is_bad_labels = df['is_bad'].astype(np.uint8)
target = np.array(is_bad_labels)
target[:5]

In [0]:
# Word-level tokenizer settings: vocabulary capped via num_words, text lower-cased,
# tokens split on spaces, and the listed punctuation/tab/newline characters filtered out.
tokenizer_options = dict(
    num_words=30000,
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
    lower=True,
    split=' ',
    char_level=False,
)
tokenizer = Tokenizer(**tokenizer_options)

In [0]:
# Fit the tokenizer on the descriptions, then encode the same texts as a
# per-document word-count matrix.
descriptions = df['description']
tokenizer.fit_on_texts(descriptions)
matrix = tokenizer.texts_to_matrix(descriptions, mode='count')
matrix.shape

In [0]:
def get_model(learning_rate=0.0001, dropout_rate=0.3):
    """Build and compile the feed-forward binary classifier.

    Architecture: Dense(32, relu) -> Dropout -> Dense(16, relu) -> Dropout
    -> Dense(16, relu) -> Dense(1, sigmoid). No input shape is declared here.

    Parameters
    ----------
    learning_rate : float, optional
        Learning rate passed to the RMSprop optimizer. Defaults to 0.0001,
        the value that was previously hard-coded.
    dropout_rate : float, optional
        Rate used by both Dropout layers. Defaults to 0.3, the value that
        was previously hard-coded.

    Returns
    -------
    Sequential
        The model compiled with binary cross-entropy loss and the
        'accuracy' metric.
    """
    model = Sequential()

    model.add(Dense(32, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(16, activation='relu'))
    # Single sigmoid unit: one probability per sample for the binary target.
    model.add(Dense(1, activation='sigmoid'))

    # NOTE(review): newer Keras releases name this optimizer argument
    # `learning_rate`; `lr` is kept to match the Keras version this notebook uses.
    model.compile(optimizer=RMSprop(lr=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model

In [0]:
# Normalize the count matrix with sklearn's `normalize` (default settings).
X = normalize(matrix)
y = target

# Hold out 20% of the rows. The fixed seed makes the split, and every metric
# computed from it, reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

X_train.shape, y_train.shape

In [0]:
# Build a fresh network and train it; the held-out split is passed as validation data.
model = get_model()

fit_options = dict(
    epochs=120,
    batch_size=500,
    validation_data=(X_test, y_test),
)
history = model.fit(X_train, y_train, **fit_options)

history

In [0]:
# Score the trained model on the held-out split; evaluate returns (loss, accuracy).
test_loss, test_acc = model.evaluate(x=X_test, y=y_test, verbose=2)

# The printed label is Russian for "Accuracy on the validation data:".
print('\nТочность на проверочных данных:', test_acc)

In [0]:
#print(type(predictions))
#predictions.astype(float)

In [0]:
# Results
# Predicted probabilities for the held-out rows, rounded to two decimals.
prediction = np.round(model.predict(X_test), 2)

# The network ends in a single sigmoid unit, so `prediction` has one column;
# flatten it explicitly rather than relying on pandas to coerce a 2-D array
# into a column. 'index' is the row position within the held-out split.
target_prediction = pd.DataFrame({
    'index': range(prediction.shape[0]),
    'prediction': prediction.reshape(-1),
})
target_prediction.to_csv('target_prediction.csv', index=False)

# Read the file back to confirm what was written.
pd.read_csv('target_prediction.csv')

In [0]:
# Display the held-out labels produced by train_test_split.
y_test

In [0]:
# Unwrap the object returned by model.fit into its metrics dict. Using getattr
# keeps the cell re-runnable: on a second run `history` is already the dict
# and is left unchanged.
history = getattr(history, 'history', history)

# Older Keras releases log accuracy under 'acc', newer ones under 'accuracy'.
acc_key = 'acc' if 'acc' in history else 'accuracy'

fig = plt.figure(figsize=(20, 10))

# Two stacked panels in the left column of a 2x2 grid.
ax1 = fig.add_subplot(221)
ax2 = fig.add_subplot(223)

# Take the epoch axis from the recorded values instead of repeating the
# epoch count used in model.fit.
x = range(len(history['loss']))

ax1.plot(x, history[acc_key], 'b-', label='Accuracy')
ax1.plot(x, history['val_' + acc_key], 'r-', label='Validation accuracy')
ax1.legend(loc='lower right')

ax2.plot(x, history['loss'], 'b-', label='Losses')
ax2.plot(x, history['val_loss'], 'r-', label='Validation losses')
ax2.legend(loc='upper right')