In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the dataset from 'restaurants.csv' (path is relative to the working
# directory) and preview its first rows.
restaurants_df = pd.read_csv('restaurants.csv')
restaurants_df.head()

In [None]:
# Summary statistics for every column; include='all' covers non-numeric columns too.
restaurants_df.describe(include='all')

In [None]:
# (rows, columns) of the loaded table.
restaurants_df.shape

In [None]:
# Number of distinct values in each column.
restaurants_df.nunique()

In [None]:
# Inspect the 'text' column, which is the input the tokenizer is fitted on later.
restaurants_df['text']
               

In [None]:
# Look up every row whose review text is exactly 'i love this place'.
is_exact_match = restaurants_df['text'].eq('i love this place')
restaurants_df.loc[is_exact_match]

In [None]:
# Import through tensorflow.keras, like the model and callback cells do, so the
# notebook depends on a single Keras distribution instead of mixing it with the
# standalone `keras` package.
from tensorflow.keras.preprocessing.text import Tokenizer

# num_words=10000 caps the vocabulary used when encoding texts; it has to stay
# in sync with vectorize()'s default dimension and the model's input_shape.
tokenizer = Tokenizer(num_words=10000)

In [None]:
# Build the vocabulary from every review, then encode each review as a list of
# integer word indices.
review_texts = restaurants_df['text']
tokenizer.fit_on_texts(review_texts)
sequences = tokenizer.texts_to_sequences(review_texts)

In [None]:
# Preview only the first few encoded reviews: `sequences` is a plain Python
# list, so displaying it whole would print every review's indices.
sequences[:5]

In [None]:
def vectorize(sequences, dimension = 10000):
    """Multi-hot encode integer sequences.

    Args:
        sequences: Sized iterable of integer index collections, one per sample.
        dimension: Width of each output row; every index must be smaller
            than this value.

    Returns:
        A float array of shape (len(sequences), dimension) holding 1 at every
        position listed in the corresponding sequence and 0 elsewhere.
        Repeated indices within a sequence still yield a single 1.
    """
    n_rows = len(sequences)
    encoded = np.zeros((n_rows, dimension))
    for row, token_ids in zip(range(n_rows), sequences):
        # Fancy indexing switches on all listed columns of this row at once.
        encoded[row, token_ids] = 1
    return encoded

In [None]:
# Multi-hot encode every tokenized review using vectorize()'s default width of 10000.
vec_sequences = vectorize(sequences)

In [None]:
# Inspect the encoded feature matrix.
vec_sequences

In [None]:
# The targets are the columns at positions 1 through 5; pull them out as a
# float32 matrix.
labels = restaurants_df.iloc[:, 1:6].to_numpy(dtype='float32')

In [None]:
# Inspect the label matrix.
labels

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Feed-forward classifier over the 10000-wide multi-hot input: two 128-unit
# ReLU layers, each followed by 30% dropout, and a 5-unit sigmoid output that
# scores every category independently.
model = Sequential([
    Dense(128, activation='relu', input_shape=(10000,)),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(5, activation='sigmoid'),
])

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# as the reported metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Print the layer-by-layer architecture of the model.
model.summary()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 800 rows for testing, then take 1700 of the remaining rows for
# training; whatever is left becomes the validation set.
# random_state is fixed so the same rows land in each split on every run;
# without it the split membership changes between executions.
X, X_test, y, y_test = train_test_split(
    vec_sequences, labels, test_size=800, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X, y, train_size=1700, random_state=42
)

In [None]:
# Sanity-check the split sizes: features first, then labels, each in
# train / validation / test order.
for split in (X_train, X_val, X_test, y_train, y_val, y_test):
    print(split.shape)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
# Stop training once the validation loss has gone 3 epochs without improving;
# verbose=1 logs the epoch at which training was stopped.
# NOTE(review): restore_best_weights is not set, so the model presumably keeps
# the weights of the last epoch run rather than the best one — confirm this is intended.
es = EarlyStopping(monitor='val_loss', mode='min', patience = 3, verbose=1)

In [None]:
# Train for up to 20 epochs in mini-batches of 32, validating on the held-out
# validation split; the early-stopping callback may end the run sooner.
# The returned history is what the plotting cells read their curves from.
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_val, y_val), callbacks = [es])

In [None]:
import matplotlib.pyplot as plt

# Per-epoch loss curves recorded during training: dots for the training split,
# a solid line for the validation split.
plt.clf()
recorded = history.history
loss_values, val_loss_values = recorded['loss'], recorded['val_loss']
epochs = range(1, len(loss_values) + 1)  # epochs are numbered from 1
for curve, fmt, legend_label in (
    (loss_values, 'bo', 'Training loss'),
    (val_loss_values, 'b', 'Validation loss'),
):
    plt.plot(epochs, curve, fmt, label=legend_label)
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Per-epoch accuracy curves recorded during training: dots for the training
# split, a solid line for the validation split.
plt.clf()
recorded = history.history
acc_values, val_acc_values = recorded['accuracy'], recorded['val_accuracy']
epochs = range(1, len(acc_values) + 1)  # epochs are numbered from 1
for curve, fmt, legend_label in (
    (acc_values, 'bo', 'Training acc'),
    (val_acc_values, 'b', 'Validation acc'),
):
    plt.plot(epochs, curve, fmt, label=legend_label)
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# Score the trained model on the held-out test split and print the result
# under a separator line and a heading.
results = model.evaluate(X_test, y_test)
print("_"*100, "Test Loss and Accuracy", sep="\n")
print("results ", results)

In [None]:
# Predicted scores for every test review; the model's final layer has 5
# sigmoid units, one per category.
predictions = model.predict(X_test)

In [None]:
# Human-readable category names, looked up by output/label column index.
# NOTE(review): the order is assumed to match the label columns taken from the
# CSV (positions 1-5) — confirm against the file header.
opiniones = ['service', 'food', 'anecdotes', 'price', 'ambience']

In [None]:
# Show the raw score vectors of the first five test reviews.
for scores in predictions[:5]:
    print(scores)

In [None]:
# argsort orders the category indices of the first test review from lowest to
# highest predicted score, so the last entry is the model's top category.
sorted_categories = predictions[0].argsort()
sorted_categories

In [None]:
# For the first 12 test reviews, list the categories from most to least likely
# with their predicted scores, followed by the ground-truth labels.
for op in range(12):
    scores = predictions[op]
    sorted_categories = scores.argsort()  # ascending: last entry is the top category
    # Only report reviews whose best prediction reaches at least 10% confidence.
    # argsort returns indices, so the threshold has to be checked against the
    # score stored at the top index, not against an index itself.
    if scores[sorted_categories[-1]] >= 0.1:

        # Walk the ranking backwards to print the highest-scoring category first.
        for i in sorted_categories[::-1]:
            print("{} ({:.3})".format(opiniones[i], scores[i]))

        # Ground truth: every category whose label is set for this review.
        for i in range(5):
            if y_test[op][i]:
                print("labels: {}".format(opiniones[i]))
        print("______________________________________________________________________________________________________")
