In [None]:
%tensorflow_version 1.15
%matplotlib inline
!pip install memory-profiler
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nltk.corpus import stopwords

from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.metrics import confusion_matrix

print(tf.__version__)

# Stop training once validation loss has failed to improve for 3 consecutive
# epochs. restore_best_weights=True rolls the model back to the epoch with the
# lowest val_loss when early stopping triggers; without it the model that is
# later evaluated and saved would keep the weights from the last (worse) epoch.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=3,
    verbose=1,
    mode='min',
    restore_best_weights=True,
)

class MyCallback(tf.keras.callbacks.Callback):
  """Keras callback that e-mails a notification once training has finished.

  The notification is best-effort: a failure to send the mail is reported on
  stdout but never interrupts the training run.
  """

  def on_train_end(self, logs=None):
    """Send the notification e-mail at the end of training.

    Args:
      logs: Metrics dict supplied by Keras; not used here.
    """
    try:
      self.send_email()
    except OSError as exc:
      # smtplib.SMTPException derives from OSError, so this covers SMTP
      # failures (e.g. rejected login) as well as network errors. A failed
      # notification must not propagate out of model.fit() and discard the
      # training result.
      print('Mail not sent:', exc)

  def send_email(self):
    """Build the notification message and send it through Gmail's SMTP server.

    Sender address, sender password and receiver address are read from the
    environment variables MAIL_SENDER_ADDRESS, MAIL_SENDER_PASSWORD and
    MAIL_RECEIVER_ADDRESS; when a variable is unset the placeholder below is
    used instead.

    Raises:
      OSError: If connecting, negotiating TLS, logging in or sending fails.
    """
    import os
    import smtplib
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText
    mail_content = "Training finished, check me after you finished your cup of coffee"
    # The mail addresses and password (prefer the environment variables so the
    # credentials do not have to be stored in the notebook).
    sender_address = os.environ.get('MAIL_SENDER_ADDRESS', 'REDACTED : INPUT YOUR EMAIL HERE')
    sender_pass = os.environ.get('MAIL_SENDER_PASSWORD', 'REDACTED : INPUT YOUR EMAIL PASSWORD HERE')
    receiver_address = os.environ.get('MAIL_RECEIVER_ADDRESS', 'REDACTED : INPUT RECEIVER EMAIL ADDRESS')
    # Set up the MIME message.
    message = MIMEMultipart()
    message['From'] = sender_address
    message['To'] = receiver_address
    message['Subject'] = 'Model Training is finished'   # The subject line
    # The body of the mail.
    message.attach(MIMEText(mail_content, 'plain'))
    text = message.as_string()
    # The context manager closes the SMTP session even when starttls/login/
    # sendmail raises; the timeout keeps a dead connection from hanging forever.
    with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as session:
      session.starttls()  # upgrade the connection to TLS before logging in
      session.login(sender_address, sender_pass)
      session.sendmail(sender_address, receiver_address, text)
    print('Mail Sent')


# Callbacks handed to model.fit(): early stopping plus the e-mail notification.
callbacks = [early_stop, MyCallback()]



In [None]:
# Data Preprocessing

# Tokenizer / embedding settings.
vocab_size, embedding_dim = 10000, 64
oov_token = "<OOV>"

# Every sequence is padded or truncated (both at the end) to this many tokens.
max_length = 250
trunc_type = padding_type = 'post'

# Fraction of the rows used for training; the remainder is the test set.
training_portion = 0.8

def train_test_split(data, train_size):
    """Cut a sliceable sequence into a leading train part and a trailing test part.

    Args:
        data: Any object supporting slice indexing.
        train_size: Number of leading items that go into the train part.

    Returns:
        A ``(train, test)`` tuple: ``data[:train_size]`` and ``data[train_size:]``.
    """
    head, tail = data[:train_size], data[train_size:]
    return head, tail

# Load the dataset and shuffle the rows. A fixed random_state makes the
# train/test split (and therefore the reported scores) reproducible.
df = pd.read_csv('/content/drive/My Drive/final.csv')
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

train_size = int(len(df.product_name) * training_portion)

train_text, test_text = train_test_split(df['product_name'], train_size)
train_cat, test_cat = train_test_split(df['category'], train_size)

# Fit the tokenizer on the training text only. Passing oov_token maps words
# that are unknown (or outside the vocab_size most frequent ones) to a
# dedicated index instead of silently dropping them from the sequence.
tokenize = Tokenizer(num_words=vocab_size, oov_token=oov_token, char_level=False)
tokenize.fit_on_texts(train_text)

x_train_sequences = tokenize.texts_to_sequences(train_text)
x_test_sequences = tokenize.texts_to_sequences(test_text)

x_train = pad_sequences(x_train_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)
x_test = pad_sequences(x_test_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

# Fit the label encoder on every category in the dataset: a category that
# happens to land only in the test split would otherwise make
# encoder.transform(test_cat) raise a ValueError.
encoder = LabelEncoder()
encoder.fit(df['category'])
y_train = encoder.transform(train_cat)
y_test = encoder.transform(test_cat)

# Converts the labels to a one-hot representation. The class count comes from
# the encoder so it always matches the labels it can produce.
num_classes = len(encoder.classes_)
print(num_classes)
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)


In [None]:
# Build and Train Model

# Bag-of-embeddings classifier: token embeddings are averaged over the
# sequence and fed through two dense layers to a softmax over the categories.
model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(vocab_size+1, embedding_dim, input_length=max_length),
    #tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128)),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(1024, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
    ])

# Labels are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would emit a stray "None" line).
model.summary()

# Train for up to 20 epochs, holding out 10% of the training data for validation.
history = model.fit(
    x_train,
    y_train,
    validation_split=0.1,
    epochs=20,
    batch_size=32,
    callbacks=callbacks,
    verbose=1,
)

# Score the trained model on the held-out test set; score is [loss, accuracy].
score = model.evaluate(x_test, y_test, batch_size=32, verbose=1)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

# Show predictions for the first few test samples next to their true labels.
text_labels = encoder.classes_

# Predict the samples in one batch instead of one model.predict() call each,
# and never index past the end of a small test set.
num_samples = min(10, len(x_test))
sample_predictions = model.predict(x_test[:num_samples])

for i in range(num_samples):
    predicted_label = text_labels[np.argmax(sample_predictions[i])]
    print(test_text.iloc[i][:50], "...")
    # str() keeps the concatenation valid when the labels are not strings.
    print('Actual label:' + str(test_cat.iloc[i]))
    print("Predicted label: " + str(predicted_label) + "\n")
    

import pickle

# Persist the trained model together with its optimizer state.
model.save('/content/drive/My Drive/text_classification.h5', include_optimizer=True)

# Persist the fitted label encoder and tokenizer so that inference can
# reproduce the exact preprocessing. The `with` blocks make sure the files are
# flushed and closed.
encoder_filename = '/content/drive/My Drive/encoder.pickle'
with open(encoder_filename, 'wb') as encoder_file:
    pickle.dump(encoder, encoder_file)

tokenizer_filename = '/content/drive/My Drive/tokenizer.pickle'
with open(tokenizer_filename, 'wb') as tokenizer_file:
    pickle.dump(tokenize, tokenizer_file)

In [None]:
####
# Load Saved Model

# Imported here as well so this cell does not depend on the training cell
# having been run first for the name `pickle`.
import pickle

from tensorflow.keras.models import load_model

m = load_model('/content/drive/My Drive/text_classification.h5')
encoder_filename = '/content/drive/My Drive/encoder.pickle'
tokenizer_filename = '/content/drive/My Drive/tokenizer.pickle'

# NOTE(review): pickle.load executes arbitrary code from the file; only load
# pickles that were written by this notebook.
with open(encoder_filename, 'rb') as encoder_file:
    loaded_encoder = pickle.load(encoder_file)
labels = loaded_encoder.classes_

with open(tokenizer_filename, 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

# Preprocess the input the same way as the training data: tokenize, then
# pad/truncate at the end to max_length (defined in the preprocessing cell).
txt_input = ['tea tree oil acne cream']
txt_sentences = tokenizer.texts_to_sequences(txt_input)
padded_input = pad_sequences(txt_sentences, maxlen=max_length, padding='post', truncating='post')
#matrix_input = tokenize.texts_to_matrix(txt_input, mode='binary')

prediction = m.predict(padded_input)
predicted_label = labels[np.argmax(prediction)]

print(predicted_label)

In [None]:
# Plot Graph

import matplotlib.pyplot as plt

# Depending on the Keras version the accuracy metric is recorded under
# 'accuracy' or 'acc'; accept either so the plot does not fail with a KeyError.
metrics = history.history
acc = metrics['accuracy'] if 'accuracy' in metrics else metrics['acc']
val_acc = metrics['val_accuracy'] if 'val_accuracy' in metrics else metrics['val_acc']
loss = metrics['loss']
val_loss = metrics['val_loss']

epochs = range(len(acc))

plt.plot(epochs, acc, 'bo', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
# The legend has to be drawn per figure; without this call the labels above
# are never shown on the accuracy plot.
plt.legend()

plt.figure()

plt.plot(epochs, loss, 'bo', label='Training Loss')
plt.plot(epochs, val_loss, 'b', label='Validation Loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()