## LSTM Neural Nets and Flask/Streamlit Deploy

This small project deploys an LSTM/RNN neural net that learns to read text, here by classifying the sentiment of reviews. Deployment occurs through Flask and Streamlit.

### Importing libraries

In [3]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Embedding
from tensorflow.keras.layers import Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
import re
import pickle

### Importing and working on the dataset

In [5]:
# Load the tab-separated reviews archive and keep only the text and its label.
# quoting=3 is csv.QUOTE_NONE: quote characters inside a review are treated as
# ordinary text rather than as field delimiters.
df = pd.read_csv(
    'reviews_dataset.tsv.zip',
    header=0,
    delimiter="\t",
    quoting=3,
)[['review', 'sentiment']]

df.shape

(25000, 2)

In [6]:
# Check how many reviews fall into each sentiment class (class balance).
df['sentiment'].value_counts()

1    12500
0    12500
Name: sentiment, dtype: int64

Apply some text cleaning (lower-casing and removing every character that is not a letter, digit, or whitespace) to make the reviews easier for the algorithm to process.

In [7]:
# Normalise the review text: lower-case it, then drop every character that is
# not an ASCII letter, a digit, or whitespace.
#
# The pattern is a raw string so that `\s` reaches the regex engine verbatim
# (a plain '...\s...' literal is an invalid escape sequence and warns on recent
# Python versions). The range is `A-Z`, not `A-z`: the latter also spans the
# characters [ \ ] ^ _ and the backtick, which would then survive the cleaning.
df['review'] = (
    df['review']
    .str.lower()
    .str.replace(r'[^a-zA-Z0-9\s]', '', regex=True)
)

In [11]:
# Cap on the vocabulary the tokenizer uses when encoding texts; the same value
# sizes the embedding layer's input dimension further down.
max_features = 1000
tokenizer = Tokenizer(num_words=max_features, split=' ')

# Learn the word index from the cleaned reviews.
review_texts = df['review'].values
tokenizer.fit_on_texts(review_texts)

# Encode each review as a sequence of word indices, then pad the sequences to
# a common length so they form a single 2-D array.
X = pad_sequences(tokenizer.texts_to_sequences(review_texts))
X.shape

(25000, 1473)

In [12]:
# Network: word embedding -> single LSTM layer -> 2-way softmax
# (one output unit per sentiment class).
embed_dim = 50
model = Sequential([
    Embedding(max_features, embed_dim, input_length=X.shape[1]),
    LSTM(10),
    Dense(2, activation='softmax'),
])

# Labels are fed as one-hot vectors, hence categorical cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [13]:
# summary() prints the layer table itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 1473, 50)          50000     
                                                                 
 lstm (LSTM)                 (None, 10)                2440      
                                                                 
 dense (Dense)               (None, 2)                 22        
                                                                 
Total params: 52,462
Trainable params: 52,462
Non-trainable params: 0
_________________________________________________________________
None


In [17]:
# One-hot encode the sentiment labels (one column per class) to match the
# 2-unit softmax output of the model.
y = pd.get_dummies(df['sentiment']).values

# Hold out 25% of the reviews for testing; the fixed seed keeps the split
# the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=99
)

# Report the shapes of the train split, then the test split.
for features, labels in ((X_train, y_train), (X_test, y_test)):
    print(features.shape, labels.shape)

(18750, 1473) (18750, 2)
(6250, 1473) (6250, 2)


## Training the model

In [18]:
# Train on the training split for 5 epochs, showing per-epoch progress.
# Left as the cell's last expression so the returned History object is shown.
model.fit(x=X_train, y=y_train, epochs=5, verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1b5bd7b2a90>

### Testing the model

Now we create a variable holding a test string to be evaluated by the model.

In [25]:
# Encode a sample sentence with the fitted tokenizer and pad it to the same
# length as the training sequences so it matches the model's input shape.
test = ['Movie is very good']
test = pad_sequences(
    tokenizer.texts_to_sequences(test),
    maxlen=X.shape[1],
    dtype='int32',
    value=0,
)
print(test.shape)

# Scores for the single sample; the index of the highest score is the class
# (0 -> negative, 1 -> positive, following the one-hot column order).
sentiment = model.predict(test)[0]
predicted_label = {0: "Negative", 1: "Positive"}.get(int(np.argmax(sentiment)))
if predicted_label is not None:
    print(predicted_label)

(1, 1473)
Positive


### Saving the model

In [26]:
# Serialise the model architecture (not the weights) as a JSON string.
model_json = model.to_json()

# Persist the fitted tokenizer so the serving code can encode new text with
# the same word index that was used for training.
with open('tokenizer.pickle', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file, protocol=pickle.HIGHEST_PROTOCOL)

# Write the architecture next to the tokenizer.
with open("model.json", "w") as architecture_file:
    architecture_file.write(model_json)

In [27]:
# Save the trained weights to an HDF5 file. The architecture is stored
# separately in model.json, so both files are needed to rebuild the model.
model.save_weights("model.h5")

# EVERYTHING ABOVE WORKED PERFECTLY WELL. PLEASE DON'T BREAK IT!