In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LayerNormalization, Bidirectional, Dense, Dropout
from tensorflow.keras.losses import BinaryFocalCrossentropy
from tensorflow.keras.optimizers import Adam

In [2]:
df = pd.read_csv("sentiment.csv")
df.head()

Unnamed: 0,Sentiment,Label
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [3]:
sentence = df.Sentiment.str.split()
lengthSentence = sentence.str.len()
maxLength = lengthSentence.max()

In [5]:
oov_tok = '<OOV>'
vocab = 3000

token = Tokenizer(num_words = vocab, oov_token=oov_tok,
                      filters='!"#$%&()*+,-./:;<=>?@[\\]^`{|}~\t\n')
token.fit_on_texts(df.Sentiment)

In [6]:
from sklearn.model_selection import train_test_split
train_val, test_df = train_test_split(df, test_size=0.2, stratify = df['Label'])
train_df, val_df = train_test_split(train_val, test_size = 1/8, stratify = train_val['Label'])

In [7]:
train_token = token.texts_to_sequences(train_df.Sentiment)
train_padded = pad_sequences(train_token, maxlen=maxLength, padding='post')

val_token = token.texts_to_sequences(val_df.Sentiment)
val_padded = pad_sequences(val_token, maxlen=maxLength, padding='post')

test_token = token.texts_to_sequences(test_df.Sentiment)
test_padded = pad_sequences(test_token, maxlen=maxLength, padding='post')

In [8]:
model = tf.keras.Sequential([
    Embedding(len(token.word_index)+1, 64, mask_zero=True),
    LayerNormalization(),
    Bidirectional(tf.keras.layers.LSTM(64, dropout=0.3)),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])

In [9]:
lr = 0.001
optimizer=Adam(learning_rate=lr)
lossFunction = BinaryFocalCrossentropy(from_logits=True)

model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [10]:
model.fit(train_padded, train_df.Label, validation_data=(val_padded, val_df.Label), batch_size = 64, epochs=10, shuffle= True, verbose=2)

Epoch 1/10
11/11 - 13s - loss: 0.6904 - accuracy: 0.5257 - val_loss: 0.6812 - val_accuracy: 0.5600 - 13s/epoch - 1s/step
Epoch 2/10
11/11 - 1s - loss: 0.6038 - accuracy: 0.7729 - val_loss: 0.6436 - val_accuracy: 0.6300 - 887ms/epoch - 81ms/step
Epoch 3/10
11/11 - 1s - loss: 0.4712 - accuracy: 0.8629 - val_loss: 0.5770 - val_accuracy: 0.7300 - 883ms/epoch - 80ms/step
Epoch 4/10
11/11 - 1s - loss: 0.2603 - accuracy: 0.9271 - val_loss: 0.5601 - val_accuracy: 0.7000 - 884ms/epoch - 80ms/step
Epoch 5/10
11/11 - 1s - loss: 0.1060 - accuracy: 0.9771 - val_loss: 0.7213 - val_accuracy: 0.7500 - 899ms/epoch - 82ms/step
Epoch 6/10
11/11 - 1s - loss: 0.0466 - accuracy: 0.9900 - val_loss: 0.9308 - val_accuracy: 0.7500 - 877ms/epoch - 80ms/step
Epoch 7/10
11/11 - 1s - loss: 0.0347 - accuracy: 0.9929 - val_loss: 1.0014 - val_accuracy: 0.7500 - 851ms/epoch - 77ms/step
Epoch 8/10
11/11 - 1s - loss: 0.0255 - accuracy: 0.9943 - val_loss: 1.0830 - val_accuracy: 0.7300 - 866ms/epoch - 79ms/step
Epoch 9/10


<keras.callbacks.History at 0x7f5356705940>

In [11]:
model.evaluate(test_padded, test_df.Label)



[1.3835563659667969, 0.7049999833106995]

In [12]:
def deploy(text, token):
  token1 = token.texts_to_sequences([text])
  padToken = pad_sequences(token1, maxlen=maxLength, padding='post')
  predict = model.predict(padToken)
  label = np.where(predict > 0.5, 1, 0)
  if label[0][0] == 1:
    return "positive"
  else:
    return "nagative"


In [13]:
sentiment = "Welcome to Learn Share IT"
label = deploy(sentiment, token)
print("Your sentiment is", label)

Your sentiment is positive
