In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw reviews table; later cells read its 'Reviews' and 'Recommended' columns.
d1 = pd.read_csv("reviews_data1.csv")

In [3]:
# Pull out the two columns used downstream: the review text and the
# recommendation flag that is recoded into a "Pos"/"Neg" label afterwards.
reviews, sentiment = d1['Reviews'], d1['Recommended']

In [4]:
# Recode the recommendation flag: a value equal to 'yes' becomes "Pos";
# every other value (anything that does not compare equal to 'yes') becomes "Neg".
change_format_sentiment = ["Pos" if sent == 'yes' else "Neg" for sent in sentiment]

In [5]:
# Assemble the working frame (review text + "Pos"/"Neg" label) in a single
# constructor call. Appending one row at a time with pd.concat re-copies all
# accumulated rows on every iteration (quadratic in the number of reviews);
# building the frame once is linear and yields the same two columns with a
# fresh 0..n-1 index.
df_1 = pd.DataFrame(
    {"Review": list(reviews), "Sentiment": change_format_sentiment},
    columns=["Review", "Sentiment"],
)

In [6]:
# Corpus the tokenizer is fitted on: the 'Review' column of df_1 (all rows).
vocab = df_1['Review']

In [7]:
# Load the pre-trained GloVe vectors into a word -> vector lookup.
# Each line is parsed as "<word> <v1> <v2> ... <vN>", split on single spaces.
from tqdm import tqdm

embedding_vector = {}
# The context manager guarantees the file handle is closed once loading
# finishes (or fails), and the explicit encoding keeps the read independent of
# the platform's default codec.
with open('glove.6B.200d.txt', encoding='utf-8') as f:
    for line in tqdm(f):
        # Drop the trailing newline so the last coefficient is a clean number.
        value = line.rstrip('\n').split(' ')
        word = value[0]
        coef = np.array(value[1:], dtype='float32')
        embedding_vector[word] = coef

400000it [00:07, 55702.02it/s]


In [8]:
from tensorflow.keras.preprocessing.text import Tokenizer

In [9]:
# Build the word -> integer index from the review corpus.
# NOTE(review): vocab is the full df_1['Review'] column, so the index is fitted
# on all rows, including those that train_test_split later puts in `test` —
# confirm this is intended.
token = Tokenizer()
token.fit_on_texts(vocab)

In [11]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows as a test set; the fixed random_state makes the split repeatable.
train, test = train_test_split(df_1, test_size=0.1, random_state=42)

In [12]:
# Training inputs as a plain Python list of review texts, plus the matching
# "Pos"/"Neg" label column (kept as a pandas Series).
x_train, y_train = list(train['Review']), train['Sentiment']

In [13]:
# Number of rows for the embedding table: one per indexed word plus one extra.
# NOTE(review): the extra row is presumably for index 0, which the Keras
# Tokenizer does not assign to any word — confirm against the Tokenizer docs.
vocab_size = len(token.word_index)+1
print(vocab_size)

14504


In [14]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Turn each training review into a list of word indices, then pad at the end
# ('post') to a fixed length of 1100 — the same value the Embedding layers
# below receive as input_length.
seq = token.texts_to_sequences(x_train)
pad_seq = pad_sequences(seq, maxlen=1100, padding = 'post')

In [15]:
from tqdm import tqdm
# Build the (vocab_size, 200) weight table for the Embedding layers: row i
# holds the GloVe vector of the word whose tokenizer index is i. Words with no
# GloVe entry keep their all-zero row.
embedding_matrix = np.zeros((vocab_size, 200))
for word, i in tqdm(token.word_index.items()):
    if word in embedding_vector:
        embedding_matrix[i] = embedding_vector[word]

100%|████████████████████████████████████| 14503/14503 [00:00<00:00, 332715.22it/s]


In [16]:
# Sanity check: the table should be (vocab_size, 200).
embedding_matrix.shape

(14504, 200)

In [19]:
# Encode the string labels as integers for the binary classifier.
# Mind the direction of the mapping: "Pos" becomes 0 and "Neg" becomes 1.
# NOTE(review): this rebinds `sentiment`, which an earlier cell uses for the
# 'Recommended' column — re-running that earlier cell afterwards would iterate
# over this dict instead of the column.
sentiment = {"Pos": 0, "Neg": 1}

y_filtered_converted = [sentiment[label] for label in y_train]

In [20]:
# Convert the list of integer labels into an int64 NumPy array for model.fit.
y_filtered_converted = np.array(y_filtered_converted, dtype=np.int64)

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense,Dropout,Embedding,Bidirectional, Conv1D, Flatten

In [26]:
from tensorflow import keras

In [28]:
# Model 4: frozen GloVe embeddings -> two stacked bidirectional LSTMs (each
# with an L1 kernel regulariser) -> Conv1D over the sequence -> Flatten ->
# single sigmoid unit.
model_4 = Sequential([
    # Embedding table initialised from embedding_matrix and excluded from training.
    Embedding(vocab_size, 200, weights = [embedding_matrix], input_length = 1100, trainable = False),
    Bidirectional(LSTM(256, return_sequences = True, kernel_regularizer = keras.regularizers.l1())),
    Bidirectional(LSTM(128, return_sequences = True, kernel_regularizer = keras.regularizers.l1())),
    Conv1D(256, 3, activation='tanh', padding = 'same'),
    Flatten(),
    Dense(1, activation = 'sigmoid'),
])

model_4.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics = ['accuracy'],
)

In [29]:
# Train model 4 for 8 epochs in batches of 128; a 0.2 fraction of the supplied
# training arrays is set aside by Keras for validation.
history_4 = model_4.fit(
    x=pad_seq,
    y=y_filtered_converted,
    epochs=8,
    batch_size=128,
    validation_split=0.2,
    verbose=1,
)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [39]:
# Model 5: frozen GloVe embeddings -> three Conv1D layers -> three stacked
# bidirectional LSTMs (only the first carries an L1 kernel regulariser; the
# last returns just its final state) -> single sigmoid unit.
model_5 = Sequential([
    # Embedding table initialised from embedding_matrix and excluded from training.
    Embedding(vocab_size, 200, weights = [embedding_matrix], input_length = 1100, trainable = False),
    Conv1D(256, 3, activation='tanh', padding = 'same'),
    Conv1D(256, 3, activation='tanh', padding = 'same'),
    Conv1D(128, 3, activation='tanh', padding = 'same'),
    Bidirectional(LSTM(256, return_sequences = True, kernel_regularizer = keras.regularizers.l1())),
    Bidirectional(LSTM(128, return_sequences = True)),
    Bidirectional(LSTM(128)),
    Dense(1, activation = 'sigmoid'),
])

model_5.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics = ['accuracy'],
)

In [40]:
# Print the layer-by-layer output shapes and parameter counts of model 5.
model_5.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_8 (Embedding)     (None, 1100, 200)         2900800   
                                                                 
 conv1d_9 (Conv1D)           (None, 1100, 256)         153856    
                                                                 
 conv1d_10 (Conv1D)          (None, 1100, 256)         196864    
                                                                 
 conv1d_11 (Conv1D)          (None, 1100, 128)         98432     
                                                                 
 bidirectional_15 (Bidirecti  (None, 1100, 512)        788480    
 onal)                                                           
                                                                 
 bidirectional_16 (Bidirecti  (None, 1100, 256)        656384    
 onal)                                                

In [41]:
# Train model 5 for 8 epochs in batches of 128; a 0.2 fraction of the supplied
# training arrays is set aside by Keras for validation.
history_5 = model_5.fit(
    x=pad_seq,
    y=y_filtered_converted,
    epochs=8,
    batch_size=128,
    validation_split=0.2,
    verbose=1,
)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
 4/20 [=====>........................] - ETA: 4:14 - loss: 1.7783 - accuracy: 0.7051

KeyboardInterrupt: 

In [42]:
# Model 6: same convolutional front end as model 5, followed by four stacked
# bidirectional LSTMs (only the first carries an L1 kernel regulariser; the
# last returns just its final state) -> single sigmoid unit.
model_6 = Sequential([
    # Embedding table initialised from embedding_matrix and excluded from training.
    Embedding(vocab_size, 200, weights = [embedding_matrix], input_length = 1100, trainable = False),
    Conv1D(256, 3, activation='tanh', padding = 'same'),
    Conv1D(256, 3, activation='tanh', padding = 'same'),
    Conv1D(128, 3, activation='tanh', padding = 'same'),
    Bidirectional(LSTM(256, return_sequences = True, kernel_regularizer = keras.regularizers.l1())),
    Bidirectional(LSTM(256, return_sequences = True)),
    Bidirectional(LSTM(128, return_sequences = True)),
    Bidirectional(LSTM(128)),
    Dense(1, activation = 'sigmoid'),
])

model_6.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics = ['accuracy'],
)

In [43]:
# Train model 6 for 15 epochs in batches of 128; a 0.2 fraction of the supplied
# training arrays is set aside by Keras for validation.
history_6 = model_6.fit(
    x=pad_seq,
    y=y_filtered_converted,
    epochs=15,
    batch_size=128,
    validation_split=0.2,
    verbose=1,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [45]:
# Model 7: frozen GloVe embeddings -> three stacked bidirectional LSTMs
# (dropout 0.2 on the first and third, L1 kernel regulariser on the second)
# -> Conv1D -> Flatten -> 10-unit tanh layer -> single sigmoid unit.
model_7 = Sequential([
    # Embedding table initialised from embedding_matrix and excluded from training.
    Embedding(vocab_size, 200, weights = [embedding_matrix], input_length = 1100, trainable = False),
    Bidirectional(LSTM(256, return_sequences = True, dropout = 0.2)),
    Bidirectional(LSTM(256, return_sequences = True, kernel_regularizer = keras.regularizers.l1())),
    Bidirectional(LSTM(128, return_sequences = True, dropout = 0.2)),
    Conv1D(256, 3, activation='tanh', padding = 'same'),
    Flatten(),
    Dense(10, activation = 'tanh'),
    Dense(1, activation = 'sigmoid'),
])

model_7.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics = ['accuracy'],
)

In [46]:
# Print the layer-by-layer output shapes and parameter counts of model 7.
model_7.summary()

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_11 (Embedding)    (None, 1100, 200)         2900800   
                                                                 
 bidirectional_25 (Bidirecti  (None, 1100, 512)        935936    
 onal)                                                           
                                                                 
 bidirectional_26 (Bidirecti  (None, 1100, 512)        1574912   
 onal)                                                           
                                                                 
 bidirectional_27 (Bidirecti  (None, 1100, 256)        656384    
 onal)                                                           
                                                                 
 conv1d_16 (Conv1D)          (None, 1100, 256)         196864    
                                                     

In [47]:
# Train model 7 for 15 epochs in batches of 64; a 0.2 fraction of the supplied
# training arrays is set aside by Keras for validation.
history_7 = model_7.fit(
    x=pad_seq,
    y=y_filtered_converted,
    epochs=15,
    batch_size=64,
    validation_split=0.2,
    verbose=1,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [48]:
# Model 8: frozen GloVe embeddings -> two Conv1D layers (kernel sizes 5 and 3)
# -> three stacked bidirectional LSTMs (dropout 0.2 on the first, L2(1e-4)
# kernel regulariser on the second; the last returns just its final state)
# -> single sigmoid unit.
model_8 = Sequential([
    # Embedding table initialised from embedding_matrix and excluded from training.
    Embedding(vocab_size, 200, weights = [embedding_matrix], input_length = 1100, trainable = False),
    Conv1D(256, 5, activation='tanh', padding = 'same'),
    Conv1D(128, 3, activation='tanh', padding = 'same'),
    Bidirectional(LSTM(256, return_sequences = True, dropout = 0.2)),
    Bidirectional(LSTM(256, return_sequences = True, kernel_regularizer = keras.regularizers.L2(1e-4))),
    Bidirectional(LSTM(128)),
    Dense(1, activation = 'sigmoid'),
])

model_8.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics = ['accuracy'],
)

In [49]:
# Train model 8 for 8 epochs in batches of 64; a 0.2 fraction of the supplied
# training arrays is set aside by Keras for validation.
history_8 = model_8.fit(
    x=pad_seq,
    y=y_filtered_converted,
    epochs=8,
    batch_size=64,
    validation_split=0.2,
    verbose=1,
)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [51]:
# Model 9: frozen GloVe embeddings -> two Conv1D layers (kernel sizes 5 and 3)
# -> five stacked bidirectional LSTMs (dropout 0.2 on the third and fourth,
# L2(1e-4) kernel regulariser on the last, which returns just its final state)
# -> single sigmoid unit.
model_9 = Sequential([
    # Embedding table initialised from embedding_matrix and excluded from training.
    Embedding(vocab_size, 200, weights = [embedding_matrix], input_length = 1100, trainable = False),
    Conv1D(256, 5, activation='tanh', padding = 'same'),
    Conv1D(128, 3, activation='tanh', padding = 'same'),
    Bidirectional(LSTM(256, return_sequences = True)),
    Bidirectional(LSTM(256, return_sequences = True)),
    Bidirectional(LSTM(256, return_sequences = True, dropout = 0.2)),
    Bidirectional(LSTM(256, return_sequences = True, dropout = 0.2)),
    Bidirectional(LSTM(128, kernel_regularizer = keras.regularizers.L2(1e-4))),
    Dense(1, activation = 'sigmoid'),
])

model_9.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics = ['accuracy'],
)

In [52]:
# Print the layer-by-layer output shapes and parameter counts of model 9.
model_9.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_13 (Embedding)    (None, 1100, 200)         2900800   
                                                                 
 conv1d_19 (Conv1D)          (None, 1100, 256)         256256    
                                                                 
 conv1d_20 (Conv1D)          (None, 1100, 128)         98432     
                                                                 
 bidirectional_31 (Bidirecti  (None, 1100, 512)        788480    
 onal)                                                           
                                                                 
 bidirectional_32 (Bidirecti  (None, 1100, 512)        1574912   
 onal)                                                           
                                                                 
 bidirectional_33 (Bidirecti  (None, 1100, 512)      

In [54]:
# Train model 9 for 8 epochs in batches of 128; a 0.2 fraction of the supplied
# training arrays is set aside by Keras for validation.
history_9 = model_9.fit(
    x=pad_seq,
    y=y_filtered_converted,
    epochs=8,
    batch_size=128,
    validation_split=0.2,
    verbose=1,
)

Epoch 1/8

KeyboardInterrupt: 