In [36]:
import keras
import tensorflow as tf
import pandas as pd
import numpy as np

In [37]:
from keras.datasets import imdb

In [38]:
vocabulary_size = 10000

In [39]:
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words = vocabulary_size)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [40]:
from tensorflow import feature_column

In [41]:
imdb.load_data(num_words=10000)
data = np.concatenate((X_train, X_test), axis=0)
targets = np.concatenate((y_train, y_test), axis=0)

In [42]:
print("Categories:", np.unique(targets))
print("Number of unique words:", len(np.unique(np.hstack(data))))

Categories: [0 1]
Number of unique words: 9998


In [43]:
length = [len(i) for i in data]
print("Average Review length:", np.mean(length))
print("Standard Deviation:", round(np.std(length)))

Average Review length: 234.75892
Standard Deviation: 173


In [44]:
index = imdb.get_word_index()
reverse_index = dict([(value, key) for (key, value) in index.items()]) 
decoded = " ".join( [reverse_index.get(i - 3, "#") for i in data[0]] )
print(decoded) 

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
# this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert # is an amazing actor and now the same being director # father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for # and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also # to the two little boy's that played the # of norman and paul they were just brilliant children are often left out of the # list i think because the stars that play them all grown up are such a big profile for th

In [45]:
def vectorize(sequences, dimension = 10000):
  results = np.zeros((len(sequences), dimension))
  for i, sequence in enumerate(sequences):
    results[i, sequence] = 1
  return results
data = vectorize(data)
targets = np.array(targets).astype("float32")

In [46]:
test_x = data[:10000]
test_y = targets[:10000]
train_x = data[10000:]
train_y = targets[10000:]

In [47]:
from keras import models
from keras import layers
from keras import Sequential

In [48]:
model=Sequential()
# Input - Layer
model.add(layers.Dense(50, activation = "relu", input_shape=(10000, )))
# Hidden - Layers
model.add(layers.Dropout(0.3, noise_shape=None, seed=None))
model.add(layers.Dense(1024, activation = "relu"))
model.add(layers.Dropout(0.2, noise_shape=None, seed=None))
model.add(layers.Dense(2, activation = "relu"))
# Output- Layer
model.add(layers.Dense(1, activation = "sigmoid"))
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, 50)                500050    
                                                                 
 dropout (Dropout)           (None, 50)                0         
                                                                 
 dense_3 (Dense)             (None, 1024)              52224     
                                                                 
 dropout_1 (Dropout)         (None, 1024)              0         
                                                                 
 dense_4 (Dense)             (None, 2)                 2050      
                                                                 
 dense_5 (Dense)             (None, 1)                 3         
                                                                 
Total params: 554,327
Trainable params: 554,327
Non-tr

In [49]:
model.compile(loss='binary_crossentropy', 
             optimizer='adam', 
             metrics=['accuracy'])

In [50]:
results = model.fit(
 train_x, train_y,
 epochs= 20,
 batch_size = 512,
 validation_data = (test_x, test_y)
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [51]:
print(np.mean(results.history['accuracy']))

0.9761299967765809


In [52]:
print(np.mean(results.history['loss']))

0.06092624720185995


In [53]:
from keras.preprocessing.text import Tokenizer

In [54]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [55]:
label=[]
sentence=[]
with open('/content/drive/MyDrive/Datasets/yelp_labelled.txt') as f:
  lines = f.readlines()
for line in lines:
  label.append(int(line[-2]))
  sentence.append(line[:-3])

In [56]:
df = pd.DataFrame({'feature':sentence, 'target':label})

In [57]:
sentences_train=[]
sentences_test=[]
for i in range(700):
  sentences_train.append(df.feature[i])
for j in range(700,1000):
  sentences_test.append(df.feature[i])

In [58]:
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(sentences_train)

X_train = tokenizer.texts_to_sequences(sentences_train)
X_test = tokenizer.texts_to_sequences(sentences_test)

vocab_size = len(tokenizer.word_index) + 1

print(sentences_train[2])
print(X_train[2])

Not tasty and the texture was just nasty.
[14, 143, 2, 1, 432, 3, 45, 433]


In [59]:
y_train = df.target[:700]
y_test = df.target[700:]

In [60]:
for word in ['the', 'all', 'happy', 'sad']:
  print('{}: {}'.format(word, tokenizer.word_index[word]))

the: 1
all: 41
happy: 212
sad: 540


In [61]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

maxlen = 100

X_train = pad_sequences(X_train, padding='post', maxlen=maxlen)
X_test = pad_sequences(X_test, padding='post', maxlen=maxlen)

In [62]:
from keras.layers import Conv1D, GlobalMaxPooling1D, Embedding
from keras.layers import Activation, Dense

In [63]:
model_1 = Sequential()

embedding_dim = 50
maxlen= 100
num_filters= 64
kernel_size=5

model_1.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=maxlen))
model_1.add(Conv1D(64, 5, input_shape=(1,4), activation='relu'))
model_1.add(GlobalMaxPooling1D())
model_1.add(Dense(10,activation='relu'))
model_1.add(Dense(1, activation='sigmoid'))

model_1.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model_1.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 100, 50)           83000     
                                                                 
 conv1d_2 (Conv1D)           (None, 96, 64)            16064     
                                                                 
 global_max_pooling1d_2 (Glo  (None, 64)               0         
 balMaxPooling1D)                                                
                                                                 
 dense_6 (Dense)             (None, 10)                650       
                                                                 
 dense_7 (Dense)             (None, 1)                 11        
                                                                 
Total params: 99,725
Trainable params: 99,725
Non-trainable params: 0
__________________________________________________

In [64]:
history_1 = model_1.fit(X_train, y_train,epochs=20,verbose=False,validation_data=(X_test, y_test),batch_size=32)

In [65]:
train_acc = model_1.evaluate(X_train, y_train, verbose=False)
print("Training: Loss: {:.4f}".format(train_acc[0]), "Accuracy: {:.4f}".format(train_acc[1]))
test_acc = model_1.evaluate(X_test, y_test, verbose=False)
print("Testing Loss: {:.4f}".format(test_acc[0]), "Accuracy:  {:.4f}".format(test_acc[1]))

Training: Loss: 0.0012 Accuracy: 1.0000
Testing Loss: 2.0353 Accuracy:  0.6400


In [66]:
#References : 
#1. https://builtin.com/data-science/how-build-neural-network-keras