<a href="https://colab.research.google.com/github/techfreakworm/MovieReview-SentimentAnalysis/blob/master/keras_team_imdb_cnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import tensorflow as tf
import numpy as  np

In [2]:
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import Conv1D, GlobalMaxPooling1D
from keras.datasets import imdb


# set parameters:
max_features = 20000
maxlen = 100
batch_size = 32
embedding_dims = 128
filters = 250
kernel_size = 3
hidden_dims = 250
epochs = 5

print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

print('Build model...')
model = Sequential()

# we start off with an efficient embedding layer which maps
# our vocab indices into embedding_dims dimensions
model.add(Embedding(max_features,
                    embedding_dims,
                    input_length=maxlen))
model.add(Dropout(0.2))

# we add a Convolution1D, which will learn filters
# word group filters of size filter_length:
model.add(Conv1D(filters,
                 kernel_size,
                 padding='valid',
                 activation='relu',
                 strides=1))
# we use max pooling:
model.add(GlobalMaxPooling1D())

# We add a vanilla hidden layer:
model.add(Dense(hidden_dims))
model.add(Dropout(0.2))
model.add(Activation('relu'))

# We project onto a single unit output layer, and squash it with a sigmoid:
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_test, y_test))

Using TensorFlow backend.


Loading data...
25000 train sequences
25000 test sequences
Pad sequences (samples x time)
x_train shape: (25000, 100)
x_test shape: (25000, 100)
Build model...
Train on 25000 samples, validate on 25000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f4d79e5a198>

In [3]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 100, 128)          2560000   
_________________________________________________________________
dropout_1 (Dropout)          (None, 100, 128)          0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 98, 250)           96250     
_________________________________________________________________
global_max_pooling1d_1 (Glob (None, 250)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 250)               62750     
_________________________________________________________________
dropout_2 (Dropout)          (None, 250)               0         
_________________________________________________________________
activation_1 (Activation)    (None, 250)               0         
__________

In [9]:
zeros = np.zeros([1,maxlen], dtype='int32')
zeros

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [0]:
word_index[]=imdb.get_word_index()

sentence =  "I liked the movie very much. My friends said it was good, and they couldn't be any more correct. This is value for both time and money"
# sentence = "I hated the movie. My Friends said it was shit and they were right. Cast was pathetic as well"
# sentence = "good movie"
words = sentence.split()
words = [word.lower() for word in words]
import string
import numpy as np
table = str.maketrans('', '', string.punctuation)
words = [w.translate(table) for w in words]
input_array=np.array([[word_index[word]+3 if word in word_index else 0 for word in words]])

# print(input_array)
input_array = sequence.pad_sequences(input_array, maxlen)
# print(input_array)
# input_array.shape

In [16]:
prediction = model.predict_classes(input_array)
print(model.predict(input_array))
# print(model.predict(input_array))
# print(model.predict_classes(input_array))
if prediction[0][0] == 0:
  print('Bad')
if prediction[0][0] == 1:
  print('Good')

[[0.99954283]]
Good


In [0]:
# !mkdir model
model.save('./model/cnn_imdb.h5')

In [0]:
from google.colab import files
files.download('./model/cnn_imdb.h5')

In [34]:
!ls

cnn_imdb.h5  model  sample_data


In [41]:
cnnmodel = tf.keras.models.load_model(filepath='cnn_imdb.h5')

TypeError: ignored

In [43]:
!tensorflowjs_converter --input_format=keras cnn_imdb.h5 .

Using TensorFlow backend.


In [44]:
!ls

cnn_imdb.h5	  group1-shard2of3  model	sample_data
group1-shard1of3  group1-shard3of3  model.json


In [0]:
!mkdir tfjs

In [0]:
mv `ls | grep group` tfjs/

In [47]:
ls

cnn_imdb.h5  [0m[01;34mmodel[0m/  model.json  [01;34msample_data[0m/  [01;34mtfjs[0m/


In [0]:
mv model.json tfjs/

In [49]:
ls tfjs

group1-shard1of3  group1-shard2of3  group1-shard3of3  model.json


In [51]:
!apt-get install zip

Reading package lists... Done
Building dependency tree       
Reading state information... Done
zip is already the newest version (3.0-11build1).
0 upgraded, 0 newly installed, 0 to remove and 5 not upgraded.


In [52]:
!zip -r tfjs.zip tfjs

  adding: tfjs/ (stored 0%)
  adding: tfjs/group1-shard2of3 (deflated 8%)
  adding: tfjs/group1-shard3of3 (deflated 7%)
  adding: tfjs/group1-shard1of3 (deflated 7%)
  adding: tfjs/model.json (deflated 76%)


In [53]:
ls

cnn_imdb.h5  [0m[01;34mmodel[0m/  [01;34msample_data[0m/  [01;34mtfjs[0m/  tfjs.zip


In [0]:
from google.colab import files
files.download('tfjs.zip')