# week-1 Tokenizer

In [22]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer

In [23]:
sentence = [
    'আমার একটি গরু আছে',
    'আমার একটি মহিষ আছে',
    'তোমার একটি গরু আছে!',
    'তুমি কি মনে কর আমার গরু তরতাজা'
]

In [24]:
tokenizer = Tokenizer(num_words=100)
tokenizer.fit_on_texts(sentence)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(sentence)
print(word_index)
print(sequences)

{'আমার': 1, 'একটি': 2, 'গরু': 3, 'আছে': 4, 'মহিষ': 5, 'তোমার': 6, 'তুমি': 7, 'কি': 8, 'মনে': 9, 'কর': 10, 'তরতাজা': 11}
[[1, 2, 3, 4], [1, 2, 5, 4], [6, 2, 3, 4], [7, 8, 9, 10, 1, 3, 11]]


In [25]:
test_data = [
    'আমার আসলেই একটু গরু আছে',
    'আমার গরু ঘাস খায়'
]
test_seq = tokenizer.texts_to_sequences(test_data)
print(test_seq)

[[1, 3, 4], [1, 3]]


In [26]:
tokenizer = Tokenizer(num_words=100,oov_token='<OOV>')
tokenizer.fit_on_texts(sentence)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(sentence)
print(word_index)
print(sequences)

{'<OOV>': 1, 'আমার': 2, 'একটি': 3, 'গরু': 4, 'আছে': 5, 'মহিষ': 6, 'তোমার': 7, 'তুমি': 8, 'কি': 9, 'মনে': 10, 'কর': 11, 'তরতাজা': 12}
[[2, 3, 4, 5], [2, 3, 6, 5], [7, 3, 4, 5], [8, 9, 10, 11, 2, 4, 12]]


In [27]:
test_data = [
    'আমার আসলেই একটু গরু আছে',
    'আমার গরু ঘাস খায়'
]
test_seq = tokenizer.texts_to_sequences(test_data)
print(test_seq)

[[2, 1, 1, 4, 5], [2, 4, 1, 1]]


# Padding

In [28]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [29]:
sentence = [
    'আমার একটি গরু আছে',
    'আমার একটি মহিষ আছে',
    'তোমার একটি গরু আছে!',
    'তুমি কি মনে কর আমার গরু তরতাজা'
]

In [30]:
tokenizer = Tokenizer(num_words=100, oov_token='<OOV>')
tokenizer.fit_on_texts(sentence)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(sentence)

padded = pad_sequences(sequences)
print(word_index)
print(sequences)
print(padded)

{'<OOV>': 1, 'আমার': 2, 'একটি': 3, 'গরু': 4, 'আছে': 5, 'মহিষ': 6, 'তোমার': 7, 'তুমি': 8, 'কি': 9, 'মনে': 10, 'কর': 11, 'তরতাজা': 12}
[[2, 3, 4, 5], [2, 3, 6, 5], [7, 3, 4, 5], [8, 9, 10, 11, 2, 4, 12]]
[[ 0  0  0  2  3  4  5]
 [ 0  0  0  2  3  6  5]
 [ 0  0  0  7  3  4  5]
 [ 8  9 10 11  2  4 12]]


# padding post

In [31]:
tokenizer = Tokenizer(num_words=100, oov_token='<OOV>')
tokenizer.fit_on_texts(sentence)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(sentence)

padded = pad_sequences(sequences, padding='post')
print(word_index)
print(sequences)
print(padded)

{'<OOV>': 1, 'আমার': 2, 'একটি': 3, 'গরু': 4, 'আছে': 5, 'মহিষ': 6, 'তোমার': 7, 'তুমি': 8, 'কি': 9, 'মনে': 10, 'কর': 11, 'তরতাজা': 12}
[[2, 3, 4, 5], [2, 3, 6, 5], [7, 3, 4, 5], [8, 9, 10, 11, 2, 4, 12]]
[[ 2  3  4  5  0  0  0]
 [ 2  3  6  5  0  0  0]
 [ 7  3  4  5  0  0  0]
 [ 8  9 10 11  2  4 12]]


# Padding maxlen

In [32]:
tokenizer = Tokenizer(num_words=100, oov_token='<OOV>')
tokenizer.fit_on_texts(sentence)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(sentence)

padded = pad_sequences(sequences, padding='post',maxlen=5)
print(word_index)
print(sequences)
print(padded)

{'<OOV>': 1, 'আমার': 2, 'একটি': 3, 'গরু': 4, 'আছে': 5, 'মহিষ': 6, 'তোমার': 7, 'তুমি': 8, 'কি': 9, 'মনে': 10, 'কর': 11, 'তরতাজা': 12}
[[2, 3, 4, 5], [2, 3, 6, 5], [7, 3, 4, 5], [8, 9, 10, 11, 2, 4, 12]]
[[ 2  3  4  5  0]
 [ 2  3  6  5  0]
 [ 7  3  4  5  0]
 [10 11  2  4 12]]


In [33]:
tokenizer = Tokenizer(num_words=100, oov_token='<OOV>')
tokenizer.fit_on_texts(sentence)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(sentence)

padded = pad_sequences(sequences, maxlen=5)
print(word_index)
print(sequences)
print(padded)

{'<OOV>': 1, 'আমার': 2, 'একটি': 3, 'গরু': 4, 'আছে': 5, 'মহিষ': 6, 'তোমার': 7, 'তুমি': 8, 'কি': 9, 'মনে': 10, 'কর': 11, 'তরতাজা': 12}
[[2, 3, 4, 5], [2, 3, 6, 5], [7, 3, 4, 5], [8, 9, 10, 11, 2, 4, 12]]
[[ 0  2  3  4  5]
 [ 0  2  3  6  5]
 [ 0  7  3  4  5]
 [10 11  2  4 12]]


# Padding Truncating 

In [34]:
tokenizer = Tokenizer(num_words=100, oov_token='<OOV>')
tokenizer.fit_on_texts(sentence)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(sentence)

padded = pad_sequences(sequences, padding='post',maxlen=5,truncating='post')
print(word_index)
print(sequences)
print(padded)

{'<OOV>': 1, 'আমার': 2, 'একটি': 3, 'গরু': 4, 'আছে': 5, 'মহিষ': 6, 'তোমার': 7, 'তুমি': 8, 'কি': 9, 'মনে': 10, 'কর': 11, 'তরতাজা': 12}
[[2, 3, 4, 5], [2, 3, 6, 5], [7, 3, 4, 5], [8, 9, 10, 11, 2, 4, 12]]
[[ 2  3  4  5  0]
 [ 2  3  6  5  0]
 [ 7  3  4  5  0]
 [ 8  9 10 11  2]]


In [35]:
tokenizer = Tokenizer(num_words=100, oov_token='<OOV>')
tokenizer.fit_on_texts(sentence)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(sentence)

padded = pad_sequences(sequences, maxlen=5,padding='pre', truncating='pre')
print(word_index)
print(sequences)
print(padded)

{'<OOV>': 1, 'আমার': 2, 'একটি': 3, 'গরু': 4, 'আছে': 5, 'মহিষ': 6, 'তোমার': 7, 'তুমি': 8, 'কি': 9, 'মনে': 10, 'কর': 11, 'তরতাজা': 12}
[[2, 3, 4, 5], [2, 3, 6, 5], [7, 3, 4, 5], [8, 9, 10, 11, 2, 4, 12]]
[[ 0  2  3  4  5]
 [ 0  2  3  6  5]
 [ 0  7  3  4  5]
 [10 11  2  4 12]]


# Pandas read_json file

In [36]:
# import pandas as pd 
# datastore = pd.read_json('/Users/msjahid/Desktop/NLP/sarcasm.json')
# datastore.head()

In [37]:
import json
with open('/Users/msjahid/Desktop/NLP/sarcasm.json', 'r') as f:
    datastore = json.load(f)

In [38]:
sentences = []
lable = []
urls = []
for item in datastore:
    sentences.append(item['headline'])
    lable.append(item['is_sarcastic'])
    urls.append(item['article_link'])

# Padding Shape

In [44]:
!wget --no-check-certificate \
    https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json \
    -O /tmp/sarcasm.json

--2020-03-12 10:48:58--  https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.24.128
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.24.128|:443... connected.
  Unable to locally verify the issuer's authority.
HTTP request sent, awaiting response... 200 OK
Length: 5643545 (5.4M) [application/json]
Saving to: ‘/tmp/sarcasm.json’


2020-03-12 10:49:18 (294 KB/s) - ‘/tmp/sarcasm.json’ saved [5643545/5643545]

[  308 15115   679  3337  2298    48   382  2576 15116     6  2577  8434
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0]
(26709, 40)


In [46]:
import json

with open("/tmp/sarcasm.json", 'r') as f:
    datastore = json.load(f)


sentences = [] 
labels = []
urls = []
for item in datastore:
    sentences.append(item['headline'])
    labels.append(item['is_sarcastic'])
    urls.append(item['article_link'])



from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts(sentences)

word_index = tokenizer.word_index
print(len(word_index))
print(word_index)
sequences = tokenizer.texts_to_sequences(sentences)
padded = pad_sequences(sequences, padding='post')
print(padded[0])
print(padded.shape)

29657


[  308 15115   679  3337  2298    48   382  2576 15116     6  2577  8434
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0]
(26709, 40)


In [47]:
sentences[0]

"former versace store clerk sues over secret 'black code' for minority shoppers"

In [48]:
padded = pad_sequences(sequences)
print(padded[0])

[    0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0   308 15115   679  3337  2298    48   382  2576
 15116     6  2577  8434]


In [49]:
import pandas as pd 
import numpy as np 

In [50]:
bangla = pd.read_csv('/Users/msjahid/Downloads/content_2019 2.csv')

In [51]:
bangla.head()

Unnamed: 0,date,title,subtitle,author,comment,ref,section,media,page,tags,content,article_id
0,2019-01-01,মুক্তিযুদ্ধ-গবেষণার বর্ণনা তাজুল মোহাম্মদের,টরন্টোয় পাঠশালার বিজয় দিবসের আসর,উত্তর আমেরিকা অফিস,,https://www.prothomalo.com/northamerica/articl...,northamerica,print,1,,দর্শন-শিল্প-সাহিত্য-বিজ্ঞান চর্চার প্ল্যাটফর্ম...,1572837
1,2019-01-01,নতুন বছরে তারকাদের পরিকল্পনা কী?,,হাবিবুল্লাহ সিদ্দিক. ঢাকা,,https://www.prothomalo.com/entertainment/artic...,entertainment,print,1,টেলিভিশন,শুরু হলো নতুন বছর। সবার মতো তারকাদেরও আছে বছরজ...,1572804
2,2019-01-01,আফগানিস্তানে যুক্তরাষ্ট্রের পরবর্তী পদক্ষেপ কী?,ভূরাজনীতি,মাইকেল কুগেলম্যান,,https://www.prothomalo.com/opinion/article/157...,opinion,print,1,আন্তর্জাতিক,এই মুহূর্তে জালমে খলিলজাদ সম্ভবত সুখী মানুষ নন...,1572798
3,2019-01-01,সন্তানদের জন্য স্যান্ড্রার চিঠি,,বিনোদন ডেস্ক,,https://www.prothomalo.com/entertainment/artic...,entertainment,print,1,হলিউড,নেটফ্লিক্স অরিজিনালসের ‘বার্ড বক্স’ ছবিটি নিয়ে...,1572785
4,2019-01-01,সুনামগঞ্জে ২২. হবিগঞ্জে ১৬ প্রার্থীর জামানত বা...,,নিজস্ব প্রতিবেদক. সুনামগঞ্জ ও হবিগঞ্জ,২,https://www.prothomalo.com/bangladesh/article/...,bangladesh,print,1,সুনামগঞ্জ|একাদশ সংসদ নির্বাচন|বিএনপি|আওয়ামী লী...,নির্বাচনী আইন অনুযায়ী পর্যাপ্ত ভোট না পাওয়ায় স...,1572778


In [52]:
bangla.fillna('দেওয়া নাই', inplace=True)

In [53]:
bangla.head()

Unnamed: 0,date,title,subtitle,author,comment,ref,section,media,page,tags,content,article_id
0,2019-01-01,মুক্তিযুদ্ধ-গবেষণার বর্ণনা তাজুল মোহাম্মদের,টরন্টোয় পাঠশালার বিজয় দিবসের আসর,উত্তর আমেরিকা অফিস,দেওয়া নাই,https://www.prothomalo.com/northamerica/articl...,northamerica,print,1,দেওয়া নাই,দর্শন-শিল্প-সাহিত্য-বিজ্ঞান চর্চার প্ল্যাটফর্ম...,1572837
1,2019-01-01,নতুন বছরে তারকাদের পরিকল্পনা কী?,দেওয়া নাই,হাবিবুল্লাহ সিদ্দিক. ঢাকা,দেওয়া নাই,https://www.prothomalo.com/entertainment/artic...,entertainment,print,1,টেলিভিশন,শুরু হলো নতুন বছর। সবার মতো তারকাদেরও আছে বছরজ...,1572804
2,2019-01-01,আফগানিস্তানে যুক্তরাষ্ট্রের পরবর্তী পদক্ষেপ কী?,ভূরাজনীতি,মাইকেল কুগেলম্যান,দেওয়া নাই,https://www.prothomalo.com/opinion/article/157...,opinion,print,1,আন্তর্জাতিক,এই মুহূর্তে জালমে খলিলজাদ সম্ভবত সুখী মানুষ নন...,1572798
3,2019-01-01,সন্তানদের জন্য স্যান্ড্রার চিঠি,দেওয়া নাই,বিনোদন ডেস্ক,দেওয়া নাই,https://www.prothomalo.com/entertainment/artic...,entertainment,print,1,হলিউড,নেটফ্লিক্স অরিজিনালসের ‘বার্ড বক্স’ ছবিটি নিয়ে...,1572785
4,2019-01-01,সুনামগঞ্জে ২২. হবিগঞ্জে ১৬ প্রার্থীর জামানত বা...,দেওয়া নাই,নিজস্ব প্রতিবেদক. সুনামগঞ্জ ও হবিগঞ্জ,২,https://www.prothomalo.com/bangladesh/article/...,bangladesh,print,1,সুনামগঞ্জ|একাদশ সংসদ নির্বাচন|বিএনপি|আওয়ামী লী...,নির্বাচনী আইন অনুযায়ী পর্যাপ্ত ভোট না পাওয়ায় স...,1572778


In [54]:
tokenizer.fit_on_texts(bangla['title'])

word_index = tokenizer.word_index
print(len(word_index))
print(word_index)
sequences = tokenizer.texts_to_sequences(bangla['title'])
padded = pad_sequences(sequences, padding='post')
print(padded[0])
print(padded.shape)

60189


[ 5071 11859 11860 24360 24361     0     0     0     0     0     0     0
     0     0     0     0     0     0]
(31731, 18)


# Week-2 Word embedding

In [55]:
!pip install -q tensorflow-datasets 
# or "conda install -c anaconda tensorflow-datasets"

In [59]:
import tensorflow_datasets as tfds
import numpy as np

In [58]:
imdb, info = tfds.load("imdb_reviews",with_info=True, as_supervised=True)

[1mDownloading and preparing dataset imdb_reviews (80.23 MiB) to /Users/msjahid/tensorflow_datasets/imdb_reviews/plain_text/0.1.0...[0m


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Completed...', max=1.0, style=Progre…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Size...', max=1.0, style=ProgressSty…







HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



HBox(children=(FloatProgress(value=0.0, description='Shuffling...', max=10.0, style=ProgressStyle(description_…

Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



HBox(children=(FloatProgress(value=0.0, description='Shuffling...', max=10.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



HBox(children=(FloatProgress(value=0.0, description='Shuffling...', max=20.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Reading...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=0.0, description='Writing...', max=2500.0, style=ProgressStyle(description_…

[1mDataset imdb_reviews downloaded and prepared to /Users/msjahid/tensorflow_datasets/imdb_reviews/plain_text/0.1.0. Subsequent calls will reuse this data.[0m




In [60]:
train_data, test_data = imdb['train'], imdb['test']

In [61]:
training_sentence =[]
training_labels = []

testing_sentence =[]
testing_labels = []

for s,l in train_data:
    training_sentence.append(str(s.numpy()))
    training_labels.append(l.numpy())
    
for s,l in test_data:
    testing_sentence.append(str(s.numpy()))
    testing_labels.append(l.numpy())
