<a href="https://colab.research.google.com/github/raqueeb/TensorFlow2/blob/master/embedding_bangla_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### একটা এমবেডিং ট্রেনিং করতে চাচ্ছি

জেফ হিটনের নোটবুক থেকে।


In [1]:
from numpy import array
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Embedding, Dense

In [0]:
# Define 10 resturant reviews.
reviews = [
    'আমি আর আসছি না এখানে!',
    'একদম বাজে সার্ভিস',
    'কথা শোনে না ওয়েটার',
    'একদম ঠান্ডা খাবার',
    'বাজে খাবার!',
    'অসাধারণ',
    'অসাধারণ সার্ভিস!',
    'খুব ভালো!',
    'মোটামুটি',
    'এর থেকে ভালো হয়না']

# Define labels (1=negative, 0=positive)
labels = array([1,1,1,1,1,0,0,0,0,0])

In [0]:
from sklearn.feature_extraction.text import TfidfVectorizer

vect = TfidfVectorizer(decode_error='ignore',
                       max_features=4)

X = vect.fit_transform(reviews)

In [0]:
from tensorflow.keras.preprocessing.text import Tokenizer

In [0]:
tokenizer = Tokenizer(num_words=20)

# docs = df['quote']
tokenizer.fit_on_texts(reviews)
sequences = tokenizer.texts_to_sequences(reviews)

In [17]:
sequences[:5]

[[8, 9, 10, 1, 11], [2, 3, 4], [12, 13, 1, 14], [2, 15, 5], [3, 5]]

In [18]:
print(reviews[0])

আমি আর আসছি না এখানে!


In [0]:
VOCAB_SIZE = 50
encoded_reviews = [one_hot(d, VOCAB_SIZE) for d in reviews]
print(f"Encoded reviews: {encoded_reviews}")

Encoded reviews: [[19, 45, 8, 26, 27], [11, 32, 44], [17, 5, 26, 44], [11, 20, 48], [32, 48], [30], [30, 44], [12, 9], [18], [6, 44, 9, 48]]


In [0]:
MAX_LENGTH = 4

padded_reviews = pad_sequences(encoded_reviews, maxlen=MAX_LENGTH, padding='post')
print(padded_reviews)

[[45  8 26 27]
 [11 32 44  0]
 [17  5 26 44]
 [11 20 48  0]
 [32 48  0  0]
 [30  0  0  0]
 [30 44  0  0]
 [12  9  0  0]
 [18  0  0  0]
 [ 6 44  9 48]]


In [0]:
model = Sequential()
embedding_layer = Embedding(VOCAB_SIZE, 8, input_length=MAX_LENGTH)
model.add(embedding_layer)
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])

print(model.summary())

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 4, 8)              400       
_________________________________________________________________
flatten (Flatten)            (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 33        
Total params: 433
Trainable params: 433
Non-trainable params: 0
_________________________________________________________________
None


In [0]:
# fit the model
model.fit(padded_reviews, labels, epochs=100, verbose=0)

<tensorflow.python.keras.callbacks.History at 0x7f42ded29dd8>

In [0]:
print(embedding_layer.get_weights()[0].shape)
print(embedding_layer.get_weights())

(50, 8)
[array([[ 0.0531927 , -0.1335724 ,  0.09978101, -0.11504837, -0.14003828,
         0.11393311,  0.12265231, -0.11320274],
       [ 0.04134898,  0.04094077, -0.01528672, -0.01378284,  0.02807479,
         0.0145526 ,  0.03425549, -0.01268482],
       [ 0.0067782 , -0.0050568 , -0.02829794,  0.04341808,  0.00205258,
         0.03645544,  0.02484741,  0.03088689],
       [ 0.03101758,  0.03930685, -0.04244353, -0.04883758,  0.00656687,
         0.01296997, -0.04324911, -0.01757413],
       [-0.02182766, -0.00187176, -0.02171852, -0.04498038,  0.00705006,
         0.01777476, -0.01512363,  0.04027455],
       [-0.13242161, -0.08681805, -0.11331531,  0.0550344 ,  0.05213567,
        -0.12271953, -0.11110894, -0.11625776],
       [-0.10676523,  0.05187485, -0.08877622, -0.09133556, -0.08951628,
         0.06400855,  0.10163167,  0.13527864],
       [ 0.01605253, -0.03935649, -0.04734606, -0.0422982 , -0.01900793,
         0.00696445,  0.04302504,  0.01819885],
       [-0.14192007, -0

In [0]:
loss, accuracy = model.evaluate(padded_reviews, labels, verbose=0)
print(f'Accuracy: {accuracy}')

Accuracy: 1.0
