# **Tweet analysis using LSTM**

In [None]:
from keras.layers import Dense, LSTM, Embedding, Activation
from keras.models import Model, Sequential
from keras.optimizers import RMSprop
from transformers import BertTokenizer
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

**Read dataset**

In [None]:
# Load the labelled training tweets from the competition input folder.
train_path = '../input/nlp-getting-started/train.csv'
train_df = pd.read_csv(train_path)
# Preview the first rows (rendered as the cell output).
train_df.head()

In [None]:
# Raw tweet text as an array, followed by a quick look at the first five.
tweets = train_df['text'].values
preview_template = '{} : {}'
for idx in range(5):
    print(preview_template.format(idx, tweets[idx]))

In [None]:
# Only the tokenizer of this pretrained checkpoint is used in this notebook:
# it splits tweets into tokens and maps them to integer ids. The classifier
# itself is the Keras LSTM built further down.
pretrained_name = 'bert-base-multilingual-cased'
tokenizer = BertTokenizer.from_pretrained(pretrained_name)

In [None]:
def encode_sentence(s):
    """Convert a sentence into a list of vocabulary ids ending with ``[SEP]``.

    The text is split with the module-level ``tokenizer``, a ``'[SEP]'``
    token is appended to the token list, and the tokens are mapped to ids
    with ``tokenizer.convert_tokens_to_ids``.
    """
    pieces = [*tokenizer.tokenize(s), '[SEP]']
    return tokenizer.convert_tokens_to_ids(pieces)

**Vectorize words**

In [None]:
# Every tweet becomes exactly `max_len` ids: longer sequences are cut off,
# shorter ones are right-padded with 0.
max_len = 50
encoded_tweets = [encode_sentence(text) for text in tweets]
x_train = [
    ids[:max_len] + [0] * max(0, max_len - len(ids))
    for ids in encoded_tweets
]

In [None]:
# Turn the list of fixed-length id lists into a 2-D array and record the
# largest token id that occurs in the training data.
x_train = np.asarray(x_train)
n = x_train.max()
print(x_train.shape)

In [None]:
# Labels from the `target` column, copied into their own NumPy array.
y_train = np.array(train_df['target'].values)
print(y_train.shape)

In [None]:
# Training hyper-parameters (consumed by model.fit).
EPOCHS = 15
BATCH_SIZE = 32
# Layer widths get their own names instead of reusing BATCH_SIZE, so that
# changing the batch size does not silently resize the network.
EMBEDDING_DIM = 32
LSTM_UNITS = 32

# Size the embedding table from the tokenizer's vocabulary rather than from
# the largest id seen in the training tweets: test tweets can contain ids
# above `n`, which would index past the end of a table of only `n + 1` rows.
# `max` keeps the table at least as large as before.
vocab_size = max(int(n) + 1, tokenizer.vocab_size)

model = Sequential()
# mask_zero=True makes the padding id 0 be ignored by the LSTM.
model.add(Embedding(vocab_size, EMBEDDING_DIM, mask_zero=True))
model.add(LSTM(LSTM_UNITS))
# Two mutually exclusive classes trained with sparse categorical
# cross-entropy: softmax makes the two outputs a probability distribution
# (independent sigmoids do not sum to 1).
model.add(Dense(2, activation='softmax'))
# NOTE(review): newer Keras releases name this argument `learning_rate`;
# `lr` is kept as written in the original notebook - confirm against the
# installed Keras version.
optimizer = RMSprop(lr=0.01)
model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer)
model.summary()

In [None]:
# Train the classifier on the padded id matrix and its labels.
model.fit(x=x_train, y=y_train, batch_size=BATCH_SIZE, epochs=EPOCHS)

**Read test dataset**

In [None]:
# Load the unlabelled test tweets that the submission must cover.
test_path = '../input/nlp-getting-started/test.csv'
test_df = pd.read_csv(test_path)
# Preview the first rows (rendered as the cell output).
test_df.head()

In [None]:
# Encode the test tweets the same way as the training tweets: cut to
# `max_len` ids, or right-pad with 0 up to `max_len`.
tweets = test_df['text'].values
encoded_test_tweets = [encode_sentence(text) for text in tweets]
x_test = [
    ids[:max_len] + [0] * max(0, max_len - len(ids))
    for ids in encoded_test_tweets
]

**Predict answer**

In [None]:
# Score the whole test set in one batched call instead of invoking
# model.predict once per tweet, which pays the per-call overhead thousands
# of times. Each row of `test_scores` holds the two class scores for one
# tweet; the predicted label is the index of the larger score.
test_scores = model.predict(np.array(x_test))
y_test = np.argmax(test_scores, axis=1).tolist()

In [None]:
# Assemble the submission table: one row per test tweet with its id and
# the predicted label.
submission_columns = {'id': test_df['id'].values, 'target': y_test}
sub = pd.DataFrame(submission_columns)
sub.head()

In [None]:
# Write the predictions to disk without the DataFrame index column.
submission_path = './submission.csv'
sub.to_csv(submission_path, index=False)