In [None]:
# install libraries 

!pip install pandas matplotlib tensorflow


In [None]:
import pandas as pd

# Load the tweet data from a CSV file located next to the notebook.
df = pd.read_csv("./Tweets.csv")

In [None]:
# Preview the first few rows to see what the columns contain.
df.head()

In [None]:
# Summary statistics of the frame (pandas describes numeric columns by default).
df.describe()

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Only the tweet text and its sentiment label are needed from here on.

review_df = df.loc[:, ['text', 'airline_sentiment']]

print(review_df.shape)
review_df.head(5)

EDA


In [None]:
# List every column name available in the loaded frame.
df.columns

In [None]:
# This is a binary classification task, so discard the neutral tweets and
# keep only the positive and negative ones.
review_df = review_df.loc[review_df['airline_sentiment'].ne('neutral')]

print(review_df.shape)
review_df.head(5)

In [None]:
# Class balance: how many tweets remain for each sentiment label.
review_df["airline_sentiment"].value_counts()

Text preprocessing (tokenization, padding)

In [None]:
# Convert the categorical sentiment labels into integer codes.

# factorize() returns (codes, uniques) and numbers the labels in order of first
# appearance. positive = 0 and negative = 1 is expected for this data -- confirm
# against the uniques displayed below.
sentiment_label = review_df['airline_sentiment'].factorize()
sentiment_label

In [None]:
# Tweet texts as an array, taken from the same filtered frame as the labels.
tweet = review_df['text'].values
tweet

In [None]:
# Build the word index that is used to tokenize all the text.

from tensorflow.keras.preprocessing.text import Tokenizer

# num_words=5000 caps how many of the most frequent words are kept when texts are encoded.
tokenizer = Tokenizer(num_words=5000)

# Learn the word -> integer index mapping from the tweets.
tokenizer.fit_on_texts(tweet)

In [None]:
# Replace each word with its assigned integer index (one sequence per tweet).
encoded_docs = tokenizer.texts_to_sequences(tweet)


In [None]:
# Pad the encoded tweets so that every sequence has the same length.

from tensorflow.keras.preprocessing.sequence import pad_sequences

# maxlen=200 fixes the sequence length; the same value is used for the model input and at inference time.
padded_sequence = pad_sequences(encoded_docs, maxlen=200)


Text Classifier

Here I use an LSTM architecture for this task because it works well with sequential text data.

In [None]:
from tensorflow.keras.models import Sequential , load_model
from tensorflow.keras.layers import LSTM,Dense, Dropout, SpatialDropout1D
from tensorflow.keras.layers import Embedding

# Size of the embedding table.
# texts_to_sequences() only emits indices below the tokenizer's `num_words` cap,
# so rows beyond that cap would never be looked up or trained. Fall back to the
# full vocabulary when no cap is configured.
full_vocab_size = len(tokenizer.word_index) + 1  # +1 because index 0 is used for padding
vocab_size = min(tokenizer.num_words or full_vocab_size, full_vocab_size)
embedding_vector_length = 32

# Embedding -> LSTM -> single sigmoid unit for binary (positive/negative) output.
model = Sequential()
model.add(Embedding(vocab_size, embedding_vector_length, input_length=200))
model.add(SpatialDropout1D(0.25))
model.add(LSTM(50, dropout=0.5, recurrent_dropout=0.5))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None, so it must not be wrapped
# in print() (that would emit a stray "None" line).
model.summary()

Train model 

In [None]:
# Train on the padded sequences against the integer sentiment codes.
# The returned History object is kept for the learning-curve plots.
history = model.fit(
    padded_sequence,
    sentiment_label[0],      # integer codes produced by factorize()
    validation_split=0.2,    # hold out 20% of the samples for validation
    epochs=5,
    batch_size=32,
)

In [None]:
# Save the trained model to disk.
model.save("./sentimental_analysis.h5")

# Reload the saved file; this reconstructs the model so it can be used without retraining.
inference_model = load_model("./sentimental_analysis.h5")


Plotting

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch.
# Title and axis labels are set so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='acc')
ax.plot(history.history['val_accuracy'], label='val_acc')
ax.set(title='Model accuracy', xlabel='Epoch', ylabel='Accuracy')
ax.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch.
# Title and axis labels are set so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='loss')
ax.plot(history.history['val_loss'], label='val_loss')
ax.set(title='Model loss', xlabel='Epoch', ylabel='Loss')
ax.legend()
plt.show()

Run inference with the model

In [None]:
def predict_sentiment(text):
    """Print the sentiment label the trained model predicts for `text`.

    The sentence is encoded with the fitted `tokenizer` and padded to the
    same length (200) as the training sequences before being passed to `model`.
    """
    encoded = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(encoded, maxlen=200)

    # The model has a single sigmoid output; rounding it gives the class index.
    score = model.predict(padded)
    class_index = int(score.round().item())

    # sentiment_label[1] holds the label names indexed by their integer code.
    print("Predicted label: ", sentiment_label[1][class_index])


# Two hand-written examples: one that reads as positive, one as negative.
test_sentence1 = "I enjoyed my journey on my first flight."
test_sentence2 = "This is the worst flight experience of my life!"

# Run the classifier on each example in turn.
for sentence in (test_sentence1, test_sentence2):
    predict_sentiment(sentence)
