In [1]:
import numpy as np
import pandas as pd
import re

In [2]:
# Mount Google Drive at /content/drive (Colab-only) so the dataset CSV stored
# there can be read by the next cell.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Load the tweet dataset from the mounted Drive. The file is decoded as
# ISO-8859-1 (presumably because it is not valid UTF-8 — TODO confirm).
data = pd.read_csv('/content/drive/MyDrive/datasets/judge-1377884607_tweet_product_company.csv', encoding='ISO-8859-1')

In [4]:
# Preview the first 15 rows of the raw data.
data.head(15)

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion
5,@teachntech00 New iPad Apps For #SpeechTherapy...,,No emotion toward brand or product
6,,,No emotion toward brand or product
7,"#SXSW is just starting, #CTIA is around the co...",Android,Positive emotion
8,Beautifully smart and simple idea RT @madebyma...,iPad or iPhone App,Positive emotion
9,Counting down the days to #sxsw plus strong Ca...,Apple,Positive emotion


In [5]:
# Summarise column dtypes and non-null counts.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9093 entries, 0 to 9092
Data columns (total 3 columns):
 #   Column                                              Non-Null Count  Dtype 
---  ------                                              --------------  ----- 
 0   tweet_text                                          9092 non-null   object
 1   emotion_in_tweet_is_directed_at                     3291 non-null   object
 2   is_there_an_emotion_directed_at_a_brand_or_product  9093 non-null   object
dtypes: object(3)
memory usage: 213.2+ KB


In [6]:
# Count missing values in each column.
data.isna().sum()

tweet_text                                               1
emotion_in_tweet_is_directed_at                       5802
is_there_an_emotion_directed_at_a_brand_or_product       0
dtype: int64

In [7]:
# The brand/product target column is mostly empty and is not used by the model.
# Reassigning (instead of inplace=True) with errors='ignore' keeps this cell
# safe to re-run: a second execution no longer raises KeyError.
data = data.drop(columns=['emotion_in_tweet_is_directed_at'], errors='ignore')

In [8]:
# Drop rows that lack a tweet or a label. Naming the columns explicitly means
# an unrelated column with gaps can never silently remove rows, and
# reassigning avoids mutating the frame in place.
data = data.dropna(
    subset=['tweet_text', 'is_there_an_emotion_directed_at_a_brand_or_product']
)

In [9]:
# Preview the frame again after dropping the column and the rows with missing values.
data.head(15)

Unnamed: 0,tweet_text,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Positive emotion
5,@teachntech00 New iPad Apps For #SpeechTherapy...,No emotion toward brand or product
7,"#SXSW is just starting, #CTIA is around the co...",Positive emotion
8,Beautifully smart and simple idea RT @madebyma...,Positive emotion
9,Counting down the days to #sxsw plus strong Ca...,Positive emotion
10,Excited to meet the @samsungmobileus at #sxsw ...,Positive emotion


In [10]:
# List the distinct sentiment labels; these become the target classes.
data['is_there_an_emotion_directed_at_a_brand_or_product'].unique()

array(['Negative emotion', 'Positive emotion',
       'No emotion toward brand or product', "I can't tell"], dtype=object)

In [11]:
def preprocess_tweet(tweet):
    """Lower-case a tweet and strip URLs, pic.twitter.com links and @mentions.

    Parameters
    ----------
    tweet : str
        Raw tweet text. Non-string input (e.g. ``None`` or a NaN left over
        from a missing value) is treated as an empty tweet.

    Returns
    -------
    str
        The lower-cased text with links and mentions removed. Whitespace left
        behind by a removal is not collapsed.
    """
    if not isinstance(tweet, str):
        return ""

    tweet = tweet.lower()
    # Any http(s) URL, with or without a path component (e.g. "http://t.co").
    tweet = re.sub(r"https?://\S+", "", tweet)
    # Scheme-less picture links.
    tweet = re.sub(r"pic\.twitter\.com/\S+", "", tweet)
    # A mention plus any punctuation glued to it; one-character handles included.
    tweet = re.sub(r"@\w+\S*", "", tweet)
    return tweet

In [12]:
# Clean every tweet element-wise and keep the result next to the raw text.
data['tweet_text_clean'] = data['tweet_text'].map(preprocess_tweet)

In [13]:
max_len = 100  # Number of tokens each tweet sequence is padded/truncated to
embedding_dim = 128  # Dimensionality of word embeddings

In [14]:
# Model input: the cleaned tweet text. Target: the sentiment label column.
tweets = data["tweet_text_clean"]
labels = data["is_there_an_emotion_directed_at_a_brand_or_product"]

In [15]:
from sklearn.preprocessing import OneHotEncoder
# Encoder for turning the string labels into one-hot rows; sparse_output=False
# requests a dense array rather than a sparse matrix.
encoder = OneHotEncoder(sparse_output=False)

In [16]:
# One-hot encode the label column. The column is read straight from `data`
# rather than from `labels`, because this cell rebinds `labels` to a NumPy
# array; reading from `labels` would make a second run fail on `.to_numpy()`.
labels_array = (
    data["is_there_an_emotion_directed_at_a_brand_or_product"]
    .to_numpy()
    .reshape(-1, 1)  # the encoder expects a 2-D (n_samples, 1) array
)
labels = encoder.fit_transform(labels_array)

In [17]:
# Inspect the one-hot encoded label matrix.
labels

array([[0., 1., 0., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       ...,
       [0., 0., 1., 0.],
       [0., 0., 1., 0.],
       [0., 0., 1., 0.]])

In [18]:
from keras.preprocessing import text
from keras.utils import pad_sequences

In [19]:
# Build the vocabulary from the cleaned tweets and turn each tweet into a
# sequence of word indices.
tokenizer = text.Tokenizer()
tokenizer.fit_on_texts(list(tweets))
tokenized_texts = tokenizer.texts_to_sequences(tweets)
# Pad/truncate to max_len (not a literal 100) so training uses the same
# sequence length as predict_sentiment, which also pads to max_len.
X = pad_sequences(tokenized_texts, maxlen=max_len)

In [20]:
# Inspect the first padded sequence.
X[0]

array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
         22,   50,   10,  605,   17,  256,  110, 2579,  632,    5, 1348,
         24,   31,   85,  890,   22,  103,    4, 1111, 2580, 3948,    5,
          1], dtype=int32)

In [21]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for testing. A fixed random_state makes the split
# reproducible across runs, and stratifying on the class index (argmax of the
# one-hot rows) keeps the label proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels.argmax(axis=1),
)

In [22]:
from keras.layers import Dense, LSTM, Embedding
from keras.models import Sequential

In [23]:
# Number of label classes learned by the encoder; the model's output layer is sized from this.
len(encoder.categories_[0])

4

In [24]:
# Two stacked LSTMs over learned word embeddings, followed by a small dense
# head with one softmax unit per label class.
model = Sequential()
# Vocabulary size is word_index + 1: index 0 is the padding value, so word
# indices start at 1. Embedding width and sequence length come from the
# notebook's embedding_dim / max_len settings instead of repeated literals.
model.add(Embedding(len(tokenizer.word_index)+1, embedding_dim, input_length=max_len))
# return_sequences=True so the second LSTM receives the full sequence.
model.add(LSTM(50,return_sequences=True))
model.add(LSTM(60))
model.add(Dense(50,activation='relu'))
model.add(Dense(len(encoder.categories_[0]), activation='softmax'))

In [25]:
# Categorical cross-entropy pairs with the one-hot targets and the softmax
# output layer; accuracy is tracked during training.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [26]:
# Train for 10 epochs, holding out 10% of the training split for per-epoch validation.
model.fit(X_train, y_train, epochs=10, validation_split=0.1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7b0c4c1bca90>

In [34]:
def predict_sentiment(text):
  """Predict the sentiment label for a single raw tweet.

  Parameters
  ----------
  text : str
      Raw tweet text. (Inside this function the name shadows the
      ``keras.preprocessing.text`` module imported earlier in the notebook;
      the module is not needed here.)

  Returns
  -------
  The entry of ``encoder.categories_[0]`` with the highest predicted
  probability, i.e. one of the original label strings.
  """
  # Clean the input exactly as the training corpus was cleaned, so mentions
  # and URLs do not reach the tokenizer in a form it never saw in training.
  cleaned = preprocess_tweet(text)
  sequence = tokenizer.texts_to_sequences([cleaned])
  padded_sequence = pad_sequences(sequence, maxlen=max_len)
  # predict returns one row per input; there is a single input here.
  prediction = model.predict(padded_sequence)[0]
  sentiment_index = np.argmax(prediction)
  return encoder.categories_[0][sentiment_index]


In [36]:
# Try the model on a short positive-sounding phrase.
# NOTE(review): this phrase opens one of the dataset tweets shown in the preview
# above (row 8), so it may have been part of the training split rather than an
# unseen example.
new_tweet = "Beautifully smart and simple idea"
sentiment = predict_sentiment(new_tweet)
print(f"Sentiment for '{new_tweet}': {sentiment}")

Sentiment for 'Beautifully smart and simple idea': Positive emotion


In [37]:
# Try the model on a complaint-style tweet.
# NOTE(review): the text contains the HTML entity `&amp;`; this cell does not
# unescape it before prediction.
new_tweet = "Found the app kyping my iPhone's geolocation &amp; not releasing when in background. Need a patch"
sentiment = predict_sentiment(new_tweet)
print(f"Sentiment for '{new_tweet}': {sentiment}")

Sentiment for 'Found the app kyping my iPhone's geolocation &amp; not releasing when in background. Need a patch': Negative emotion
