In [1]:
# Importing required Libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

In [2]:
# Reading the file
import pandas as pd
twitter= pd.read_csv('/content/Twitter_comments_class.csv')
twitter.columns= ['SrNo', 'tag', 'Sentiment', 'Tweet']

In [3]:
twitter

Unnamed: 0,SrNo,tag,Sentiment,Tweet
0,2401,Borderlands,Positive,im getting on borderlands and i will murder yo...
1,2401,Borderlands,Positive,I am coming to the borders and I will kill you...
2,2401,Borderlands,Positive,im getting on borderlands and i will kill you ...
3,2401,Borderlands,Positive,im coming on borderlands and i will murder you...
4,2401,Borderlands,Positive,im getting on borderlands 2 and i will murder ...
...,...,...,...,...
74677,9200,Nvidia,Positive,Just realized that the Windows partition of my...
74678,9200,Nvidia,Positive,Just realized that my Mac window partition is ...
74679,9200,Nvidia,Positive,Just realized the windows partition of my Mac ...
74680,9200,Nvidia,Positive,Just realized between the windows partition of...


In [4]:
twitter.isnull().sum()  # checking the null values

Unnamed: 0,0
SrNo,0
tag,0
Sentiment,0
Tweet,858


In [5]:
twitter=twitter.dropna(axis='rows')  # dropping null values

In [6]:
twitter.shape

(73824, 4)

In [7]:
twitter.Sentiment.value_counts()   # checking the counts of each sentiment

Unnamed: 0_level_0,count
Sentiment,Unnamed: 1_level_1
Negative,22312
Positive,20619
Neutral,18051
Irrelevant,12842


In [8]:
val=twitter.Sentiment
val

Unnamed: 0,Sentiment
0,Positive
1,Positive
2,Positive
3,Positive
4,Positive
...,...
74677,Positive
74678,Positive
74679,Positive
74680,Positive


In [9]:
from sklearn.preprocessing import LabelEncoder   # Encoding the sentiments
le=LabelEncoder()
labels= le.fit_transform(val)
labels

array([3, 3, 3, ..., 3, 3, 3])

In [10]:
sentences= twitter.Tweet
sentences[0]

'im getting on borderlands and i will murder you all ,'

In [11]:
# Importing required libraries
import re  # regular expressions
import nltk  # natural language tool kit
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

In [12]:
stop= set(stopwords.words('english'))
stem= PorterStemmer()
lemma= WordNetLemmatizer()

In [13]:
# Cleaning the text
def text_cleaner(text):
    text= str(text).lower()
    text= re.sub(r'\d+', '', text)
    text= re.sub(r'[^\w\s]', '', text)

    text= ' '.join([word for word in text.split() if word not in stop])
    text= " ".join(stem.stem(word) for word in text.split())
    # text= " ".join(lemma.lemmatize(word) for word in text.split())
    return text

In [14]:
# Adding a new column of cleaned comments
twitter['Comments_cleaned']= twitter['Tweet'].apply(text_cleaner)
twitter

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  twitter['Comments_cleaned']= twitter['Tweet'].apply(text_cleaner)


Unnamed: 0,SrNo,tag,Sentiment,Tweet,Comments_cleaned
0,2401,Borderlands,Positive,im getting on borderlands and i will murder yo...,im get borderland murder
1,2401,Borderlands,Positive,I am coming to the borders and I will kill you...,come border kill
2,2401,Borderlands,Positive,im getting on borderlands and i will kill you ...,im get borderland kill
3,2401,Borderlands,Positive,im coming on borderlands and i will murder you...,im come borderland murder
4,2401,Borderlands,Positive,im getting on borderlands 2 and i will murder ...,im get borderland murder
...,...,...,...,...,...
74677,9200,Nvidia,Positive,Just realized that the Windows partition of my...,realiz window partit mac like year behind nvid...
74678,9200,Nvidia,Positive,Just realized that my Mac window partition is ...,realiz mac window partit year behind nvidia dr...
74679,9200,Nvidia,Positive,Just realized the windows partition of my Mac ...,realiz window partit mac year behind nvidia dr...
74680,9200,Nvidia,Positive,Just realized between the windows partition of...,realiz window partit mac like year behind nvid...


In [15]:
sentences= twitter.Comments_cleaned
sentences[0]

'im get borderland murder'

In [16]:
# Tokenize the sentences
tokenizer = Tokenizer()
clean_sentences = [sentence for sentence in sentences ]
len(clean_sentences)

73824

In [17]:
tokenizer.fit_on_texts(clean_sentences)
word_index = tokenizer.word_index

In [18]:
word_index

{'game': 1,
 'play': 2,
 'get': 3,
 'like': 4,
 'im': 5,
 'fuck': 6,
 'love': 7,
 'go': 8,
 'one': 9,
 'good': 10,
 'time': 11,
 'realli': 12,
 'new': 13,
 'see': 14,
 'look': 15,
 'make': 16,
 'peopl': 17,
 'johnson': 18,
 'shit': 19,
 'thank': 20,
 'cant': 21,
 'dont': 22,
 'best': 23,
 'still': 24,
 'player': 25,
 'year': 26,
 'even': 27,
 'got': 28,
 'day': 29,
 'great': 30,
 'work': 31,
 'xbox': 32,
 'want': 33,
 'googl': 34,
 'pleas': 35,
 'facebook': 36,
 'amazon': 37,
 'dead': 38,
 'rhandlerr': 39,
 'know': 40,
 'call': 41,
 'verizon': 42,
 'back': 43,
 'microsoft': 44,
 'say': 45,
 'us': 46,
 'much': 47,
 'ps': 48,
 'ban': 49,
 'use': 50,
 'fifa': 51,
 'red': 52,
 'come': 53,
 'guy': 54,
 'would': 55,
 'tri': 56,
 'first': 57,
 'fix': 58,
 'ive': 59,
 'wait': 60,
 'watch': 61,
 'today': 62,
 'home': 63,
 'video': 64,
 'pubg': 65,
 'think': 66,
 'nvidia': 67,
 'fun': 68,
 'borderland': 69,
 'live': 70,
 'gta': 71,
 'also': 72,
 'team': 73,
 'unk': 74,
 'thing': 75,
 'need': 76,

In [19]:
# Convert sentences to sequences of indices
sequences = tokenizer.texts_to_sequences(clean_sentences)

# Pad sequences to ensure they have the same length
maxlen = max([len(seq) for seq in sequences])
padded_sequences = pad_sequences(sequences, maxlen=maxlen)

padded_sequences.shape

(73824, 163)

In [20]:
labels.shape

(73824,)

In [21]:
np.unique(labels)

array([0, 1, 2, 3])

# Defining RNN model

In [22]:
# RNN model
embedding_dim = 16
vocab_size = len(word_index) + 1

# Define RNN model for multi-class classification
model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=maxlen),
    SimpleRNN(32),  # Simple RNN layer with 32 units
    Dense(4, activation='softmax')  # Output layer with softmax activation for multi-class classification
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(padded_sequences, labels, epochs=10, verbose=1)

# Test the model
test_sentence = 'This movie is amazing'
test_sequence = tokenizer.texts_to_sequences([test_sentence])
padded_test_sequence = pad_sequences(test_sequence, maxlen=maxlen)
prediction = model.predict(padded_test_sequence)
predicted_class = np.argmax(prediction)
print("Sentiment prediction:", predicted_class)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Sentiment prediction: 3


In [23]:
# Applying gradio interface
!pip install gradio
import gradio as gr
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder

# Define the prediction function
def predict_sentiment(text):
    # Preprocess the input text
    test_sequence = tokenizer.texts_to_sequences([text])
    padded_test_sequence = pad_sequences(test_sequence, maxlen=maxlen)
    # Predict sentiment using the model
    prediction = model.predict(padded_test_sequence)
    predicted_class = np.argmax(prediction)

    # Use LabelEncoder to map predicted class index to original sentiment label
    predicted_label = le.inverse_transform([predicted_class])[0]

    return predicted_label

# Create a Gradio interface
iface = gr.Interface(fn=predict_sentiment,
                     inputs="text",
                     outputs="text",
                     title="Sentiment Analysis",
                     description="Enter a text to predict its sentiment (negative, somewhat negative, somewhat positive, or positive).")

# Launch the interface
iface.launch()


Collecting gradio
  Downloading gradio-4.44.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from g



# Defining LSTM model

In [25]:
from tensorflow.keras.layers import LSTM, Dropout

# Define a more complex LSTM model for multi-class classification
model_complex_lstm = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=maxlen),
    LSTM(64, return_sequences=True),  # First LSTM layer with 64 units and return sequences
    Dropout(0.5),  # Dropout layer to reduce overfitting
    LSTM(32),  # Second LSTM layer with 32 units
    Dropout(0.5),  # Dropout layer
    Dense(64, activation='relu'),  # Dense layer with ReLU activation
    Dense(4, activation='softmax')  # Output layer with softmax activation for multi-class classification
])

In [26]:
model_complex_lstm.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [27]:
model_complex_lstm.fit(padded_sequences, labels, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7e41706ee170>

In [28]:
# Test the complex LSTM model
test_sentence = 'This movie is amazing'
test_sequence = tokenizer.texts_to_sequences([test_sentence])
padded_test_sequence = pad_sequences(test_sequence, maxlen=maxlen)
prediction_complex_lstm = model_complex_lstm.predict(padded_test_sequence)
predicted_class_complex_lstm = np.argmax(prediction_complex_lstm)
print("Sentiment prediction using complex LSTM model:", predicted_class_complex_lstm)

Sentiment prediction using complex LSTM model: 3


In [29]:
import gradio as gr
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder

# Define the prediction function
def predict_sentiment(text):
    # Preprocess the input text
    test_sequence = tokenizer.texts_to_sequences([text])
    padded_test_sequence = pad_sequences(test_sequence, maxlen=maxlen)
    # Predict sentiment using the model
    prediction = model_complex_lstm.predict(padded_test_sequence)
    predicted_class = np.argmax(prediction)

    # Use LabelEncoder to map predicted class index to original sentiment label
    predicted_label = le.inverse_transform([predicted_class])[0]

    return predicted_label

# Create a Gradio interface
iface = gr.Interface(fn=predict_sentiment,
                     inputs="text",
                     outputs="text",
                     title="Sentiment Analysis",
                     description="Enter a text to predict its sentiment (negative, somewhat negative, somewhat positive, or positive).")

# Launch the interface
iface.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://2a49ea8a7eb06e78f9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


