# Social Media Sentiment Analysis

In [1]:
!pip install tensorflow scikit-learn nltk spacy gradio



In [2]:
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m69.7 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [3]:
import numpy as np
import pandas as pd
import re
import nltk
import spacy
import gradio as gr
import tensorflow
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential , load_model
from tensorflow.keras.layers import Embedding, LSTM, Dense , Dropout
from tensorflow.keras.callbacks import EarlyStopping
import pickle

In [4]:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [5]:
npl = spacy.load('en_core_web_sm')

In [6]:
from google.colab import files
uploaded = files.upload()

Saving sentimentdataset.csv to sentimentdataset (3).csv


In [7]:
df = pd.read_csv(next(iter(uploaded.keys())))
print(df.head())

   Unnamed: 0.1  Unnamed: 0  \
0             0           0   
1             1           1   
2             2           2   
3             3           3   
4             4           4   

                                                Text    Sentiment  \
0   Enjoying a beautiful day at the park!        ...   Positive     
1   Traffic was terrible this morning.           ...   Negative     
2   Just finished an amazing workout! 💪          ...   Positive     
3   Excited about the upcoming weekend getaway!  ...   Positive     
4   Trying out a new recipe for dinner tonight.  ...   Neutral      

             Timestamp            User     Platform  \
0  2023-01-15 12:30:00   User123          Twitter     
1  2023-01-15 08:45:00   CommuterX        Twitter     
2  2023-01-15 15:45:00   FitnessFan      Instagram    
3  2023-01-15 18:20:00   AdventureX       Facebook    
4  2023-01-15 19:55:00   ChefCook        Instagram    

                                     Hashtags  Retweets  Likes     

In [8]:
df = df[['Text','Sentiment']]

In [9]:
from nltk.corpus import stopwords

def preprocessing_text(text):
  text = text.lower()
  text = re.sub(r'[^a-z\s]', '', text)
  token = nltk.word_tokenize(text)
  stop_word = set(stopwords.words('english'))
  filterd = [word for word in token if word not in stop_word]
  doc = npl(" ".join(filterd))
  lemmatized = [token.lemma_ for token in doc]
  return " ".join(lemmatized)

In [10]:
df['text']= df['Text'].apply(preprocessing_text)
display(df.head())

Unnamed: 0,Text,Sentiment,text
0,Enjoying a beautiful day at the park! ...,Positive,enjoy beautiful day park
1,Traffic was terrible this morning. ...,Negative,traffic terrible morning
2,Just finished an amazing workout! 💪 ...,Positive,finished amazing workout
3,Excited about the upcoming weekend getaway! ...,Positive,excited upcoming weekend getaway
4,Trying out a new recipe for dinner tonight. ...,Neutral,try new recipe dinner tonight


In [11]:
le = LabelEncoder()
df['Sentiment'] = le.fit_transform(df['Sentiment'])

In [12]:
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['Sentiment'], test_size=0.2, random_state=42)

In [13]:
max_word = 5000
max_len = 100

In [14]:
tokenizer = Tokenizer(num_words=max_word , oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

In [15]:
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

In [16]:
X_train_pad = pad_sequences(X_train_seq , maxlen=max_len , padding='post' , truncating='post')
X_test_pad = pad_sequences(X_test_seq , maxlen=max_len , padding='post' , truncating='post')

In [17]:
model = Sequential()
model.add(Embedding(input_dim=max_word , output_dim=32 , input_length=max_len))
model.add(LSTM(32 , return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(32))
model.add(Dense(32, activation='relu'))
model.add(Dense(len(le.classes_),activation='softmax'))



In [18]:
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

In [19]:
from tensorflow.keras.callbacks import EarlyStopping
es = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

In [20]:
from sklearn.utils.class_weight import compute_class_weight
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weights_dict = dict(enumerate(class_weights))
model.fit(X_train_pad, y_train, class_weight=class_weights_dict, epochs=50, batch_size=32)

Epoch 1/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 75ms/step - accuracy: 0.0150 - loss: 8.8479
Epoch 2/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 75ms/step - accuracy: 0.0484 - loss: 8.6707
Epoch 3/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 75ms/step - accuracy: 0.0501 - loss: 8.3338
Epoch 4/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 126ms/step - accuracy: 0.0622 - loss: 7.8886
Epoch 5/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 79ms/step - accuracy: 0.0585 - loss: 7.8839
Epoch 6/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 77ms/step - accuracy: 0.0463 - loss: 7.9202
Epoch 7/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 75ms/step - accuracy: 0.0543 - loss: 7.9668
Epoch 8/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 75ms/step - accuracy: 0.0528 - loss: 7.9793
Epoch 9/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7da4f0f9df40>

In [21]:
loss,acc = model.evaluate(X_test_pad , y_test)
print(f"Test Accuracy:{acc*100:.2f}%")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.0185 - loss: 6.8759
Test Accuracy:2.04%


In [22]:
model.save('Sentiment_model.h5')
with open('tokenizer.pickle', 'wb') as f:
    pickle.dump(tokenizer,f)
np.save('classes.npy',le.classes_)



In [23]:
from google.colab import files
files.download('Sentiment_model.h5')
files.download('tokenizer.pickle')
files.download('classes.npy')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [24]:
def predict_sentiment(text):
    processed = preprocess_text(text)
    seq = tokenizer.texts_to_sequences([processed])
    padded = pad_sequences(seq, maxlen=max_len, padding='post')
    pred = model.predict(padded)
    label = le.inverse_transform([np.argmax(pred)])
    return label[0]

iface = gr.Interface(fn=predict_sentiment, inputs="text", outputs="text", live=True)
iface.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://6881c46f2a113f9540.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


