# **Social Media Sentiment Analysis**

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

### **Reddit**

In [None]:
df = pd.read_csv('/content/Reddit_Data.csv.zip')
df.head()

Unnamed: 0,clean_comment,category
0,family mormon have never tried explain them t...,1
1,buddhism has very much lot compatible with chr...,1
2,seriously don say thing first all they won get...,-1
3,what you have learned yours and only yours wha...,0
4,for your own benefit you may want read living ...,1


In [None]:
num_classes = 3

# Encode sentiment labels
df['clean_comment'] = df['clean_comment'].astype(str)
df['category'] = df['category'].map({0: 0, 1: 1, -1: 2})

max_words = 1000
max_len = 100

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(df['clean_comment'])
sequences = tokenizer.texts_to_sequences(df['clean_comment'])
X = pad_sequences(sequences, maxlen=max_len)
y = pd.get_dummies(df['category']).values

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# LSTM model
model = Sequential()
model.add(Embedding(max_words, 50, input_length=max_len))
model.add(LSTM(100))
model.add(Dense(num_classes, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Training
model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test))

# Model Evaluation
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Accuracy: 84.13%


In [None]:
new_comments = ["Sky is blue", "Sun looks shiny"]
new_sequences = tokenizer.texts_to_sequences(new_comments)
new_X = pad_sequences(new_sequences, maxlen=max_len)

predictions = model.predict(new_X)
predicted_sentiments = [int(pred.argmax()) - 1 for pred in predictions]
print('Predicted Sentiments:', predicted_sentiments)

Predicted Sentiments: [-1, -1]


### **Twitter**

In [2]:
df_twitter = pd.read_csv('/content/Twitter_Data.csv.zip')
df_twitter.head()

Unnamed: 0,clean_text,category
0,when modi promised “minimum government maximum...,-1.0
1,talk all the nonsense and continue all the dra...,0.0
2,what did just say vote for modi welcome bjp t...,1.0
3,asking his supporters prefix chowkidar their n...,1.0
4,answer who among these the most powerful world...,1.0


In [3]:
num_classes = 3

# Encode sentiment labels
df_twitter['clean_text'] = df_twitter['clean_text'].astype(str)
df_twitter['category'] = df_twitter['category'].map({0: 0, 1: 1, -1: 2})

max_words = 1000
max_len = 100

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(df_twitter['clean_text'])
sequences = tokenizer.texts_to_sequences(df_twitter['clean_text'])
X = pad_sequences(sequences, maxlen=max_len)
y = pd.get_dummies(df_twitter['category']).values

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
# LSTM model
model = Sequential()
model.add(Embedding(max_words, 50, input_length=max_len))
model.add(LSTM(100))
model.add(Dense(num_classes, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Training
model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test))

# Model Evaluation
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Accuracy: 83.95%


In [None]:
new_comments = ["Deserts are hot", "Sun looks shiny"]
new_sequences = tokenizer.texts_to_sequences(new_comments)
new_X = pad_sequences(new_sequences, maxlen=max_len)

predictions = model.predict(new_X)
predicted_sentiments = [int(pred.argmax()) - 1 for pred in predictions]
print('Predicted Sentiments:', predicted_sentiments)