# Sentiment Analysis using ANN

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow import keras

In [2]:
# Read the labelled messages into a DataFrame and preview the first five rows.
df = pd.read_csv("datasets/Sentiment.csv")
df.head(5)

Unnamed: 0,Index,message to examine,label (depression result)
0,106,just had a real good moment. i missssssssss hi...,0
1,217,is reading manga http://plurk.com/p/mzp1e,0
2,220,@comeagainjen http://twitpic.com/2y2lx - http:...,0
3,288,@lapcat Need to send 'em to my accountant tomo...,0
4,540,ADD ME ON MYSPACE!!! myspace.com/LookThunder,0


In [3]:
# Count missing entries per column (a non-zero count would need handling
# before the text is vectorized).
df.isna().sum()

Index                        0
message to examine           0
label (depression result)    0
dtype: int64

In [4]:
# Encode the raw messages as TF-IDF features.
# The vocabulary is capped at 5000 terms. float32 output is requested because
# the dense matrix built below is (n_samples x 5000): float64 would double its
# memory footprint, and Keras trains on float32 by default.
# NOTE(review): the vectorizer is fitted on the full dataset, before the
# train/test split performed later, so test-set text influences the vocabulary
# and IDF weights. Consider fitting on the training messages only.
tfidf = TfidfVectorizer(max_features=5000, dtype=np.float32)
X = tfidf.fit_transform(df["message to examine"])

# Convert the sparse TF-IDF matrix to a dense NumPy matrix
X = X.toarray()

# Target column: the depression label for each message.
y = df["label (depression result)"]

In [5]:
# Hold out 20% of the rows as the test set; the fixed random_state makes the
# partition repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Build the ANN model
# Seed the Python, NumPy and TensorFlow random generators first so that the
# random weight initialisation is repeatable from one run to the next.
keras.utils.set_random_seed(42)

# Fully connected binary classifier: one input per TF-IDF feature, five hidden
# ReLU layers, and a single sigmoid unit for the output.
model = keras.Sequential(
    [
        # Declare the input explicitly instead of passing `input_shape=` to
        # the first Dense layer.
        keras.Input(shape=(X_train.shape[1],)),
        keras.layers.Dense(128, activation="relu"),
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dense(1, activation="sigmoid"),
    ]
)

In [7]:
# Compile the model
# Adam is a widely used default optimizer; binary cross-entropy matches the
# single sigmoid output unit, and accuracy is tracked as the reported metric.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [8]:
# Train the model
# Keep the returned History object so the per-epoch loss/accuracy values stay
# available after training (and so its bare repr is not echoed as cell output).
# NOTE(review): the test split is also passed as validation data here; if the
# validation metrics are ever used for tuning or early stopping, carve a
# separate validation set out of the training data instead.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2292ff89970>

In [9]:
# Score the trained model on the held-out split with `evaluate`.
# With the metrics configured at compile time, `results` holds the loss
# followed by the accuracy.
print("Evaluate on test data")
results = model.evaluate(X_test, y_test, batch_size=32)
test_loss, test_acc = results[0], results[1]
print("Test loss:", test_loss, "\nTest acc:", test_acc)

Evaluate on test data
Test loss: 0.1660359799861908 
Test acc: 0.9747939705848694


---

# Saving the model

---

In [10]:
# Save the model
# Create the output directory first: nothing earlier in the notebook creates
# `./models`, so on a fresh checkout the save would fail if it is missing.
Path("./models").mkdir(parents=True, exist_ok=True)
model.save("./models/sentiment_analysis.keras")

In [11]:
# Restore the model from the file written above and print its layer summary
# to confirm the architecture round-tripped.
loaded_model = keras.models.load_model(
    "./models/sentiment_analysis.keras"
)
loaded_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               640128    
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dense_2 (Dense)             (None, 64)                4160      
                                                                 
 dense_3 (Dense)             (None, 64)                4160      
                                                                 
 dense_4 (Dense)             (None, 64)                4160      
                                                                 
 dense_5 (Dense)             (None, 1)                 65        
                                                                 
Total params: 660929 (2.52 MB)
Trainable params: 660929 