## 1. Rule-Based Sentiment Analysis

In [47]:
# Sample text
text = "I had a terrible experience with this product. It's awful."

# Define lists of positive and negative words
positive_words = ["good", "excellent", "wonderful", "amazing"]
negative_words = ["bad", "terrible", "awful", "horrible"]

In [48]:
# Tokenize the text into words
words = text.lower().split()

# Initialize sentiment scores
positive_score = 0
negative_score = 0

# Calculate sentiment scores based on word counts
for word in words:
    if word in positive_words:
        positive_score += 1
    elif word in negative_words:
        negative_score += 1

In [49]:
# Determine sentiment based on scores
if positive_score > negative_score:
    sentiment = 'positive'
elif negative_score > positive_score:
    sentiment = 'negative'
else:
    sentiment = 'neutral'

# Print the sentiment and scores
print(f"Text: {text}")
print(f"Sentiment: {sentiment}")
print(f"Positive Score: {positive_score}")
print(f"Negative Score: {negative_score}")

Text: I had a terrible experience with this product. It's awful.
Sentiment: negative
Positive Score: 0
Negative Score: 1


## 2. Machine Learning-Based Sentiment Analysis

In [50]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

In [51]:
df = pd.read_csv('data/IMDB Dataset.csv')

In [52]:
df.shape

(50000, 2)

In [53]:
df.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [54]:
import re
import string
# Function to remove special characters and change to lowercase
def preprocess_text(text):
    # Remove special characters using regular expressions
    text = re.sub(r'[^a-zA-Z]', ' ', text)    
    # Replace multiple spaces with a single space
    text = re.sub(r'\s+', ' ', text)
    # Remove special characters and sentence terminators using regular expressions and string.punctuation
    text = re.sub(f"[{re.escape(string.punctuation)}]", '', text)
    # Remove carriage returns and tabs
    text = text.replace('\n', '').replace('\t', '').strip()
    #
    text = text.replace('br', '').strip()
    # Convert to lowercase
    text = text.lower()    
    return text

In [55]:
df['cleaned_text'] = df['review'].apply(preprocess_text)

In [56]:
df.head()

Unnamed: 0,review,sentiment,cleaned_text
0,One of the other reviewers has mentioned that ...,positive,one of the other reviewers has mentioned that ...
1,A wonderful little production. <br /><br />The...,positive,a wonderful little production the filming te...
2,I thought this was a wonderful way to spend ti...,positive,i thought this was a wonderful way to spend ti...
3,Basically there's a family where a little boy ...,negative,basically there s a family where a little boy ...
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive,petter mattei s love in the time of money is a...


In [57]:
# Split the data into training and testing sets
X = df['cleaned_text']
y = df['sentiment']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [58]:
# Create TF-IDF vectors for the text data
tfidf_vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

In [59]:
model = MultinomialNB()
model.fit(X_train_tfidf, y_train)

In [60]:
# Predict sentiment on the test data
y_pred = model.predict(X_test_tfidf)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.83


## 3. Sentiment Analysis Applications

In [61]:
# pip install nltk

In [62]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [63]:
# Download the VADER lexicon
nltk.download("vader_lexicon")

# Initialize the VADER sentiment intensity analyzer
sia = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\ntd20\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [64]:
# Sample text for sentiment analysis
text = "I love this product! It's amazing."

# Get the sentiment scores
sentiment_scores = sia.polarity_scores(text)

# Determine the sentiment based on the compound score
compound_score = sentiment_scores["compound"]

if compound_score >= 0.05:
    sentiment = "positive"
elif compound_score <= -0.05:
    sentiment = "negative"
else:
    sentiment = "neutral"

# Print the sentiment and sentiment scores
print(f"Sentiment: {sentiment}")

Sentiment: positive
