In [11]:
# ----------------------------------------------------------------------------
# Title: Assignment 3.2
# Author: Surenther Selvaraj
# Date: 26 September 2025
# Modified By: Surenther Selvaraj
# Description: Sentiment Analysis and Preprocessing Text
# Data: https://www.kaggle.com/c/word2vec-nlp-tutorial/data
# ----------------------------------------------------------------------------

In [12]:
#Import the movie review data
import pandas as pd
from textblob import TextBlob
from sklearn.metrics import accuracy_score

# Location of the labeled movie-review data set (tab-separated values)
file_name = "labeledTrainData.tsv"

# Fields in a .tsv file are separated by tabs, so override the default comma delimiter.
# NOTE(review): the preview below shows stray backslashes/quotes in some reviews
# (e.g. row 1), which suggests escaped quotes are not being unescaped by the
# default quote handling -- consider read_csv's `escapechar`/`quoting` options; TODO confirm.
df = pd.read_csv(file_name, delimiter='\t')

# Sanity-check the load by previewing the first five rows
print("\nFirst 5 rows of the DataFrame:", df.head(5), sep="\n")



First 5 rows of the DataFrame:
       id  sentiment                                             review
0  5814_8          1  With all this stuff going down at the moment w...
1  2381_9          1  \The Classic War of the Worlds\" by Timothy Hi...
2  7759_3          0  The film starts with a manager (Nicholas Bell)...
3  3630_4          0  It must be assumed that those who praised this...
4  9495_8          1  Superbly trashy and wondrously unpretentious 8...


In [13]:
# Tally how many reviews carry each sentiment label (1 = positive, 0 = negative)
sentiment_counts = df['sentiment'].value_counts()

# Report each class total, looking the counts up explicitly by label
print("Number of positive and negative reviews:")
print(f"Positive Reviews (1): {sentiment_counts.loc[1]}")
print(f"Negative Reviews (0): {sentiment_counts.loc[0]}")

Number of positive and negative reviews:
Positive Reviews (1): 12500
Negative Reviews (0): 12500


In [14]:
# --- TextBlob Sentiment Analysis ---

def get_textblob_sentiment(text, threshold=0.0):
    """Classify text as 'Positive' or 'Negative' from its TextBlob polarity.

    Args:
        text: The text to analyze; it is passed straight to ``TextBlob``.
        threshold: Polarity cut-off. A polarity greater than or equal to this
            value is labeled 'Positive'. The default of 0.0 keeps the original
            behavior, in which exactly-neutral text (polarity 0) counts as
            'Positive'. Raise it to make the 'Positive' label stricter.

    Returns:
        'Positive' if the polarity is >= ``threshold``, otherwise 'Negative'.
    """
    polarity = TextBlob(text).sentiment.polarity
    # Inclusive comparison: a score exactly at the cut-off is labeled 'Positive'.
    return 'Positive' if polarity >= threshold else 'Negative'

# Label every review by running the classifier over each entry of the 'review' column
print("Classifying movie reviews using TextBlob...")
df['TextBlob_Sentiment'] = df['review'].map(get_textblob_sentiment)

# Preview the predictions next to the ground-truth labels, then tally the predicted classes
print("\nFirst 5 rows with the new TextBlob_Sentiment column:")
print(df.head()[['review', 'sentiment', 'TextBlob_Sentiment']])
print("\n--- Summary of TextBlob Sentiment Classification ---")
print(df['TextBlob_Sentiment'].value_counts())


Classifying movie reviews using TextBlob...

First 5 rows with the new TextBlob_Sentiment column:
                                              review  sentiment  \
0  With all this stuff going down at the moment w...          1   
1  \The Classic War of the Worlds\" by Timothy Hi...          1   
2  The film starts with a manager (Nicholas Bell)...          0   
3  It must be assumed that those who praised this...          0   
4  Superbly trashy and wondrously unpretentious 8...          1   

  TextBlob_Sentiment  
0           Positive  
1           Positive  
2           Negative  
3           Positive  
4           Negative  

--- Summary of TextBlob Sentiment Classification ---
TextBlob_Sentiment
Positive    19017
Negative     5983
Name: count, dtype: int64


In [None]:
# --- TextBlob Accuracy Calculation ---

# Translate the 0/1 ground-truth labels into the same 'Positive'/'Negative'
# strings stored in the TextBlob_Sentiment column so the two are comparable
df['true_sentiment_text'] = df['sentiment'].map({0: 'Negative', 1: 'Positive'})

# Score the predictions against the ground truth
accuracy = accuracy_score(
    y_true=df['true_sentiment_text'],
    y_pred=df['TextBlob_Sentiment'],
)

# Report the score rounded to two decimal places
print("\n--- Model Accuracy ---")
print(f"The accuracy of the TextBlob model is: {accuracy:.2f}")


--- Model Accuracy ---
The accuracy of the TextBlob model is: 0.69


### Conclusion and Analysis for TextBlob

The model's accuracy is calculated by comparing the sentiment predicted by TextBlob with the true sentiment labels provided in the dataset. Since the dataset is perfectly balanced, with 12,500 positive and 12,500 negative reviews, a random guess would have an expected accuracy of 50%. The TextBlob model, which relies on a pre-built sentiment lexicon, performs much better than this baseline: its accuracy was 69% (0.69).

Therefore, the TextBlob model is significantly better than random guessing. Its accuracy, shown in the output above, is a reasonable indicator of how effectively it classifies movie-review sentiment without any training on this dataset.