In [7]:
positive_words = set()
negative_words = set()

# Each lexicon file is read as one entry per line, with surrounding
# whitespace removed. Blank lines are skipped so the empty string never
# becomes a lexicon entry: preprocess_text() can produce "" for tokens made
# only of punctuation, and those would otherwise match both word lists.
with open('positive-words.txt', 'r') as file:
    positive_words.update(entry for entry in map(str.strip, file) if entry)

with open('negative-words.txt', 'r') as file:
    negative_words.update(entry for entry in map(str.strip, file) if entry)

def preprocess_text(text):
    """Lower-case *text*, split it on whitespace, and trim edge punctuation.

    Only the characters ``. , ! ? " ) (`` are removed, and only from the two
    ends of each token; punctuation inside a token is left alone. A token
    consisting solely of those characters becomes an empty string and is
    still included in the result.

    Returns:
        A list of tokens, in their original order.
    """
    edge_punctuation = '.,!?")('
    tokens = []
    for raw_token in text.lower().split():
        tokens.append(raw_token.strip(edge_punctuation))
    return tokens

def calculate_sentiment_score(review, positive_words, negative_words):
    """Return the lexicon-based sentiment score of a review.

    The review is tokenized with ``preprocess_text``. The score is the number
    of tokens found in ``positive_words`` minus the number of tokens found in
    ``negative_words``; repeated tokens are counted each time they occur.

    Args:
        review: The review text to score.
        positive_words: Container of positive words (tested with ``in``).
        negative_words: Container of negative words (tested with ``in``).

    Returns:
        An int: positive when positive matches outnumber negative matches,
        negative in the opposite case, and zero when they are equal.
    """
    words = preprocess_text(review)

    # One pass over the tokens: each membership test is a bool (1 or 0), so a
    # positive hit adds 1, a negative hit subtracts 1, and a token present in
    # both lists contributes nothing.
    return sum(
        (word in positive_words) - (word in negative_words)
        for word in words
    )

def analyze_sentiment(review, positive_words, negative_words):
    """Classify a review as ``"Positive"`` or ``"Negative"``.

    The label is derived from ``calculate_sentiment_score``: a score strictly
    greater than zero yields ``"Positive"``; every other score, including a
    tie of zero, yields ``"Negative"``.
    """
    score = calculate_sentiment_score(review, positive_words, negative_words)
    return "Positive" if score > 0 else "Negative"



In [8]:
import os
from sklearn.metrics import confusion_matrix,accuracy_score, precision_score, recall_score, f1_score

test_reviews = []
true_labels = []

test_folder = "test"
# os.listdir() returns entries in arbitrary order; sorting makes the order of
# the collected reviews reproducible between runs.
for filename in sorted(os.listdir(test_folder)):
    if not filename.endswith(".txt"):
        continue

    # The gold label is taken from the file name. It is resolved before the
    # review is stored so that the two lists always grow together.
    if "pos" in filename:
        label = "Positive"
    elif "neg" in filename:
        label = "Negative"
    else:
        # No label can be inferred from this name. Skipping the file keeps
        # test_reviews and true_labels the same length and aligned.
        continue

    with open(os.path.join(test_folder, filename), 'r') as file:
        review = file.read()
    test_reviews.append(review)
    true_labels.append(label)

# Run the lexicon classifier over every collected test review.
predicted_labels = [
    analyze_sentiment(review, positive_words, negative_words)
    for review in test_reviews
]

# Overall accuracy, then precision / recall / F1 averaged with
# average='macro', computed in that order.
accuracy = accuracy_score(true_labels, predicted_labels)
precision, recall, f1 = (
    scorer(true_labels, predicted_labels, average='macro')
    for scorer in (precision_score, recall_score, f1_score)
)

# The explicit label order puts 'Positive' in the first row and column.
cm = confusion_matrix(true_labels, predicted_labels, labels=['Positive', 'Negative'])

print("Confusion matrix:\n", cm, "\nAccuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)


Confusion matrix:
 [[107  53]
 [ 45 115]] 
Accuracy: 0.69375
Precision: 0.6942355889724311
Recall: 0.69375
F1 Score: 0.6935584740462789
