## EECP0020 - LINGUAGENS FORMAIS E AUTÔMATOS (2024.1 - T01)
PROJETO GRUPO 2


### EXEMPLO INICIAL

In [None]:
import re

class SentimentAnalyzer:
    """Keyword-based sentiment classifier for Portuguese text.

    Counts case-insensitive regular-expression hits for a fixed list of
    positive words and a fixed list of negative words, then compares the
    two totals.
    """

    def __init__(self):
        # Each entry is a regular expression searched anywhere in the text.
        self.positive_patterns = [r'excelente', r'ótimo', r'maravilhoso', r'incrível']
        self.negative_patterns = [r'péssimo', r'ruim', r'horrível', r'desastroso', r'insatisfeito']

    def analyze_sentiment(self, text):
        """Return "Positive", "Negative" or "Neutral" for *text*.

        The label is chosen by whichever pattern list matches more often;
        a tie (including no matches at all) yields "Neutral".
        """
        balance = (self._calculate_score(text, self.positive_patterns)
                   - self._calculate_score(text, self.negative_patterns))
        if balance > 0:
            return "Positive"
        if balance < 0:
            return "Negative"
        return "Neutral"

    def _calculate_score(self, text, patterns):
        """Return the total count of case-insensitive matches of *patterns* in *text*."""
        return sum(len(re.findall(regex, text, re.IGNORECASE)) for regex in patterns)

# Usage example: classify one sample sentence and print the resulting label.
text = "O produto é horrível, estou muito insatisfeito!"
analyzer = SentimentAnalyzer()
sentiment = analyzer.analyze_sentiment(text)
print("Sentimento:", sentiment)


Sentimento: Negative


### PROJETO

In [None]:
import nltk
# Download the NLTK resources used by the rest of the notebook.
resources = (
    'movie_reviews',  # initial data set
    'stopwords',      # stopword lists
    'punkt',          # unsupervised trainable model
)
for resource in resources:
    nltk.download(resource)

[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
import random
from nltk.corpus import movie_reviews
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk import FreqDist
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from nltk.classify import accuracy

In [None]:
# Show the category labels defined in the movie_reviews corpus.
movie_reviews.categories()

['neg', 'pos']

In [None]:
# Show the total number of file ids in the movie_reviews corpus.
len(movie_reviews.fileids())

2000

In [None]:
# Number of corpus files in each of the two categories.
num_neg, num_pos = (
    len(movie_reviews.fileids(categories=label)) for label in ('neg', 'pos')
)

print("Quantidades de reviews positivos: ", num_pos)
print("Quantidades de reviews negativos: ", num_neg)

Quantidades de reviews positivos:  1000
Quantidades de reviews negativos:  1000


In [None]:
class SentimentAnalyzer:
    """Naive Bayes sentiment classifier trained on NLTK's movie_reviews corpus.

    Creating an instance loads the corpus, builds a vocabulary of lower-cased
    alphabetic non-stopword tokens, turns every review into a word-presence
    feature dict, splits the data (25% held out for testing) and trains a
    MultinomialNB model wrapped in SklearnClassifier.
    """

    def __init__(self):
        # Load the English stopwords once, as a set. Calling
        # stopwords.words('english') inside a comprehension condition
        # re-evaluates it for every token and scans the resulting list.
        self._stop_words = frozenset(stopwords.words('english'))

        # Load the movie reviews as (word list, category) pairs and shuffle them.
        self.documents = [(list(movie_reviews.words(fileid)), category)
                          for category in movie_reviews.categories()
                          for fileid in movie_reviews.fileids(category)]
        random.shuffle(self.documents)

        # Vocabulary: lower-case every corpus token, keep alphabetic
        # non-stopwords and count their frequencies.
        lowered = (word.lower() for word in movie_reviews.words())
        self.all_words = FreqDist(
            word for word in lowered
            if word.isalpha() and word not in self._stop_words
        )
        self.word_features = list(self.all_words.keys())

        # Build one feature dict per review.
        self.featuresets = [(self.find_features(rev), category)
                            for (rev, category) in self.documents]

        # Split into training and test sets (25% held out for testing).
        self.train_set, self.test_set = train_test_split(self.featuresets, test_size=0.25)

        # Train the classifier.
        self.classifier = SklearnClassifier(MultinomialNB())
        self.classifier.train(self.train_set)

    def find_features(self, document):
        """Return a dict mapping each vocabulary word to whether it occurs in *document*.

        *document* is an iterable of tokens; membership is tested against
        ``self.word_features``, so the result always has one key per
        vocabulary word.
        """
        words = set(document)
        return {w: (w in words) for w in self.word_features}

    def analyze_sentiment(self, text):
        """Classify *text* and return the label predicted by the trained classifier.

        The text is lower-cased, tokenized, and filtered to alphabetic
        non-stopword tokens, mirroring the vocabulary preprocessing.
        """
        tokens = word_tokenize(text.lower(), language='english')
        words = [token for token in tokens
                 if token.isalpha() and token not in self._stop_words]

        # Extract features from the input and classify them.
        return self.classifier.classify(self.find_features(words))

    def evaluate_model(self):
        """Return the classifier's accuracy on the held-out test set."""
        # Returned directly: a local named accuracy_score would shadow the
        # sklearn.metrics function imported by the notebook.
        return accuracy(self.classifier, self.test_set)

In [None]:
# Usage example
if __name__ == "__main__":
    # Sample texts to classify
    texts = [
        "I loved the new movie!",
        "The customer service was terrible, I will never go back.",
        "This product is good, but could be better.",
        "I'm neutral about this book.",
        "I hated the new movie!",
        "This was the worst thing I have seen in my entire life. Aesthetics aside, there was nothing. Nothing to say, nothing to comment on. Just mindless amalgamation of scenes that were supposed to engineer some kind of reaction. The only reaction the entire theater had was sighing, cringing, and leaving.",
        "Am a lifelong sci-fi fan. This one is visually stunning. Has a great plot too. Am pretty easily bored and this one never let my attention lapse. A must see as far as I'm concerned. The special effects on the Simlicants was flawless. The twist of who wants peace and who is the aggressor was also unexpected. Watched Rebel Moon recently and it was a huge disappointment compared to The Creator. The Creator is a unique story and had the look and feel of the Blade Runner movies, at least in the large cities. The rural scenes were also beautiful, as were the water towns. I bought the blu ray and it will get watched again in the future.",
    ]

    # Build the sentiment analyzer (its constructor also trains the model)
    analyzer = SentimentAnalyzer()

    # Classify each sample text and print the predicted label
    for text in texts:
        sentiment = analyzer.analyze_sentiment(text)
        print(f"Texto: '{text}'\nSentimento: {sentiment}\n")

Texto: 'I loved the new movie!'
Sentimento: pos

Texto: 'The customer service was terrible, I will never go back.'
Sentimento: neg

Texto: 'This product is good, but could be better.'
Sentimento: neg

Texto: 'I'm neutral about this book.'
Sentimento: pos

Texto: 'I hated the new movie!'
Sentimento: neg

Texto: 'This was the worst thing I have seen in my entire life. Aesthetics aside, there was nothing. Nothing to say, nothing to comment on. Just mindless amalgamation of scenes that were supposed to engineer some kind of reaction. The only reaction the entire theater had was sighing, cringing, and leaving.'
Sentimento: neg

Texto: 'Am a lifelong sci-fi fan. This one is visually stunning. Has a great plot too. Am pretty easily bored and this one never let my attention lapse. A must see as far as I'm concerned. The special effects on the Simlicants was flawless. The twist of who wants peace and who is the aggressor was also unexpected. Watched Rebel Moon recently and it was a huge disappo

In [None]:
# Measure the analyzer's accuracy via evaluate_model() and print it.
accurary_model = analyzer.evaluate_model()
print("Acurácia do Modelo: ", accurary_model)

Acurácia do Modelo:  0.83
