In [1]:
# Import necessary libraries
import nltk
import random
from nltk.corpus import twitter_samples
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Download NLTK resources
nltk.download('twitter_samples')
nltk.download('stopwords')
nltk.download

[nltk_data] Downloading package twitter_samples to /root/nltk_data...
[nltk_data]   Unzipping corpora/twitter_samples.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [2]:
positive_tweets = twitter_samples.strings('positive_tweets.json')
negative_tweets = twitter_samples.strings('negative_tweets.json')

In [3]:
tweets = positive_tweets + negative_tweets

In [4]:
labels = [1] * len(positive_tweets) + [0] * len(negative_tweets)

In [6]:
def preprocess_tweet(tweet):
  lemmatizer = WordNetLemmatizer()
  stop_words = set(stopwords.words('english'))
  # Tokenize
  tokens = word_tokenize(tweet.lower())
  # Remove stopwords and non-alphabetic tokens
  tokens = [lemmatizer.lemmatize(token) for token in tokens if token.isalpha() and token not in stop_words]
  return ' '.join(tokens)

In [11]:
import nltk
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [12]:
preprocessed_tweets = [preprocess_tweet(tweet) for tweet in tweets]

In [19]:
preprocessed_tweets

['followfriday top engaged member community week',
 'hey james odd please call contact centre able assist many thanks',
 'despiteofficial listen last night bleed amazing track scotland',
 'congrats',
 'yeaaaah yippppy accnt verified rqst succeed got blue tick mark fb profile day',
 'bhaktisbanter pallaviruhail one irresistible flipkartfashionfriday http',
 'like keep lovely customer waiting long hope enjoy happy friday lwwf http',
 'impatientraider second thought enough time dd new short entering system sheep must buying',
 'jgh go bayan bye',
 'act mischievousness calling etl layer warehousing app katamari name implies p',
 'followfriday top influencers community week',
 'would love big juicy selfies http http',
 'follow jnlazts amp http follow u back',
 'jjulieredburn perfect already know waiting',
 'great new opportunity junior triathletes aged gatorade series get entry http',
 'laying greeting card range print today love job',
 'friend lunch yummmm nostalgia tb ku',
 'rookiesenpai 

In [13]:
X_train, X_test, y_train, y_test = train_test_split(preprocessed_tweets, labels, test_size=0.2, random_state=42)

In [14]:
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [15]:
classifier = MultinomialNB()
classifier.fit(X_train_tfidf, y_train)

In [16]:
y_pred = classifier.predict(X_test_tfidf)

In [17]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.7625
Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.79      0.77       988
           1       0.78      0.73      0.76      1012

    accuracy                           0.76      2000
   macro avg       0.76      0.76      0.76      2000
weighted avg       0.76      0.76      0.76      2000



In [21]:
example_tweets = [
    "I love this movie, it's amazing!",
    "This product is terrible, waste of money.",
    "Feeling happy today! #goodvibes",
    "The customer service was awful, never shopping here again.",
    "Can't wait to see my friends this weekend! #excited"
]
preprocessed_example_tweets = [preprocess_tweet(tweet) for tweet in example_tweets]
example_tweets_tfidf = vectorizer.transform(preprocessed_example_tweets)
predictions = classifier.predict(example_tweets_tfidf)
sentiment_mapping = {0: 'Negative', 1: 'Positive'}

for tweet, prediction in zip(example_tweets, predictions):
    print("Tweet:", tweet)
    print("Predicted Sentiment:", sentiment_mapping[prediction])
    print()


Tweet: I love this movie, it's amazing!
Predicted Sentiment: Positive

Tweet: This product is terrible, waste of money.
Predicted Sentiment: Negative

Tweet: Feeling happy today! #goodvibes
Predicted Sentiment: Positive

Tweet: The customer service was awful, never shopping here again.
Predicted Sentiment: Negative

Tweet: Can't wait to see my friends this weekend! #excited
Predicted Sentiment: Positive

