In [2]:
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Sample Twitter data for sentiment analysis (replace with your own dataset)
tweets = [
    ("I love this product. It's amazing!", "positive"),
    ("Terrible experience with this company.", "negative"),
    ("This movie is okay, not great.", "neutral"),
    # Add more tweets with their corresponding sentiment labels
]

# Preprocess the Twitter data
stop_words = set(stopwords.words('english'))
preprocessed_tweets = [(text.lower(), sentiment) for text, sentiment in tweets]

# Split data into features (X) and labels (y)
X = [text for text, _ in preprocessed_tweets]
y = [sentiment for _, sentiment in preprocessed_tweets]

# Vectorize and transform the text data
vectorizer = CountVectorizer()
X_vectorized = vectorizer.fit_transform(X)
tfidf_transformer = TfidfTransformer()
X_tfidf = tfidf_transformer.fit_transform(X_vectorized)

# Create k-NN classifier
knn_classifier = KNeighborsClassifier(n_neighbors=3)  # You can adjust the number of neighbors (k)

# Train the classifier
knn_classifier.fit(X_tfidf, y)

# Predict sentiment labels for new data
new_tweets = ["This is a great product!", "I hate it.", "Average experience."]
new_tweets = [text.lower() for text in new_tweets]
X_new_vectorized = vectorizer.transform(new_tweets)
X_new_tfidf = tfidf_transformer.transform(X_new_vectorized)
predicted_sentiments = knn_classifier.predict(X_new_tfidf)

print("Predicted Sentiments:", predicted_sentiments)


Predicted Sentiments: ['negative' 'negative' 'negative']


In [2]:
pip install nltk

Collecting nltk
  Downloading nltk-3.8.1-py3-none-any.whl (1.5 MB)
     ---------------------------------------- 1.5/1.5 MB 1.5 MB/s eta 0:00:00
Collecting click (from nltk)
  Using cached click-8.1.7-py3-none-any.whl (97 kB)
Collecting regex>=2021.8.3 (from nltk)
  Downloading regex-2023.10.3-cp39-cp39-win_amd64.whl (269 kB)
     -------------------------------------- 269.6/269.6 kB 2.4 MB/s eta 0:00:00
Collecting tqdm (from nltk)
  Downloading tqdm-4.66.1-py3-none-any.whl (78 kB)
     ---------------------------------------- 78.3/78.3 kB 4.3 MB/s eta 0:00:00
Installing collected packages: tqdm, regex, click, nltk
Successfully installed click-8.1.7 nltk-3.8.1 regex-2023.10.3 tqdm-4.66.1
Note: you may need to restart the kernel to use updated packages.
