In [None]:
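# Load a dataset for sentiment analysis, clean the text (lowercase, strip punctuation), and split it into train/test sets.
# No explicit tokenization is performed here; the TF-IDF vectorizer in a later cell tokenizes the cleaned text itself.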

import pandas as pd
import re
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
import nltk
nltk.download('punkt')

# Toy corpus for demonstration purposes; swap in a real dataset when one is available.
data = {
    'text': [
        "I love this movie!",
        "I hate this product!",
        "This is the best thing ever!",
        "Awful experience, never again.",
    ],
    'label': ['positive', 'negative', 'positive', 'negative'],
}
df = pd.DataFrame(data)

# Normalise every document: lowercase it first, then drop each character that is
# neither a word character nor whitespace (i.e. strip punctuation).
df['text'] = df['text'].map(lambda raw: re.sub(r'[^\w\s]', '', raw.lower()))
print("Preprocessed Data:\n", df)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label'],
    test_size=0.2,
    random_state=42,
)

In [None]:
# Vectorise the cleaned text with TF-IDF so the classifier receives numeric features.

from sklearn.feature_extraction.text import TfidfVectorizer

# Vectoriser with scikit-learn's default settings.
tfidf_vectorizer = TfidfVectorizer()

# Learn the vocabulary and IDF weights from the training split only, then reuse
# that fitted state to encode the test split (no information leaks from test data).
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

train_feature_shape = X_train_tfidf.shape  # (documents, vocabulary terms)
print("TF-IDF Features Shape:", train_feature_shape)

In [None]:
# Fit a Multinomial Naive Bayes classifier on the TF-IDF features and predict sentiment labels for the test split.

from sklearn.naive_bayes import MultinomialNB

# fit() returns the estimator itself, so construction and training are chained.
nb_model = MultinomialNB().fit(X_train_tfidf, y_train)

# Predicted sentiment label for each held-out document.
y_pred = nb_model.predict(X_test_tfidf)

In [None]:
# Evaluate the model's performance using accuracy plus per-class precision, recall, and F1-score.

from sklearn.metrics import accuracy_score, classification_report

# Fraction of held-out documents whose predicted label matches the true label.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Per-class precision / recall / F1.
# On a very small held-out set a class can be missing from y_test or from y_pred,
# which turns some of these ratios into 0/0. zero_division=0 reports those entries
# as 0.0 (the same value the default setting produces) without emitting an
# UndefinedMetricWarning for each one, so the cell output stays readable.
print("Classification Report:\n", classification_report(y_test, y_pred, zero_division=0))