In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Baseline sentiment classifier: TF-IDF features fed to Multinomial Naive Bayes.
# Reads 'Tweets.csv', trains on a held-out split and prints accuracy plus a
# per-class precision/recall/F1 report.

# Tunable settings, gathered in one place so they are easy to find and change.
TEXT_COL = 'text'
LABEL_COL = 'airline_sentiment'
TEST_FRACTION = 0.2
RANDOM_SEED = 42
MAX_TFIDF_FEATURES = 5000

# Load the dataset
data = pd.read_csv('Tweets.csv')

# Keep only the label and text columns.
# Rows missing either value are dropped: a NaN text survives `.str.lower()`
# unchanged and then makes the vectorizer fail, and a NaN label is not a
# usable class. `.copy()` makes `data` an independent frame so the column
# assignment below never targets a slice of the loaded frame.
data = data[[LABEL_COL, TEXT_COL]].dropna().copy()

# Normalise case (TfidfVectorizer lowercases by default as well; doing it here
# also keeps `data[TEXT_COL]` lowercased for anything that reads it later).
data[TEXT_COL] = data[TEXT_COL].str.lower()

# Split the data into training and testing sets.
# NOTE(review): the split is not stratified; if the label distribution is
# skewed, consider stratify=data[LABEL_COL] (this changes the split and
# therefore the reported metrics).
X_train, X_test, y_train, y_test = train_test_split(
    data[TEXT_COL],
    data[LABEL_COL],
    test_size=TEST_FRACTION,
    random_state=RANDOM_SEED,
)

# Convert text to TF-IDF features. The vocabulary and IDF weights are fitted on
# the training texts only; the test texts are merely transformed, so nothing
# from the test set leaks into the features.
vectorizer = TfidfVectorizer(max_features=MAX_TFIDF_FEATURES)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train a Naive Bayes classifier
classifier = MultinomialNB()
classifier.fit(X_train_tfidf, y_train)

# Make predictions on the held-out texts
y_pred = classifier.predict(X_test_tfidf)

# Evaluate the model: overall accuracy and per-class metrics
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:\n", report)


Accuracy: 0.74
Classification Report:
               precision    recall  f1-score   support

    negative       0.72      0.99      0.84      1889
     neutral       0.76      0.25      0.37       580
    positive       0.93      0.32      0.48       459

    accuracy                           0.74      2928
   macro avg       0.80      0.52      0.56      2928
weighted avg       0.76      0.74      0.69      2928

