In [1]:
import os
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Load the data from a local file
data = pd.read_csv('/Tweets.csv', encoding='utf-8')

# Keep only the tweets whose text mentions "Twitter" (case-sensitive, literal
# substring match). na=False makes rows with missing text count as "no match";
# without it the mask contains NaN and boolean indexing raises.
mentions_twitter = data['text'].str.contains('Twitter', regex=False, na=False)

# Take an explicit copy of the filtered rows so that adding the 'sentiment'
# column below writes to an independent frame rather than to a slice of the
# original (which triggers pandas' SettingWithCopyWarning).
data = data.loc[mentions_twitter, ['text', 'airline_sentiment']].copy()

# Preprocess data: encode the sentiment labels as integers. Labels that are
# not in the mapping become NaN and are removed by dropna() below.
data['sentiment'] = data['airline_sentiment'].map({'negative': 0, 'neutral': 1, 'positive': 2})
data = data[['text', 'sentiment']]
data = data.dropna()
data = data.reset_index(drop=True)

# Split data into train and test sets: 20% of the rows are held out for
# testing, and the fixed random_state keeps the split reproducible.
texts = data['text']
labels = data['sentiment']
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.2,
    random_state=42,
)

# Vectorize text data as TF-IDF features. The vectorizer is fitted on the
# training texts only; the test texts are transformed with that fitted state.
vectorizer = TfidfVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Each classifier is fitted on the TF-IDF training matrix and then used to
# predict labels for the held-out test matrix. fit() returns the estimator
# itself, so construction and training are chained.

# Naïve Bayes model
nb = MultinomialNB().fit(X_train_vectorized, y_train)
nb_y_pred = nb.predict(X_test_vectorized)

# Logistic Regression model
lr = LogisticRegression().fit(X_train_vectorized, y_train)
lr_y_pred = lr.predict(X_test_vectorized)

# Support Vector Machine model
svm = LinearSVC().fit(X_train_vectorized, y_train)
svm_y_pred = svm.predict(X_test_vectorized)

# Evaluate the models: print accuracy plus weighted precision and recall for
# each classifier's test-set predictions, with a blank line between reports.
model_predictions = [
    ('Naïve Bayes', nb_y_pred),
    ('Logistic Regression', lr_y_pred),
    ('Support Vector Machine', svm_y_pred),
]

for position, (model_name, y_pred) in enumerate(model_predictions):
    if position:
        # Separator between reports (none before the first, none after the last).
        print('')
    print(model_name)
    print('Accuracy:', accuracy_score(y_test, y_pred))
    # zero_division=0 scores a class with an undefined precision/recall
    # (e.g. a class the model never predicts) as 0 -- the same value the
    # default setting uses -- without emitting UndefinedMetricWarning.
    print('Precision:', precision_score(y_test, y_pred, average='weighted', zero_division=0))
    print('Recall:', recall_score(y_test, y_pred, average='weighted', zero_division=0))

Naïve Bayes
Accuracy: 0.5833333333333334
Precision: 0.34027777777777785
Recall: 0.5833333333333334

Logistic Regression
Accuracy: 0.5833333333333334
Precision: 0.34027777777777785
Recall: 0.5833333333333334

Support Vector Machine
Accuracy: 0.5
Precision: 0.5
Recall: 0.5


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
