In [None]:
!pip install -Uqq datasets
!pip install -Uqq scikit-learn

## Load dataset

In [None]:
from datasets import load_dataset

# Fetch the Twitter sentiment corpus by its repository id and show the
# resulting split layout (split names, feature names, row counts).
dataset = load_dataset(path="carblacac/twitter-sentiment-analysis")
print(dataset)


DatasetDict({
    train: Dataset({
        features: ['text', 'feeling'],
        num_rows: 119988
    })
    validation: Dataset({
        features: ['text', 'feeling'],
        num_rows: 29997
    })
    test: Dataset({
        features: ['text', 'feeling'],
        num_rows: 61998
    })
})


## Select train and test splits

In [None]:
# Use the predefined train and test splits of the loaded dataset.
# (The validation split is not used in this section.)
train_data, test_data = dataset["train"], dataset["test"]

# Peek at the first training example to see the record layout
sample = train_data[0]
print(f"Sample: {sample}")

Sample: {'text': '@fa6ami86 so happy that salman won.  btw the 14sec clip is truely a teaser', 'feeling': 0}


## Transform data

In [None]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

# Read whole columns instead of iterating the datasets row by row: iterating
# a Dataset materialises one dict per example, which is needlessly slow for
# splits of this size. `list(...)` guarantees plain Python lists regardless of
# the container type the installed `datasets` version returns for a column.
train_text = list(train_data["text"])
train_labels = list(train_data["feeling"])

test_text = list(test_data["text"])
test_labels = list(test_data["feeling"])

# Vectorize: the vocabulary is learned from the training text only, then the
# same vocabulary is applied to the test text.
count_vectorizer = CountVectorizer()
X_train_counts = count_vectorizer.fit_transform(train_text)
X_test_counts = count_vectorizer.transform(test_text)

# TF-IDF representation: IDF weights are likewise fitted on the training
# counts only and reused for the test counts.
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
X_test_tfidf = tfidf_transformer.transform(X_test_counts)


## Train an SVM classifier

In [None]:
from sklearn.svm import SVC

# Linear-kernel SVM trained on the TF-IDF training matrix. `fit` returns the
# estimator itself, so construction and training are chained.
# NOTE(review): scikit-learn documents SVC fit time as scaling at least
# quadratically with the number of samples; with a training split of this
# size the cell is likely slow. LinearSVC may be worth evaluating, but it
# would not reproduce the recorded scores exactly — confirm before switching.
svc = SVC(kernel="linear").fit(X_train_tfidf, train_labels)

# Predicted labels for the test split
pred_labels = svc.predict(X_test_tfidf)


## Evaluate Model

In [None]:
from sklearn.metrics import accuracy_score, classification_report


# Report header, followed by one blank line
print("Support Vector Machine Classifier:", end="\n\n")

# Overall accuracy on the test split
print(" Test Set Accuracy:", accuracy_score(y_true=test_labels, y_pred=pred_labels))

# Per-class precision / recall / F1, printed on the line after its caption
print(
    " Classification Report:",
    classification_report(y_true=test_labels, y_pred=pred_labels),
    sep="\n",
)

Support Vector Machine Classifier:

 Test Set Accuracy: 0.7824929836446337
 Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.78      0.78     30969
           1       0.78      0.78      0.78     31029

    accuracy                           0.78     61998
   macro avg       0.78      0.78      0.78     61998
weighted avg       0.78      0.78      0.78     61998

