In [1]:
pip install transformers

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset, random_split
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup

In [3]:
Paris_1 = pd.read_csv('Coding_Paris_Week1.csv')
Paris_2 = pd.read_csv('Coding_Paris_Week2.csv')
Paris_3 = pd.read_csv('Coding_Paris_Week3.csv')
Paris_4 = pd.read_csv('Coding_Paris_Week4.csv')
Paris_5 = pd.read_csv('Coding_Paris_Week5.csv')
Shawn_1 = pd.read_csv('Coding_Shawn_Week1.csv')
Shawn_2 = pd.read_csv('Coding_Shawn_Week2.csv')
Shawn_3 = pd.read_csv('Coding_Shawn_Week3.csv')
Shawn_4 = pd.read_csv('Coding_Shawn_Week4.csv')
Shawn_5 = pd.read_csv('Coding_Shawn_Week5.csv')
Tianli_1 = pd.read_csv('Coding_Tianli_Week1.csv')
Tianli_2 = pd.read_csv('Coding_Tianli_Week2.csv')
Tianli_3 = pd.read_csv('Coding_Tianli_Week3.csv')
Tianli_4 = pd.read_csv('Coding_Tianli_Week4.csv')
Tianli_5 = pd.read_csv('Coding_Tianli_Week5.csv')

In [4]:
# Combine all the data frames into one
combined_df = pd.concat([
    Paris_1, Paris_2, Paris_3, Paris_4, Paris_5,
    Shawn_1, Shawn_2, Shawn_3, Shawn_4, Shawn_5,
    Tianli_1, Tianli_2, Tianli_3, Tianli_4, Tianli_5
], ignore_index=True)

combined_df['Sentiment'] = combined_df['Sentiment'].astype(int)



# Check the combined data frame
print(combined_df)

                                                   Tweet  Sentiment
0      RT @biancale_monash ATTN: Aus women interested...          3
1      The future will be full of lab grown meat: htt...          1
2      The Future Of Meat: 45 In Vitro Meat Recipes Y...          4
3      Some makers of lab-grown meat have adopted a c...          3
4                Lab grown meat doesn’t sit well with me          2
...                                                  ...        ...
22973  I've unexpectedly ended up with David Lewis on...          3
22974      cheap cultivated meat https://t.co/hsXLscDaVS          3
22975  @BobsBlog I mean to be clear, it depends exact...          3
22976  The market for cultured meat is no joke (prese...          3
22977  @guardiannews LAB-GROWN MEAT HITS A MAJOR MILE...          3

[22978 rows x 2 columns]


In [6]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

def evaluate_model(model, val_loader, device, loss_fn):
    model.eval()
    total_loss, total_accuracy, total_precision, total_recall, total_f1 = 0, 0, 0, 0, 0
    total_examples = 0

    predictions, true_labels = [], []

    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            total_loss += loss.item()

            logits = outputs.logits
            preds = torch.argmax(logits, dim=1)
            predictions.extend(preds.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

            total_examples += labels.size(0)

    if total_examples > 0:
        total_accuracy = accuracy_score(true_labels, predictions)
        total_precision = precision_score(true_labels, predictions, average='macro')
        total_recall = recall_score(true_labels, predictions, average='macro')
        total_f1 = f1_score(true_labels, predictions, average='macro')
    else:
        print("No examples to evaluate")

    return total_loss / total_examples, total_accuracy, total_precision, total_recall, total_f1


## Sentiment analysis on unlabeled data with BERT No.1

### preprocessing

In [6]:
text = combined_df
import gensim
import gensim.corpora as corpora
import re
b = []
for i,u in text.iterrows():
    a = []
    word =''
    for words in str(u['Tweet']).split(): #tokenization
        if '@' not in words: #remove @users
            words = words.replace('#','') #remove hashtag symbol
            if '#' not in words:
                if 'http' not in words: #remove URLs
                    if'&amp' not in words: #remove symbol
                        if 'RT' not in words:
                            word += (words+' ')
    doc = ''
    for token in word.split():
        token = token.lower()
        doc += (token+' ')
    b.append(doc)
text['processed']=[i for i in b]

In [14]:
from transformers import BertTokenizer

# initialize BERT
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

Downloading (…)"vocab.txt";:   0%|          | 0.00/232k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading (…)enizer_config.json";:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading (…)"config.json";:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [16]:
# 1. Load the data
df = text
tweets = df['processed'].tolist()

# 2. Define a custom dataset
class SentimentDataset(Dataset):
    def __init__(self, tweets, labels, tokenizer, max_len):
        self.tweets = tweets
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.tweets)

    def __getitem__(self, item):
        tweet = str(self.tweets[item])
        label = self.labels[item]
        encoding = self.tokenizer.encode_plus(
            tweet,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            pad_to_max_length=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'tweet_text': tweet,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# Initialize tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
MAX_LEN = 128
BATCH_SIZE = 16
EPOCHS = 3
SEED = 2024

# Create dataset
new_tweets = text['processed']
new_dataset = NewSentimentDataset(tweets=new_tweets, tokenizer=tokenizer, max_len=MAX_LEN)
new_loader = DataLoader(new_dataset, batch_size=BATCH_SIZE)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)  # 确保模型在正确的设备上

predictions = []
model.eval()  # 设置模型为评估模式

with torch.no_grad():
    for batch in new_l2oader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        predictions.append(logits.argmax(dim=-1).cpu().numpy())

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


# Evaluation using macro precision, macro recall, and macro f-1 score

In [10]:
# 1. Load the data
df = text
tweets = df['processed'].tolist()
labels = [label - 1 for label in df['Sentiment'].tolist()]  # Adjusting labels from 1-4 to 0-3

# 2. Define a custom dataset
class SentimentDataset(Dataset):
    def __init__(self, tweets, labels, tokenizer, max_len):
        self.tweets = tweets
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.tweets)

    def __getitem__(self, item):
        tweet = str(self.tweets[item])
        label = self.labels[item]
        encoding = self.tokenizer.encode_plus(
            tweet,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            pad_to_max_length=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'tweet_text': tweet,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# Initialize tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
MAX_LEN = 128
BATCH_SIZE = 16
EPOCHS = 3
SEED = 2024

# Create dataset
dataset = SentimentDataset(tweets=tweets, labels=labels, tokenizer=tokenizer, max_len=MAX_LEN)

# 3. Set up K-Fold Cross-Validation
kf = KFold(n_splits=5, shuffle=True, random_state=SEED)

for fold, (train_idx, val_idx) in enumerate(kf.split(dataset)):
    print(f"Starting fold {fold + 1}")

    # Split dataset
    train_subsampler = SubsetRandomSampler(train_idx)
    val_subsampler = SubsetRandomSampler(val_idx)

    train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, sampler=train_subsampler)
    val_loader = DataLoader(dataset, batch_size=BATCH_SIZE, sampler=val_subsampler)

    # 4. Load the BERT model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=4).to(device)

    # 5. Define optimizer, scheduler, and loss
    optimizer = AdamW(model.parameters(), lr=2e-5)
    total_steps = len(train_loader) * EPOCHS
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)
    loss_fn = torch.nn.CrossEntropyLoss().to(device)

    # 6. Train the model
    for epoch in range(EPOCHS):
        model.train()
        for batch in train_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            model.zero_grad()
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()
            scheduler.step()

        # Validation step.
        val_loss, val_accuracy, val_precision, val_recall, val_f1 = evaluate_model(model, val_loader, device, loss_fn)
        print(f"Fold {fold + 1}, Epoch {epoch + 1}, Validation Loss: {val_loss}, Accuracy: {val_accuracy}, Precision: {val_precision}, Recall: {val_recall}, F1: {val_f1}")


    # 7. Save the model for each fold
    model.save_pretrained(f"./sentiment_model_fold_{fold + 1}")

# After training, you can aggregate the results from each fold to evaluate overall performance

Starting fold 1


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/440M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassificatio

Fold 1, Epoch 1, Validation Loss: 0.03288293914387961, Accuracy: 0.799390774586597, Precision: 0.5679390403338229, Recall: 0.5112623125268916, F1: 0.524037996704527




Fold 1, Epoch 2, Validation Loss: 0.032126244018138024, Accuracy: 0.804177545691906, Precision: 0.6493429196224885, Recall: 0.5792760544366359, F1: 0.5968659135221724




Fold 1, Epoch 3, Validation Loss: 0.03579650123362908, Accuracy: 0.7987380330722367, Precision: 0.616041249118473, Recall: 0.619719320700241, F1: 0.6173752671358909
Starting fold 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Fold 2, Epoch 1, Validation Loss: 0.03443822078893141, Accuracy: 0.7874238468233247, Precision: 0.6952757622770828, Recall: 0.521444738901128, F1: 0.5400953322635629




Fold 2, Epoch 2, Validation Loss: 0.033717406723969286, Accuracy: 0.7926457789382071, Precision: 0.6705893524198218, Recall: 0.564848486597263, F1: 0.5740120753080088




Fold 2, Epoch 3, Validation Loss: 0.03769065660089773, Accuracy: 0.7928633594429939, Precision: 0.6275383364972739, Recall: 0.597170693902677, F1: 0.6090342224906525
Starting fold 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Fold 3, Epoch 1, Validation Loss: 0.03250013263790736, Accuracy: 0.79177545691906, Precision: 0.542263174764219, Recall: 0.5216915678453837, F1: 0.5294903248574794
Fold 3, Epoch 2, Validation Loss: 0.03307702923412423, Accuracy: 0.7906875543951262, Precision: 0.6305110528360781, Recall: 0.5739018151410826, F1: 0.5659539088594915




Fold 3, Epoch 3, Validation Loss: 0.036364358701492724, Accuracy: 0.7932985204525674, Precision: 0.6076225668219377, Recall: 0.5919520318946481, F1: 0.5970093581447017
Starting fold 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Fold 4, Epoch 1, Validation Loss: 0.034790386222262375, Accuracy: 0.780195865070729, Precision: 0.5218033076592618, Recall: 0.5201546764928302, F1: 0.5206896053231083
Fold 4, Epoch 2, Validation Loss: 0.03546865320114887, Accuracy: 0.7878128400435256, Precision: 0.7287095343019375, Recall: 0.537437796808542, F1: 0.5494175933623558




Fold 4, Epoch 3, Validation Loss: 0.03828659966502894, Accuracy: 0.7825897714907508, Precision: 0.6257033747088557, Recall: 0.6161650803882988, F1: 0.6172194380158269
Starting fold 5


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Fold 5, Epoch 1, Validation Loss: 0.03380360803419929, Accuracy: 0.7891186071817192, Precision: 0.5352255051674748, Recall: 0.513294929765518, F1: 0.5223037999965959
Fold 5, Epoch 2, Validation Loss: 0.03343355022005988, Accuracy: 0.7932535364526659, Precision: 0.6420318453936306, Recall: 0.5694618502442603, F1: 0.5688800114716692




Fold 5, Epoch 3, Validation Loss: 0.036075882258965866, Accuracy: 0.7899891186071817, Precision: 0.6306031728117026, Recall: 0.586836916871183, F1: 0.5938103325257458


# Support Vector Machine weighted-averaging

In [29]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.model_selection import cross_validate
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score

df = combined_df
#tweets = df['Tweet'].tolist()
#labels = [label - 1 for label in df['Sentiment'].tolist()]  # Adjusting labels from 1-4 to 0-3

# TF-IDF
tfidf = TfidfVectorizer()
X = tfidf.fit_transform(df['Tweet'])


y = df['Sentiment']

# 
svm = SVC()

# 
scoring = {
    'accuracy': make_scorer(accuracy_score),
    'precision': make_scorer(precision_score, average='weighted'),
    'recall': make_scorer(recall_score, average='weighted'),
    'f1': make_scorer(f1_score, average='weighted')
}

# 
scores = cross_validate(svm, X, y, scoring=scoring, cv=5)

# 
for metric in scores:
    if metric.startswith('test_'):
        print(f"{metric}: {scores[metric].mean()}")

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test_accuracy: 0.7410128029198877
test_precision: 0.7182506757088543
test_recall: 0.7410128029198877
test_f1: 0.703542200511795


  _warn_prf(average, modifier, msg_start, len(result))


# SVM macro metrics

In [12]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.model_selection import cross_validate
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score

df = combined_df
#tweets = df['Tweet'].tolist()
#labels = [label - 1 for label in df['Sentiment'].tolist()]  # Adjusting labels from 1-4 to 0-3

# TF-IDF
tfidf = TfidfVectorizer()
X = tfidf.fit_transform(df['processed'])


y = df['Sentiment']

# 
svm = SVC()

# 
scoring = {
    'accuracy': make_scorer(accuracy_score),
    'precision': make_scorer(precision_score, average='macro'),
    'recall': make_scorer(recall_score, average='macro'),
    'f1': make_scorer(f1_score, average='macro')
}

# 
scores = cross_validate(svm, X, y, scoring=scoring, cv=5)

# 
for metric in scores:
    if metric.startswith('test_'):
        print(f"{metric}: {scores[metric].mean()}")

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test_accuracy: 0.7453211904944548
test_precision: 0.5383025319252142
test_recall: 0.4105667765317607
test_f1: 0.4329637904404889


  _warn_prf(average, modifier, msg_start, len(result))


# VADER

In [9]:
pip install vaderSentiment

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
Installing collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2
Note: you may need to restart the kernel to use updated packages.


In [14]:
text = pd.read_csv('labeled_predicted_0207.csv')

In [15]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# function to print sentiments
# of the sentence.
def sentiment_scores(sentence):

    # Create a SentimentIntensityAnalyzer object.
    sid_obj = SentimentIntensityAnalyzer()

    # polarity_scores method of SentimentIntensityAnalyzer
    # object gives a sentiment dictionary.
    # which contains pos, neg, neu, and compound scores.
    sentiment_dict = sid_obj.polarity_scores(sentence)

    #print(sentence)
    #print("Overall sentiment dictionary is : ", sentiment_dict)
    #print("sentence was rated as ", sentiment_dict['neg']*100, "% Negative")
    #print("sentence was rated as ", sentiment_dict['neu']*100, "% Neutral")
    #print("sentence was rated as ", sentiment_dict['pos']*100, "% Positive")

    #print("Sentence Overall Rated As", end = " ")

    # decide sentiment as positive, negative and neutral
    if sentiment_dict['compound'] >= 0.05 :
        #print("Positive")
        return 'Positive', sentiment_dict['compound']

    elif sentiment_dict['compound'] <= - 0.05 :
        #print("Negative")
        return 'Negative', sentiment_dict['compound']

    else :
        #print("Neutral")
        #print(sentiment_dict['compound'])
        return 'Neutral', sentiment_dict['compound']
    
b = []
for i,u in text.iterrows():
    a = []
    word =''
    for words in str(u['Tweet']).split(): #tokenization
        if '@' not in words: #remove @users
            words = words.replace('#','') #remove hashtag symbol
            if '#' not in words:
                if 'http' not in words: #remove URLs
                    if'&amp' not in words: #remove symbol
                        if 'RT' not in words:
                            word += (words+' ')
    doc = ''
    for token in word.split():
        token = token.lower()
        doc += (token+' ')
    b.append(doc)
text['processed']=[i for i in b]


sampled_df = text
import time
start = time.time()
sentiment_ = []
for i, v in sampled_df.iterrows():
    sentiment_.append(sentiment_scores(v['processed']))
end = time.time()
print('time cost to process tweets:', end-start)

sentiment_0 = []
sentiment_1 = []
for i in range(len(sentiment_)):
    sentiment_0.append(sentiment_[i][0])
    sentiment_1.append(sentiment_[i][1])

sampled_df['sentiment_'] = [i for i in sentiment_0]
sampled_df['sentiment score_'] = [i for i in sentiment_1]

sentiment_mapping = {
    "Positive": 1,
    "Negative": 2,
    "Neutral": 3
}


sampled_df['sentiment'] = sampled_df['sentiment_'].replace(sentiment_mapping)
df = sampled_df.loc[sampled_df['Sentiment']!= 4]
df['Sentiment'] = df['Sentiment'].astype(int)
df['sentiment'] = df['sentiment'].astype(int)
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

accuracy = accuracy_score(df['Sentiment'], df['sentiment'])
precision = precision_score(df['Sentiment'], df['sentiment'], average='macro')
recall = recall_score(df['Sentiment'], df['sentiment'], average='macro')
f1 = f1_score(df['Sentiment'], df['sentiment'], average='macro')

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')

time cost to process tweets: 41.8282470703125
Accuracy: 0.5133214920071048
Precision: 0.4777335232892705
Recall: 0.5308029492128866
F1 Score: 0.4672532106503355


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Sentiment'] = df['Sentiment'].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['sentiment'] = df['sentiment'].astype(int)
