In [1]:
pip install transformers

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset, random_split
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup

In [3]:
Paris_1 = pd.read_csv('Coding_Paris_Week1.csv')
Paris_2 = pd.read_csv('Coding_Paris_Week2.csv')
Paris_3 = pd.read_csv('Coding_Paris_Week3.csv')
Paris_4 = pd.read_csv('Coding_Paris_Week4.csv')
Paris_5 = pd.read_csv('Coding_Paris_Week5.csv')
Shawn_1 = pd.read_csv('Coding_Shawn_Week1.csv')
Shawn_2 = pd.read_csv('Coding_Shawn_Week2.csv')
Shawn_3 = pd.read_csv('Coding_Shawn_Week3.csv')
Shawn_4 = pd.read_csv('Coding_Shawn_Week4.csv')
Shawn_5 = pd.read_csv('Coding_Shawn_Week5.csv')
Tianli_1 = pd.read_csv('Coding_Tianli_Week1.csv')
Tianli_2 = pd.read_csv('Coding_Tianli_Week2.csv')
Tianli_3 = pd.read_csv('Coding_Tianli_Week3.csv')
Tianli_4 = pd.read_csv('Coding_Tianli_Week4.csv')
Tianli_5 = pd.read_csv('Coding_Tianli_Week5.csv')

In [4]:
# Combine all the data frames into one
combined_df = pd.concat([
    Paris_1, Paris_2, Paris_3, Paris_4, Paris_5,
    Shawn_1, Shawn_2, Shawn_3, Shawn_4, Shawn_5,
    Tianli_1, Tianli_2, Tianli_3, Tianli_4, Tianli_5
], ignore_index=True)

combined_df['Sentiment'] = combined_df['Sentiment'].astype(int)



# Check the combined data frame
print(combined_df)

                                                   Tweet  Sentiment
0      RT @biancale_monash ATTN: Aus women interested...          3
1      The future will be full of lab grown meat: htt...          1
2      The Future Of Meat: 45 In Vitro Meat Recipes Y...          4
3      Some makers of lab-grown meat have adopted a c...          3
4                Lab grown meat doesn’t sit well with me          2
...                                                  ...        ...
22973  I've unexpectedly ended up with David Lewis on...          3
22974      cheap cultivated meat https://t.co/hsXLscDaVS          3
22975  @BobsBlog I mean to be clear, it depends exact...          3
22976  The market for cultured meat is no joke (prese...          3
22977  @guardiannews LAB-GROWN MEAT HITS A MAJOR MILE...          3

[22978 rows x 2 columns]


In [5]:
# 1. Load the data
df = combined_df
tweets = df['Tweet'].tolist()
labels = [label-1 for label in df['Sentiment'].tolist()]  # Adjusting labels from 1-4 to 0-3 because the outputs of BERT start from 0

# 2. Define a custom dataset
class SentimentDataset(Dataset):
    def __init__(self, tweets, labels, tokenizer, max_len):
        self.tweets = tweets
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.tweets)

    def __getitem__(self, item):
        tweet = str(self.tweets[item])
        label = self.labels[item]
        encoding = self.tokenizer.encode_plus(
            tweet,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            pad_to_max_length=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'tweet_text': tweet,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# 3. Split the data into training and validation sets
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
MAX_LEN = 128
BATCH_SIZE = 16
EPOCHS = 3

dataset = SentimentDataset(tweets=tweets, labels=labels, tokenizer=tokenizer, max_len=MAX_LEN)
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# 4. Load the BERT model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=4)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# 5. Define optimizer, scheduler, and loss
optimizer = AdamW(model.parameters(), lr=2e-5)
total_steps = len(train_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)
loss_fn = torch.nn.CrossEntropyLoss().to(device)

# 6. Train the model
for epoch in range(EPOCHS):
    model.train()
    for batch in train_loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        _, preds = torch.max(outputs.logits, dim=1)
        loss = loss_fn(outputs.logits, labels)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        scheduler.step()

# ... (Validation and Evaluation steps can be added here)

# 7. Save the model
model.save_pretrained("./sentiment_model")
tokenizer.save_pretrained("./sentiment_model")

Downloading (…)"vocab.txt";:   0%|          | 0.00/232k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading (…)enizer_config.json";:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)"config.json";:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

('./sentiment_model\\tokenizer_config.json',
 './sentiment_model\\special_tokens_map.json',
 './sentiment_model\\vocab.txt',
 './sentiment_model\\added_tokens.json')

In [17]:
tweet = "I hate meat, hope CM never come"
# Tokenize the tweet and obtain input ids and attention masks
inputs = tokenizer.encode_plus(tweet, add_special_tokens=True, max_length=512, pad_to_max_length=True, return_attention_mask=True, return_tensors="pt")

input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]

model.eval()

# Make the prediction
with torch.no_grad():
    outputs = model(input_ids, attention_mask=attention_mask)
    logits = outputs[0]
    predicted_label = torch.argmax(logits, dim=1).item()

print(f"The predicted sentiment label for the tweet is: {predicted_label}")

The predicted sentiment label for the tweet is: 1


In [18]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np

# Function to evaluate the model
def evaluate_model(model, data_loader, device):
    model.eval()  # set model to evaluation mode

    predictions, true_labels = [], []

    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            _, preds = torch.max(outputs.logits, dim=1)

            predictions.extend(preds.tolist())
            true_labels.extend(labels.tolist())

    accuracy = accuracy_score(true_labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predictions, average='weighted')
    
    return accuracy, precision, recall, f1

# After training your model, evaluate it
accuracy, precision, recall, f1 = evaluate_model(model, val_loader, device)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")



Accuracy: 0.7893820713664056
Precision: 0.7970154162955836
Recall: 0.7893820713664056
F1 Score: 0.7924942472105224


In [22]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

def evaluate_model(model, val_loader, device, loss_fn):
    model.eval()
    total_loss, total_accuracy, total_precision, total_recall, total_f1 = 0, 0, 0, 0, 0
    total_examples = 0

    predictions, true_labels = [], []

    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            total_loss += loss.item()

            logits = outputs.logits
            preds = torch.argmax(logits, dim=1)
            predictions.extend(preds.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

            total_examples += labels.size(0)

    if total_examples > 0:
        total_accuracy = accuracy_score(true_labels, predictions)
        total_precision = precision_score(true_labels, predictions, average='weighted')
        total_recall = recall_score(true_labels, predictions, average='weighted')
        total_f1 = f1_score(true_labels, predictions, average='weighted')
    else:
        print("No examples to evaluate")

    return total_loss / total_examples, total_accuracy, total_precision, total_recall, total_f1


In [26]:
import torch
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup
from sklearn.model_selection import KFold

# 1. Load the data
df = combined_df
tweets = df['Tweet'].tolist()
labels = [label - 1 for label in df['Sentiment'].tolist()]  # Adjusting labels from 1-4 to 0-3

# 2. Define a custom dataset
class SentimentDataset(Dataset):
    def __init__(self, tweets, labels, tokenizer, max_len):
        self.tweets = tweets
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.tweets)

    def __getitem__(self, item):
        tweet = str(self.tweets[item])
        label = self.labels[item]
        encoding = self.tokenizer.encode_plus(
            tweet,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            pad_to_max_length=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'tweet_text': tweet,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# Initialize tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
MAX_LEN = 128
BATCH_SIZE = 16
EPOCHS = 3
SEED = 2024

# Create dataset
dataset = SentimentDataset(tweets=tweets, labels=labels, tokenizer=tokenizer, max_len=MAX_LEN)

# 3. Set up K-Fold Cross-Validation
kf = KFold(n_splits=5, shuffle=True, random_state=SEED)

for fold, (train_idx, val_idx) in enumerate(kf.split(dataset)):
    print(f"Starting fold {fold + 1}")

    # Split dataset
    train_subsampler = SubsetRandomSampler(train_idx)
    val_subsampler = SubsetRandomSampler(val_idx)

    train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, sampler=train_subsampler)
    val_loader = DataLoader(dataset, batch_size=BATCH_SIZE, sampler=val_subsampler)

    # 4. Load the BERT model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=4).to(device)

    # 5. Define optimizer, scheduler, and loss
    optimizer = AdamW(model.parameters(), lr=2e-5)
    total_steps = len(train_loader) * EPOCHS
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)
    loss_fn = torch.nn.CrossEntropyLoss().to(device)

    # 6. Train the model
    for epoch in range(EPOCHS):
        model.train()
        for batch in train_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            model.zero_grad()
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()
            scheduler.step()

        # Validation step.
        val_loss, val_accuracy, val_precision, val_recall, val_f1 = evaluate_model(model, val_loader, device, loss_fn)
        print(f"Fold {fold + 1}, Epoch {epoch + 1}, Validation Loss: {val_loss}, Accuracy: {val_accuracy}, Precision: {val_precision}, Recall: {val_recall}, F1: {val_f1}")


    # 7. Save the model for each fold
    model.save_pretrained(f"./sentiment_model_fold_{fold + 1}")

# After training, you can aggregate the results from each fold to evaluate overall performance

Starting fold 1


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Fold 1, Epoch 1, Validation Loss: 0.032080561081490586, Accuracy: 0.8052654482158399, Precision: 0.8042450078318993, Recall: 0.8052654482158399, F1: 0.7994799982666615




Fold 1, Epoch 2, Validation Loss: 0.03250685527440877, Accuracy: 0.8037423846823325, Precision: 0.8094594490087942, Recall: 0.8037423846823325, F1: 0.8062005820271365




Fold 1, Epoch 3, Validation Loss: 0.0354097801843436, Accuracy: 0.8006962576153177, Precision: 0.805000310251159, Recall: 0.8006962576153177, F1: 0.8022759889078884
Starting fold 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Fold 2, Epoch 1, Validation Loss: 0.03353814608226972, Accuracy: 0.7872062663185379, Precision: 0.7669523724630135, Recall: 0.7872062663185379, F1: 0.7765713417574486
Fold 2, Epoch 2, Validation Loss: 0.034172566118832515, Accuracy: 0.7928633594429939, Precision: 0.7910538667739367, Recall: 0.7928633594429939, F1: 0.7876546450235292




Fold 2, Epoch 3, Validation Loss: 0.03755014814562881, Accuracy: 0.7878590078328982, Precision: 0.7893450904494999, Recall: 0.7878590078328982, F1: 0.7883337804664382
Starting fold 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Fold 3, Epoch 1, Validation Loss: 0.031753523646488306, Accuracy: 0.7987380330722367, Precision: 0.786828830407823, Recall: 0.7987380330722367, F1: 0.7922776716634574
Fold 3, Epoch 2, Validation Loss: 0.03201892806964402, Accuracy: 0.7991731940818103, Precision: 0.7919643873941704, Recall: 0.7991731940818103, F1: 0.7939860382226765




Fold 3, Epoch 3, Validation Loss: 0.0365482566900895, Accuracy: 0.7882941688424717, Precision: 0.7936846380554521, Recall: 0.7882941688424717, F1: 0.7900919169539148
Starting fold 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Fold 4, Epoch 1, Validation Loss: 0.034856669084291594, Accuracy: 0.7795429815016323, Precision: 0.7557126187380251, Recall: 0.7795429815016323, F1: 0.7584954829758362
Fold 4, Epoch 2, Validation Loss: 0.03586813185497778, Accuracy: 0.7723612622415669, Precision: 0.7735014152799639, Recall: 0.7723612622415669, F1: 0.7704269910474052




Fold 4, Epoch 3, Validation Loss: 0.040652452306305105, Accuracy: 0.7695321001088139, Precision: 0.7732480947829171, Recall: 0.7695321001088139, F1: 0.7710780700252757
Starting fold 5


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Fold 5, Epoch 1, Validation Loss: 0.03370033891403377, Accuracy: 0.7873775843307943, Precision: 0.7794208401220558, Recall: 0.7873775843307943, F1: 0.781825409340064
Fold 5, Epoch 2, Validation Loss: 0.03337555696480422, Accuracy: 0.7939064200217628, Precision: 0.7899363110496019, Recall: 0.7939064200217628, F1: 0.7901615263621985




Fold 5, Epoch 3, Validation Loss: 0.03723042017463048, Accuracy: 0.7906420021762786, Precision: 0.7916477309998189, Recall: 0.7906420021762786, F1: 0.7895917608275738


In [25]:
results = []
for fold in range(1, 6):
    # load pre-trained models
    model = BertForSequenceClassification.from_pretrained(f"./sentiment_model_fold_{fold}").to(device)
    
    # DataLoader
    _, val_idx = next(iter(kf.split(dataset)))
    val_subsampler = SubsetRandomSampler(val_idx)
    val_loader = DataLoader(dataset, batch_size=BATCH_SIZE, sampler=val_subsampler)

    # Validation
    val_loss, val_accuracy, val_precision, val_recall, val_f1 = evaluate_model(model, val_loader, device, loss_fn)
    results.append({
        'fold': fold,
        'val_loss': val_loss,
        'val_accuracy': val_accuracy,
        'val_precision': val_precision,
        'val_recall': val_recall,
        'val_f1': val_f1
    })

# Results
for result in results:
    print(f"Fold {result['fold']} - Loss: {result['val_loss']}, Accuracy: {result['val_accuracy']}, Precision: {result['val_precision']}, Recall: {result['val_recall']}, F1: {result['val_f1']}")



Fold 1 - Loss: 0.03562552434877793, Accuracy: 0.8028720626631853, Precision: 0.8056423162981777, Recall: 0.8028720626631853, F1: 0.8031297988105316
Fold 2 - Loss: 0.012331060904302709, Accuracy: 0.9410356832027851, Precision: 0.9421367092699812, Recall: 0.9410356832027851, F1: 0.9414341893254281
Fold 3 - Loss: 0.012316833922576266, Accuracy: 0.9373368146214099, Precision: 0.9384542873677293, Recall: 0.9373368146214099, F1: 0.9374823662441902
Fold 4 - Loss: 0.011868356243975533, Accuracy: 0.9425587467362925, Precision: 0.9441865311451921, Recall: 0.9425587467362925, F1: 0.9427889144451668
Fold 5 - Loss: 0.012518772966851738, Accuracy: 0.9321148825065274, Precision: 0.9346667240913987, Recall: 0.9321148825065274, F1: 0.93286848646533


# Support Vector Machine

In [29]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.model_selection import cross_validate
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score

df = combined_df
#tweets = df['Tweet'].tolist()
#labels = [label - 1 for label in df['Sentiment'].tolist()]  # Adjusting labels from 1-4 to 0-3

# TF-IDF
tfidf = TfidfVectorizer()
X = tfidf.fit_transform(df['Tweet'])


y = df['Sentiment']

# 
svm = SVC()

# 
scoring = {
    'accuracy': make_scorer(accuracy_score),
    'precision': make_scorer(precision_score, average='weighted'),
    'recall': make_scorer(recall_score, average='weighted'),
    'f1': make_scorer(f1_score, average='weighted')
}

# 
scores = cross_validate(svm, X, y, scoring=scoring, cv=5)

# 
for metric in scores:
    if metric.startswith('test_'):
        print(f"{metric}: {scores[metric].mean()}")

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test_accuracy: 0.7410128029198877
test_precision: 0.7182506757088543
test_recall: 0.7410128029198877
test_f1: 0.703542200511795


  _warn_prf(average, modifier, msg_start, len(result))
