In [None]:
!pip install datasets
from datasets import load_dataset
from datasets import load_dataset
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import precision_score, recall_score, f1_score





In [None]:
ds = load_dataset("alexcadillon/SemEval2016Task5", "restaurants")
ds

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


SemEval2016Task5.py:   0%|          | 0.00/12.9k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/11.6k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/146k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/58.1k [00:00<?, ?B/s]

Generating trial split:   0%|          | 0/10 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/350 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/90 [00:00<?, ? examples/s]

DatasetDict({
    trial: Dataset({
        features: ['reviewId', 'sentences', 'opinions'],
        num_rows: 10
    })
    train: Dataset({
        features: ['reviewId', 'sentences', 'opinions'],
        num_rows: 350
    })
    test: Dataset({
        features: ['reviewId', 'sentences', 'opinions'],
        num_rows: 90
    })
})

# Baseline

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
import pandas as pd
from datasets import load_dataset

def load_and_prepare_data():
    ds = load_dataset("alexcadillon/SemEval2016Task5", "restaurants")

    def extract_data(data):
        extracted_data = []
        for item in data:
            for sentence in item['sentences']:
                text = sentence['text']
                opinions = sentence.get('opinions', [])
                opinion_tuples = [
                    (opinion['category']['entity'], opinion['category']['attribute'], opinion['polarity'])
                    for opinion in opinions
                ]
                extracted_data.append({
                    'sentenceId': sentence['sentenceId'],
                    'text': text,
                    'opinions': opinion_tuples
                })
        return pd.DataFrame(extracted_data)

    return extract_data(ds['train']), extract_data(ds['test'])


def prepare_training_data(data):
    rows = []
    for _, row in data.iterrows():
        text = row['text']
        for entity, attribute, _ in row['opinions']:
            rows.append({'text': text, 'category': f"{entity}#{attribute}"})
    return pd.DataFrame(rows)

# Evaluate predictions
def evaluate_predictions(predictions, ground_truth):
    total_sentences = len(ground_truth)
    exact_match_correct = sum([set(pred) == set(actual) for pred, actual in zip(predictions, ground_truth)])
    accuracy = exact_match_correct / total_sentences

    # Compute precision, recall, and F1
    true_positive = 0
    false_positive = 0
    false_negative = 0

    for pred, actual in zip(predictions, ground_truth):
        pred_set = set(pred)
        actual_set = set(actual)

        true_positive += len(pred_set & actual_set)  # Intersection
        false_positive += len(pred_set - actual_set)  # Predicted but not actual
        false_negative += len(actual_set - pred_set)  # Actual but not predicted
    print(true_positive, false_positive, false_negative)
    precision = true_positive / (true_positive + false_positive) if true_positive + false_positive > 0 else 0
    recall = true_positive / (true_positive + false_negative) if true_positive + false_negative > 0 else 0
    f1_score = (2 * precision * recall) / (precision + recall) if precision + recall > 0 else 0

    return accuracy, precision, recall, f1_score
# Load and preprocess data
train_data, test_data = load_and_prepare_data()
training_data = prepare_training_data(train_data)
print(len(training_data))
# Feature extraction with TF-IDF
tfidf = TfidfVectorizer(max_features=1000, stop_words='english', ngram_range=(1, 1))
X_train = tfidf.fit_transform(training_data['text'])
y_train = training_data['category']
# Train SVM with probability calibration
base_svm = LinearSVC(random_state=42)
svm = CalibratedClassifierCV(base_svm)
svm.fit(X_train, y_train)
# Predict categories with probabilities
def predict_categories(texts, threshold=0.2):
    X_texts = tfidf.transform(texts)
    y_prob = svm.predict_proba(X_texts)
    categories = svm.classes_
    predictions = []
    for probs in y_prob:
        assigned = [categories[i] for i, p in enumerate(probs) if p >= threshold]
        predictions.append(assigned)
    return predictions
# Predict and evaluate
test_texts = test_data['text']
ground_truth = [
    list(set(f"{entity}#{attribute}" for entity, attribute, _ in opinions))
    for opinions in test_data['opinions']
]
predicted_categories = predict_categories(test_texts)
accuracy, precision, recall, f1 = evaluate_predictions(predicted_categories, ground_truth)
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
# Example output
results = pd.DataFrame({
    'Text': test_texts,
    'True Categories': ground_truth,
    'Predicted Categories': predicted_categories
})
print(results.head())

2507
531 457 212
Accuracy: 0.3831
Precision: 0.5374
Recall: 0.7147
F1-Score: 0.6135
                                     Text       True Categories  \
0                                    Yum!        [FOOD#QUALITY]   
1               Serves really good sushi.        [FOOD#QUALITY]   
2  Not the biggest portions but adequate.  [FOOD#STYLE_OPTIONS]   
3       Green Tea creme brulee is a must!        [FOOD#QUALITY]   
4  Don't leave the restaurant without it.        [FOOD#QUALITY]   

   Predicted Categories  
0        [FOOD#QUALITY]  
1        [FOOD#QUALITY]  
2  [FOOD#STYLE_OPTIONS]  
3        [FOOD#QUALITY]  
4  [RESTAURANT#GENERAL]  


In [19]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
# from google.colab import drive
# drive.flush_and_unmount()

Mounted at /content/drive


## 1 VS REST

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import pandas as pd
from datasets import load_dataset

# Load and prepare dataset
def load_and_prepare_data():
    ds = load_dataset("alexcadillon/SemEval2016Task5", "restaurants")

    def extract_data(data):
        extracted_data = []
        for item in data:
            for sentence in item['sentences']:
                text = sentence['text']
                opinions = sentence.get('opinions', [])
                opinion_tuples = [
                    f"{opinion['category']['entity']}#{opinion['category']['attribute']}"
                    for opinion in opinions
                ]
                extracted_data.append({
                    'sentenceId': sentence['sentenceId'],
                    'text': text,
                    'categories': opinion_tuples
                })
        return pd.DataFrame(extracted_data)

    return extract_data(ds['train']), extract_data(ds['test'])


def main():
    train_data, test_data = load_and_prepare_data()
    print(len(train_data))

    mlb = MultiLabelBinarizer()
    y_train = mlb.fit_transform(train_data['categories'])
    y_test = mlb.transform(test_data['categories'])

    tfidf = TfidfVectorizer(max_features=1000, stop_words='english', ngram_range=(1, 1))

    X_train = tfidf.fit_transform(train_data['text'])
    X_test = tfidf.transform(test_data['text'])

    base_svm = LinearSVC(random_state=42)
    classifier = OneVsRestClassifier(base_svm)
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)

    predicted_categories = mlb.inverse_transform(y_pred)
    ground_truth = mlb.inverse_transform(y_test)


    accuracy, precision, recall, f1 = evaluate_predictions(predicted_categories, ground_truth)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")


    # Compute detailed metrics and overall accuracy
    metrics = classification_report(y_test, y_pred, target_names=mlb.classes_, zero_division=0)
    accuracy = accuracy_score(y_test, y_pred)
    print(metrics)
    print(accuracy)

    results = pd.DataFrame({
        'Text': test_data['text'],
        'True Categories': ground_truth,
        'Predicted Categories': predicted_categories
    })
    print(results.head())

print("1 VS Rest Classifier")
main()

1 VS Rest Classifier
2000
404 135 339
Accuracy: 0.4734
Precision: 0.7495
Recall: 0.5437
F1-Score: 0.6303
                          precision    recall  f1-score   support

        AMBIENCE#GENERAL       0.87      0.58      0.69        57
           DRINKS#PRICES       0.00      0.00      0.00         3
          DRINKS#QUALITY       0.43      0.14      0.21        21
    DRINKS#STYLE_OPTIONS       0.67      0.33      0.44        12
             FOOD#PRICES       0.62      0.23      0.33        22
            FOOD#QUALITY       0.76      0.72      0.74       226
      FOOD#STYLE_OPTIONS       0.62      0.31      0.42        48
        LOCATION#GENERAL       0.60      0.23      0.33        13
      RESTAURANT#GENERAL       0.63      0.46      0.54       142
RESTAURANT#MISCELLANEOUS       0.57      0.12      0.20        33
       RESTAURANT#PRICES       0.57      0.19      0.29        21
         SERVICE#GENERAL       0.87      0.72      0.79       145

               micro avg       0.75

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.pipeline import make_pipeline
import pandas as pd
from datasets import load_dataset

# Load and prepare dataset
def load_and_prepare_data():
    ds = load_dataset("alexcadillon/SemEval2016Task5", "restaurants")

    def extract_data(data):
        extracted_data = []
        for item in data:
            for sentence in item['sentences']:
                text = sentence['text']
                opinions = sentence.get('opinions', [])
                opinion_tuples = [
                    f"{opinion['category']['entity']}#{opinion['category']['attribute']}"
                    for opinion in opinions
                ]
                extracted_data.append({
                    'sentenceId': sentence['sentenceId'],
                    'text': text,
                    'categories': opinion_tuples
                })
        return pd.DataFrame(extracted_data)

    return extract_data(ds['train']), extract_data(ds['test'])

def try_multiple_models(train_data, test_data, models):
    # Transform target data
    mlb = MultiLabelBinarizer()
    y_train = mlb.fit_transform(train_data['categories'])
    y_test = mlb.transform(test_data['categories'])

    # TF-IDF features
    tfidf = TfidfVectorizer(max_features=1000, stop_words='english', ngram_range=(1, 1))
    X_train = tfidf.fit_transform(train_data['text'])
    X_test = tfidf.transform(test_data['text'])

    # Try multiple classifiers
    results = []
    for model_name, model in models.items():
        print(f"\nTraining {model_name}...")
        classifier = OneVsRestClassifier(model)
        classifier.fit(X_train, y_train)

        y_pred = classifier.predict(X_test)

        # Compute detailed metrics and overall accuracy
        metrics = classification_report(y_test, y_pred, target_names=mlb.classes_, zero_division=0)
        accuracy = accuracy_score(y_test, y_pred)
        print(metrics)


        # Decode predictions and ground truth
        predicted_categories = mlb.inverse_transform(y_pred)
        ground_truth = mlb.inverse_transform(y_test)

        # Evaluate predictions
        accuracy, precision, recall, f1 = evaluate_predictions(predicted_categories, ground_truth)
        print(f"{model_name} - Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}")

        results.append({
            'Model': model_name,
            'accuracy': accuracy,
            'Precision': precision,
            'Recall': recall,
            'F1-Score': f1
        })

    # Display results
    results_df = pd.DataFrame(results)
    print("\nSummary of Results:")
    print(results_df)
    return results_df

def main():
    train_data, test_data = load_and_prepare_data()
    print(len(train_data))

    # Define models to try
    models = {
        'LinearSVC': LinearSVC(random_state=42),
        'RandomForest': RandomForestClassifier(random_state=42, n_estimators=100),
        'LogisticRegression': LogisticRegression(random_state=42, max_iter=1000),
        'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3)
    }

    try_multiple_models(train_data, test_data, models)

print("Trying multiple models with OneVsRest strategy")
main()


Trying multiple models with OneVsRest strategy
2000

Training LinearSVC...
                          precision    recall  f1-score   support

        AMBIENCE#GENERAL       0.87      0.58      0.69        57
           DRINKS#PRICES       0.00      0.00      0.00         3
          DRINKS#QUALITY       0.43      0.14      0.21        21
    DRINKS#STYLE_OPTIONS       0.67      0.33      0.44        12
             FOOD#PRICES       0.62      0.23      0.33        22
            FOOD#QUALITY       0.76      0.72      0.74       226
      FOOD#STYLE_OPTIONS       0.62      0.31      0.42        48
        LOCATION#GENERAL       0.60      0.23      0.33        13
      RESTAURANT#GENERAL       0.63      0.46      0.54       142
RESTAURANT#MISCELLANEOUS       0.57      0.12      0.20        33
       RESTAURANT#PRICES       0.57      0.19      0.29        21
         SERVICE#GENERAL       0.87      0.72      0.79       145

               micro avg       0.75      0.54      0.63       743

## Feature

In [None]:
import numpy as np
from gensim.models import KeyedVectors
import gensim.downloader as api

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.pipeline import make_pipeline
import pandas as pd
from datasets import load_dataset
from scipy.sparse import hstack
from sklearn.metrics import classification_report
import spacy

nlp = spacy.load("en_core_web_sm")
word_vectors = api.load('glove-wiki-gigaword-50')

models = {
    'LinearSVC': LinearSVC(random_state=42),
    'RandomForest': RandomForestClassifier(random_state=42, n_estimators=100),
    'LogisticRegression': LogisticRegression(random_state=42, max_iter=1000),
    'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3)
}

def extract_mean_word2vec(texts, word2vec_model):
    embeddings = []
    for text in texts:
        doc = nlp(text.lower())
        vectors = [word2vec_model[token.text] for token in doc if not token.is_stop and token.is_alpha and token.text in word2vec_model]
        if vectors:
            mean_vector = np.mean(vectors, axis=0)
        else:
            mean_vector = np.zeros(word2vec_model.vector_size)
        embeddings.append(mean_vector)
    return np.array(embeddings)

def extract_ner_features(texts, nlp):
    entity_types = ['ORG', 'PRODUCT', 'MONEY', 'QUANTITY']  # Define entity types to include
    features = []

    for doc in nlp.pipe(texts):
        feature_row = {entity: 0 for entity in entity_types}
        for ent in doc.ents:
            if ent.label_ in entity_types:
              feature_row[ent.label_] = 1
        features.append(feature_row)

    return pd.DataFrame(features)

def main():
    # Load SpaCy model and word2vec model
    word2vec_model = word_vectors

    train_data, test_data = load_and_prepare_data()

    mlb = MultiLabelBinarizer()
    y_train = mlb.fit_transform(train_data['categories'])
    y_test = mlb.transform(test_data['categories'])

    # TF-IDF features
    tfidf = TfidfVectorizer(max_features=1000, stop_words='english', ngram_range=(1, 1))
    X_train_tfidf = tfidf.fit_transform(train_data['text'])
    X_test_tfidf = tfidf.transform(test_data['text'])

    # NER features
    ner_train_features = extract_ner_features(train_data['text'], nlp)
    ner_test_features = extract_ner_features(test_data['text'], nlp)

    # Mean Word2Vec features
    word2vec_train_features = extract_mean_word2vec(train_data['text'], word2vec_model)
    word2vec_test_features = extract_mean_word2vec(test_data['text'], word2vec_model)

    print("\n\n ======= With Word2Vec feature")
    # Combine all features
    X_train = hstack([X_train_tfidf, word2vec_train_features])
    X_test = hstack([X_test_tfidf, word2vec_test_features])

    results = []

    for model_name, model in models.items():
        print(f"\nTraining {model_name}...")
        classifier = OneVsRestClassifier(model)
        classifier.fit(X_train, y_train)

        y_pred = classifier.predict(X_test)
        metrics = classification_report(y_test, y_pred, target_names=mlb.classes_, zero_division=0)
        print(metrics)

        # Decode predictions and ground truth
        predicted_categories = mlb.inverse_transform(y_pred)
        ground_truth = mlb.inverse_transform(y_test)

        # Evaluate predictions
        accuracy, precision, recall, f1 = evaluate_predictions(predicted_categories, ground_truth)
        print(f"{model_name} - Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}")

        results.append({
            'Model': model_name,
            'accuracy': accuracy,
            'Precision': precision,
            'Recall': recall,
            'F1-Score': f1
        })

    results_df = pd.DataFrame(results)
    print("\nSummary of Results:")
    print(results_df)

    # # Train SVM classifier
    # base_svm = LinearSVC(random_state=42)
    # classifier = OneVsRestClassifier(base_svm)
    # classifier.fit(X_train, y_train)

    # # Predict and evaluate
    # y_pred = classifier.predict(X_test)
    # predicted_categories = mlb.inverse_transform(y_pred)
    # ground_truth = mlb.inverse_transform(y_test)

    # accuracy, precision, recall, f1 = evaluate_predictions(predicted_categories, ground_truth)
    # print(f"Accuracy: {accuracy:.4f}")
    # print(f"Precision: {precision:.4f}")
    # print(f"Recall: {recall:.4f}")
    # print(f"F1-Score: {f1:.4f}")

    # results = pd.DataFrame({
    #     'Text': test_data['text'],
    #     'True Categories': ground_truth,
    #     'Raw True Categories': test_data['categories'],
    #     'Predicted Categories': predicted_categories
    # })
    # print(results.head())

    print("\n\n =========== With Word2Vec + NER")
    X_train = hstack([X_train_tfidf, ner_train_features, word2vec_train_features])
    X_test = hstack([X_test_tfidf, ner_test_features, word2vec_test_features])

    results = []
    for model_name, model in models.items():
        print(f"\nTraining {model_name}...")
        classifier = OneVsRestClassifier(model)
        classifier.fit(X_train, y_train)

        y_pred = classifier.predict(X_test)

        metrics = classification_report(y_test, y_pred, target_names=mlb.classes_, zero_division=0)
        print(metrics)

        # Decode predictions and ground truth
        predicted_categories = mlb.inverse_transform(y_pred)
        ground_truth = mlb.inverse_transform(y_test)

        # Evaluate predictions
        accuracy, precision, recall, f1 = evaluate_predictions(predicted_categories, ground_truth)


        print(f"{model_name} - Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}")

        results.append({
            'Model': model_name,
            'accuracy': accuracy,
            'Precision': precision,
            'Recall': recall,
            'F1-Score': f1
        })

    results_df = pd.DataFrame(results)
    print("\nSummary of Results:")
    print(results_df)

    #     # Train SVM classifier
    # base_svm = LinearSVC(random_state=42)
    # classifier = OneVsRestClassifier(base_svm)
    # classifier.fit(X_train, y_train)

    # # Predict and evaluate
    # y_pred = classifier.predict(X_test)
    # predicted_categories = mlb.inverse_transform(y_pred)
    # ground_truth = mlb.inverse_transform(y_test)

    accuracy, precision, recall, f1 = evaluate_predictions(predicted_categories, ground_truth)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")

    results = pd.DataFrame({
        'Text': test_data['text'],
        'True Categories': ground_truth,
        'Raw True Categories': test_data['categories'],
        'Predicted Categories': predicted_categories
    })
    print(results.head())


print("1 VS Rest Classifier with TF-IDF, NER, and Mean Word2Vec Features")
main()



1 VS Rest Classifier with TF-IDF, NER, and Mean Word2Vec Features



Training LinearSVC...
                          precision    recall  f1-score   support

        AMBIENCE#GENERAL       0.79      0.72      0.75        57
           DRINKS#PRICES       0.00      0.00      0.00         3
          DRINKS#QUALITY       0.38      0.24      0.29        21
    DRINKS#STYLE_OPTIONS       0.50      0.17      0.25        12
             FOOD#PRICES       0.55      0.27      0.36        22
            FOOD#QUALITY       0.78      0.75      0.76       226
      FOOD#STYLE_OPTIONS       0.53      0.33      0.41        48
        LOCATION#GENERAL       0.80      0.31      0.44        13
      RESTAURANT#GENERAL       0.68      0.46      0.55       142
RESTAURANT#MISCELLANEOUS       0.50      0.12      0.20        33
       RESTAURANT#PRICES       0.62      0.24      0.34        21
         SERVICE#GENERAL       0.81      0.74      0.77       145

               micro avg       0.73      0.57    

# BERT

In [1]:
!pip install datasets

from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report

import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset


Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (1

In [2]:
# Load pretrained DistilBERT model and tokenizer
model_name = 'distilbert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

sentence_a = "this is a sentence"
sentence_b = "FOOD,STYLE OPTIONS"

encoding = tokenizer(sentence_a, sentence_b, padding="max_length", truncation=True)
tokenizer.decode(encoding["input_ids"])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


'[CLS] this is a sentence [SEP] food, style options [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] 

In [3]:
def load_and_prepare_data():
    ds = load_dataset("alexcadillon/SemEval2016Task5", "restaurants")

    def extract_data(data):
        extracted_data = []
        for item in data:
            for sentence in item['sentences']:
                text = sentence['text']
                opinions = sentence.get('opinions', [])
                opinion_tuples = [
                    (opinion['category']['entity'], opinion['category']['attribute'], opinion['polarity'])
                    for opinion in opinions
                ]
                extracted_data.append({
                    'sentenceId': sentence['sentenceId'],
                    'text': text,
                    'opinions': opinion_tuples
                })
        return pd.DataFrame(extracted_data)

    return extract_data(ds['train']), extract_data(ds['test'])

In [4]:
import pandas as pd

train_data, test_data = load_and_prepare_data()

opinions_set = set(
    f"{entity.upper()},{attribute.upper().replace('_', ' ')}"
    for opinions in train_data['opinions']
    for entity, attribute, _ in opinions
)

def prepare_training_data(data):
    """
    Prepares the training data with text, categories, and labels.
    If a category exists in the row's opinions, its label is 1; otherwise, it is 0.
    """
    # Extract all unique opinions from the dataset
    rows = []

    # Iterate through the dataset rows
    for _, row in data.iterrows():
        text = row['text']
        # Extract categories present in this row
        existing_categories = {
            f"{entity.upper()},{attribute.upper().replace('_', ' ')}"
            for entity, attribute, _ in row['opinions']
        }

        # Iterate only over the categories in the global opinions set
        for category_str in opinions_set:
            label = 1 if category_str in existing_categories else 0
            rows.append({'text': text, 'category': category_str, 'label': label})

    return pd.DataFrame(rows)

train_data_processed = prepare_training_data(train_data)
print(train_data_processed.head())
print(train_data_processed['label'].value_counts())

SemEval2016Task5.py:   0%|          | 0.00/12.9k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/11.6k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/146k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/58.1k [00:00<?, ?B/s]

Generating trial split:   0%|          | 0/10 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/350 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/90 [00:00<?, ? examples/s]

                                                text              category  \
0  Judging from previous posts this used to be a ...          FOOD,QUALITY   
1  Judging from previous posts this used to be a ...    RESTAURANT,GENERAL   
2  Judging from previous posts this used to be a ...    FOOD,STYLE OPTIONS   
3  Judging from previous posts this used to be a ...           FOOD,PRICES   
4  Judging from previous posts this used to be a ...  DRINKS,STYLE OPTIONS   

   label  
0      0  
1      1  
2      0  
3      0  
4      0  
label
0    21742
1     2258
Name: count, dtype: int64


In [5]:
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

# Prepare the dataset for BERT
def prepare_bert_dataset(data):
    """Combines text and category for BERT and returns a Dataset."""
    sentences_a = data['text']
    sentences_b = data['category']
    labels = data['label']

    # Create a Hugging Face Dataset
    dataset = Dataset.from_dict({
        'sentence_a': sentences_a,
        'sentence_b': sentences_b,
        'label': labels
    })

    return dataset

# Tokenization function
def tokenize_function(batch):
    return tokenizer(
        batch['sentence_a'],
        batch['sentence_b']
    )

# Prepare data
train_data, test_data = load_and_prepare_data()  # Load raw data
train_data_processed = prepare_training_data(train_data)  # Preprocess data
test_data_processed = prepare_training_data(test_data)

# Create datasets for BERT
train_dataset = prepare_bert_dataset(train_data_processed)
test_dataset = prepare_bert_dataset(test_data_processed)

# Tokenize datasets
tokenized_train = train_dataset.map(tokenize_function, batched=True)
tokenized_test = test_dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/24000 [00:00<?, ? examples/s]

Map:   0%|          | 0/8112 [00:00<?, ? examples/s]

In [6]:
# Define metrics
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='binary')
    accuracy = accuracy_score(labels, predictions)

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

In [7]:


import os

os.environ['WANDB_DISABLED'] = 'true'

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',            # Output directory
    evaluation_strategy="steps",
    num_train_epochs=10,                # Number of training epochs
    per_device_train_batch_size=32,    # Batch size for training
    per_device_eval_batch_size=128,     # Batch size for evaluation
    logging_steps=500,                  # Log every 50 steps
    save_total_limit=2,                # Limit the number of saved checkpoints
    metric_for_best_model="f1",        # Use F1-score to select the best model
    load_best_model_at_end=True
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Train the model
trainer.train()

# Evaluate the model
results = trainer.evaluate()
print(results)

# Save the model and tokenizer
model.save_pretrained("./fine_tuned_bert")
tokenizer.save_pretrained("./fine_tuned_bert")


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  trainer = Trainer(


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
500,0.2285,0.18315,0.938363,0.738703,0.506057,0.600639
1000,0.1591,0.173447,0.942184,0.725329,0.59354,0.65285
1500,0.1413,0.143383,0.947362,0.707895,0.724092,0.715902
2000,0.0975,0.158428,0.953402,0.772795,0.695828,0.732295
2500,0.0782,0.168754,0.952416,0.801012,0.6393,0.711078
3000,0.065,0.156421,0.954019,0.776946,0.69852,0.735648
3500,0.0395,0.219032,0.951923,0.73502,0.742934,0.738956
4000,0.0341,0.250291,0.951307,0.740997,0.720054,0.730375
4500,0.0304,0.242565,0.9518,0.739782,0.730821,0.735274
5000,0.0157,0.307611,0.951307,0.760479,0.683715,0.720057


{'eval_loss': 0.30178993940353394, 'eval_accuracy': 0.9519230769230769, 'eval_precision': 0.7331571994715984, 'eval_recall': 0.746971736204576, 'eval_f1': 0.74, 'eval_runtime': 8.674, 'eval_samples_per_second': 935.205, 'eval_steps_per_second': 7.378, 'epoch': 10.0}


('./fine_tuned_bert/tokenizer_config.json',
 './fine_tuned_bert/special_tokens_map.json',
 './fine_tuned_bert/vocab.txt',
 './fine_tuned_bert/added_tokens.json',
 './fine_tuned_bert/tokenizer.json')

In [20]:
# copy ./results to google drive ./results_bert_date_time
import os
import shutil
from datetime import datetime
# Define source and destination paths
source_path = './fine_tuned_bert'
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
destination_path = f'/content/drive/My Drive/results_bert_{current_time}'

# Ensure the source exists
if os.path.exists(source_path):
    # Copy the directory to Google Drive
    shutil.copytree(source_path, destination_path)
    print(f"Copied '{source_path}' to '{destination_path}'")
else:
    print(f"Source directory '{source_path}' does not exist!")


Copied './fine_tuned_bert' to '/content/drive/My Drive/results_bert_20241211_134025'


In [21]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Path to the saved model in Google Drive
model_path = destination_path

# Load the model and tokenizer
model_load = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer_load = AutoTokenizer.from_pretrained(model_path)

print("Model and tokenizer loaded successfully!")


Model and tokenizer loaded successfully!


In [22]:
from itertools import product
# Generate all possible entity-attribute pairs
entity_attribute_combinations = opinions_set
entity_attribute_caps = [
    ea.upper().replace(",", "#").replace(" ", "_")
    for ea in entity_attribute_combinations
]
print(entity_attribute_combinations)
print(entity_attribute_caps)

# Tokenization function
def tokenize_pair(sentence, category):
    return tokenizer(sentence, category, return_tensors="pt", truncation=True, padding=True)

# Inference loop
def infer_entity_attribute_pairs(test_data, model, tokenizer, entity_attribute_combinations):
    results = []
    test_data_copy = test_data.copy()
    test_data_copy['opinions'] = test_data_copy['opinions'].apply(lambda x: [
        f"{entity.upper()}#{attribute.upper().replace(' ', '_')}"
        for entity, attribute, polarity in x
    ])

    for idx, row in test_data_copy.iterrows():
        sentence = row['text']
        predictions = []

        for pair in entity_attribute_combinations:
            # Tokenize the pair (sentence, entity#attribute)
            tokens = tokenize_pair(sentence, pair)
            # tokens to gpu
            tokens = {k: v.to(model.device) for k, v in tokens.items()}
            # Perform inference
            with torch.no_grad():
                outputs = model(**tokens)
                logits = outputs.logits
                predicted_label = np.argmax(logits.cpu().numpy(), axis=-1)[0]  # Get the predicted label (0 or 1)

            # If the prediction is 1, add the pair to the results
            if predicted_label == 1:
                predictions.append(pair.replace(",","#").replace(" ", "_"))
        if idx % 20 == 0:
          print(f"Sentence {idx}: {sentence}")
          print(f"Predicted Pairs: {predictions}")
          print(f"Ground truth: {row['opinions']}")

        # Store results for the current sentence
        results.append({
            'idx': row['sentenceId'],
            'sentence': sentence,
            'category_truth': row['opinions'],
            'predicted_pairs': predictions
        })

    return pd.DataFrame(results)

# # Load and prepare test data
# train_data, test_data = load_and_prepare_data()  # Your data loading function
print(len(test_data))
results_df = infer_entity_attribute_pairs(test_data, model, tokenizer, entity_attribute_combinations)
# save to csv
results_df.to_csv("results_bert.csv", index=False)
# Display results
print(results_df)

{'FOOD,QUALITY', 'RESTAURANT,GENERAL', 'FOOD,STYLE OPTIONS', 'FOOD,PRICES', 'DRINKS,STYLE OPTIONS', 'SERVICE,GENERAL', 'RESTAURANT,PRICES', 'DRINKS,QUALITY', 'DRINKS,PRICES', 'LOCATION,GENERAL', 'AMBIENCE,GENERAL', 'RESTAURANT,MISCELLANEOUS'}
['FOOD#QUALITY', 'RESTAURANT#GENERAL', 'FOOD#STYLE_OPTIONS', 'FOOD#PRICES', 'DRINKS#STYLE_OPTIONS', 'SERVICE#GENERAL', 'RESTAURANT#PRICES', 'DRINKS#QUALITY', 'DRINKS#PRICES', 'LOCATION#GENERAL', 'AMBIENCE#GENERAL', 'RESTAURANT#MISCELLANEOUS']
676
Sentence 0: Yum!
Predicted Pairs: []
Ground truth: ['FOOD#QUALITY']
Sentence 20: Overrated
Predicted Pairs: ['RESTAURANT#GENERAL']
Ground truth: ['RESTAURANT#GENERAL']
Sentence 40: I have not a bad thing to say about this place.
Predicted Pairs: ['RESTAURANT#GENERAL']
Ground truth: ['RESTAURANT#GENERAL']
Sentence 60: Rice is too dry, tuna wasn't so fresh either.
Predicted Pairs: ['FOOD#QUALITY']
Ground truth: ['FOOD#QUALITY', 'FOOD#QUALITY']
Sentence 80: It is not worth going at all and spend your money t

In [10]:
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import MultiLabelBinarizer

def evaluate_multilabel_classification(results_df, all_categories):
    """
    Evaluates the multi-label classification performance using sklearn metrics.

    Args:
        results_df (pd.DataFrame): The DataFrame containing ground truth and predicted pairs.
        all_categories (list): List of all possible categories.

    Returns:
        dict: Metrics including precision, recall, f1, and accuracy.
    """
    # Convert ground truth and predictions into binary matrices
    mlb = MultiLabelBinarizer(classes=all_categories)
    y_true = mlb.fit_transform(results_df['category_truth'])
    y_pred = mlb.transform(results_df['predicted_pairs'])

    # Compute metrics
    metrics = classification_report(y_true, y_pred, target_names=all_categories, zero_division=0, output_dict=True)
    accuracy = accuracy_score(y_true, y_pred)

    print("Classification Report:")
    print(classification_report(y_true, y_pred, target_names=all_categories, zero_division=0))

    print(f"Accuracy: {accuracy:.4f}")
    return metrics

metrics = evaluate_multilabel_classification(results_df, entity_attribute_caps)
metrics

Classification Report:
                          precision    recall  f1-score   support

            FOOD#QUALITY       0.78      0.90      0.84       226
      RESTAURANT#GENERAL       0.74      0.61      0.67       142
      FOOD#STYLE_OPTIONS       0.50      0.60      0.55        48
             FOOD#PRICES       0.62      0.82      0.71        22
    DRINKS#STYLE_OPTIONS       0.67      0.67      0.67        12
         SERVICE#GENERAL       0.84      0.85      0.85       145
       RESTAURANT#PRICES       0.60      0.43      0.50        21
          DRINKS#QUALITY       0.57      0.38      0.46        21
           DRINKS#PRICES       0.00      0.00      0.00         3
        LOCATION#GENERAL       0.89      0.62      0.73        13
        AMBIENCE#GENERAL       0.69      0.93      0.79        57
RESTAURANT#MISCELLANEOUS       0.47      0.27      0.35        33

               micro avg       0.73      0.75      0.74       743
               macro avg       0.61      0.59      

{'FOOD#QUALITY': {'precision': 0.7816091954022989,
  'recall': 0.9026548672566371,
  'f1-score': 0.837782340862423,
  'support': 226.0},
 'RESTAURANT#GENERAL': {'precision': 0.7413793103448276,
  'recall': 0.6056338028169014,
  'f1-score': 0.6666666666666666,
  'support': 142.0},
 'FOOD#STYLE_OPTIONS': {'precision': 0.5,
  'recall': 0.6041666666666666,
  'f1-score': 0.5471698113207547,
  'support': 48.0},
 'FOOD#PRICES': {'precision': 0.6206896551724138,
  'recall': 0.8181818181818182,
  'f1-score': 0.7058823529411765,
  'support': 22.0},
 'DRINKS#STYLE_OPTIONS': {'precision': 0.6666666666666666,
  'recall': 0.6666666666666666,
  'f1-score': 0.6666666666666666,
  'support': 12.0},
 'SERVICE#GENERAL': {'precision': 0.8424657534246576,
  'recall': 0.8482758620689655,
  'f1-score': 0.845360824742268,
  'support': 145.0},
 'RESTAURANT#PRICES': {'precision': 0.6,
  'recall': 0.42857142857142855,
  'f1-score': 0.5,
  'support': 21.0},
 'DRINKS#QUALITY': {'precision': 0.5714285714285714,
  'r

# LLM

In [None]:
!pip install datasets

from transformers import pipeline, set_seed
from itertools import product
import pandas as pd
import re
from datasets import load_dataset
# Load a text-generation model
generator = pipeline('text-generation', model='Qwen/Qwen2.5-0.5B', device=0)
set_seed(42)



In [None]:
from transformers import pipeline, set_seed
from itertools import product
import pandas as pd
from datasets import load_dataset
from tqdm import tqdm
import os
from sklearn.metrics import classification_report
from sklearn.preprocessing import MultiLabelBinarizer

entity_attribute_combinations = ['FOOD#STYLE_OPTIONS', 'RESTAURANT#GENERAL', 'DRINKS#QUALITY', 'RESTAURANT#PRICES', 'LOCATION#GENERAL', 'DRINKS#PRICES', 'RESTAURANT#MISCELLANEOUS', 'AMBIENCE#GENERAL', 'SERVICE#GENERAL', 'DRINKS#STYLE_OPTIONS', 'FOOD#QUALITY', 'FOOD#PRICES']

# Few-shot prompt generator
def generate_few_shot_prompt(sentence, candidate_pairs, examples):
    """
    Generates a prompt with a few labeled examples included.
    """
    query = "Given a sentence, identify all applicable entity and attribute pairs from the following options:\n"
    query += ", ".join(candidate_pairs)
    query += "\n Here are a few examples:"
    for example in examples:
        query += f"\nSentence: \"{example['text']}\" -> Predicted pairs: {', '.join(example['categories'])}\n"
    query += f"\nNow predict for the most relevant entity attribute pairs from following sentence. A relevant pair is any aspect the sentence is talking about:\n"
    query += f"Sentence: \"{sentence}\" -> Predicted pairs: "
    return query

# Few-shot classification
def classify_sentence_few_shot(sentence, candidate_pairs, few_shot_examples):
    """
    Performs classification using a few-shot prompt.
    """
    prompt = generate_few_shot_prompt(sentence, candidate_pairs, few_shot_examples)
    output = generator(prompt, max_new_tokens=50, pad_token_id=50256)
    generated_text = output[0]['generated_text']
    predicted_text = generated_text.split("Predicted pairs:")[-1].strip().lower()

    # Extract predicted pairs
    predicted_pairs = []
    for pair in candidate_pairs:
        if pair.lower() in predicted_text:
            predicted_pairs.append(pair)

    return predicted_pairs, predicted_text

# Evaluation function
def evaluate_predictions(predictions, ground_truth):
    """
    Evaluates predictions using precision, recall, and F1 score.
    """
    true_set = set(ground_truth)
    pred_set = set(predictions)

    true_positive = len(true_set & pred_set)
    false_positive = len(pred_set - true_set)
    false_negative = len(true_set - pred_set)

    precision = true_positive / (true_positive + false_positive) if (true_positive + false_positive) > 0 else 0
    recall = true_positive / (true_positive + false_negative) if (true_positive + false_negative) > 0 else 0
    f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    return precision, recall, f1

# Load and prepare data
def load_and_prepare_data():
    ds = load_dataset("alexcadillon/SemEval2016Task5", "restaurants")

    def extract_data(data):
        extracted_data = []
        for item in data:
            for sentence in item['sentences']:
                text = sentence['text']
                opinions = sentence.get('opinions', [])
                opinion_tuples = [
                    f"{opinion['category']['entity']}#{opinion['category']['attribute']}"
                    for opinion in opinions
                ]
                extracted_data.append({
                    'sentenceId': sentence['sentenceId'],
                    'text': text,
                    'categories': opinion_tuples
                })
        return pd.DataFrame(extracted_data)

    return extract_data(ds['train']), extract_data(ds['test'])

# Few-shot Main function
def main():
    train_data, test_data = load_and_prepare_data()
    few_shot_examples = train_data.sample(n=5).to_dict('records')  # Select 5 examples for few-shot learning

    results = []
    save_interval = 50  # Save every 20 iterations
    output_file = "results_few_shot.csv"

    mlb = MultiLabelBinarizer()
    y_train = mlb.fit_transform(train_data['categories'])
    y_test = mlb.transform(test_data['categories'])
    print(y_train[0])
    print(y_test[0])


    ground_truths = []
    predictions = []

    # Use tqdm for progress tracking
    for idx, row in tqdm(test_data.iterrows(), total=test_data.shape[0], desc="Processing Rows"):
        sentence = row['text']
        # Predict categories using few-shot classification
        predicted_categories, predicted_text = classify_sentence_few_shot(sentence, entity_attribute_combinations, few_shot_examples)

        ground_truth = mlb.transform([row['categories']])
        test_prediction = mlb.transform([predicted_categories])


        predictions.append(test_prediction[0])
        ground_truths.append(ground_truth[0])

        # Save results periodically
        if (idx + 1) % save_interval == 0 or (idx + 1) == len(test_data):
            metrics = classification_report(ground_truths, predictions, target_names=mlb.classes_, zero_division=0)
            print(metrics)
            results_df = pd.DataFrame(results)
            results_df.to_csv(str(idx) + output_file, index=False)
    print("Final metrics")
    metrics = classification_report(ground_truths, predictions, target_names=mlb.classes_, zero_division=0)

    print("Processing complete.")
    print(f"Results saved to {output_file}")

main()

[0 0 0 0 0 0 0 0 1 0 0 0]
[0 0 0 0 0 1 0 0 0 0 0 0]


Processing Rows:   7%|▋         | 50/676 [01:17<17:07,  1.64s/it]

                          precision    recall  f1-score   support

        AMBIENCE#GENERAL       0.04      1.00      0.08         2
           DRINKS#PRICES       0.00      0.00      0.00         0
          DRINKS#QUALITY       0.00      0.00      0.00         0
    DRINKS#STYLE_OPTIONS       0.04      1.00      0.08         2
             FOOD#PRICES       0.03      1.00      0.06         1
            FOOD#QUALITY       0.34      0.61      0.44        18
      FOOD#STYLE_OPTIONS       0.04      1.00      0.08         2
        LOCATION#GENERAL       0.00      0.00      0.00         0
      RESTAURANT#GENERAL       0.31      0.94      0.47        16
RESTAURANT#MISCELLANEOUS       0.00      0.00      0.00         0
       RESTAURANT#PRICES       0.00      0.00      0.00         0
         SERVICE#GENERAL       0.27      0.23      0.25        13

               micro avg       0.10      0.67      0.17        54
               macro avg       0.09      0.48      0.12        54
        

Processing Rows:  15%|█▍        | 100/676 [02:34<14:31,  1.51s/it]

                          precision    recall  f1-score   support

        AMBIENCE#GENERAL       0.06      1.00      0.12         6
           DRINKS#PRICES       0.00      0.00      0.00         0
          DRINKS#QUALITY       0.01      1.00      0.02         1
    DRINKS#STYLE_OPTIONS       0.02      1.00      0.04         2
             FOOD#PRICES       0.03      1.00      0.06         2
            FOOD#QUALITY       0.43      0.75      0.55        40
      FOOD#STYLE_OPTIONS       0.05      1.00      0.10         5
        LOCATION#GENERAL       0.00      0.00      0.00         1
      RESTAURANT#GENERAL       0.28      0.93      0.43        28
RESTAURANT#MISCELLANEOUS       0.00      0.00      0.00         0
       RESTAURANT#PRICES       0.00      0.00      0.00         3
         SERVICE#GENERAL       0.38      0.22      0.28        27

               micro avg       0.11      0.68      0.19       115
               macro avg       0.11      0.58      0.13       115
        

Processing Rows:  22%|██▏       | 150/676 [03:47<13:49,  1.58s/it]

                          precision    recall  f1-score   support

        AMBIENCE#GENERAL       0.09      1.00      0.17        13
           DRINKS#PRICES       0.00      0.00      0.00         0
          DRINKS#QUALITY       0.02      1.00      0.04         3
    DRINKS#STYLE_OPTIONS       0.03      1.00      0.05         4
             FOOD#PRICES       0.03      1.00      0.06         3
            FOOD#QUALITY       0.39      0.77      0.52        53
      FOOD#STYLE_OPTIONS       0.07      1.00      0.12         9
        LOCATION#GENERAL       0.00      0.00      0.00         2
      RESTAURANT#GENERAL       0.30      0.95      0.45        42
RESTAURANT#MISCELLANEOUS       0.00      0.00      0.00         2
       RESTAURANT#PRICES       0.00      0.00      0.00         6
         SERVICE#GENERAL       0.35      0.21      0.26        34

               micro avg       0.11      0.70      0.19       171
               macro avg       0.11      0.58      0.14       171
        

Processing Rows:  30%|██▉       | 200/676 [05:01<11:28,  1.45s/it]

                          precision    recall  f1-score   support

        AMBIENCE#GENERAL       0.09      1.00      0.16        17
           DRINKS#PRICES       0.00      0.00      0.00         0
          DRINKS#QUALITY       0.02      1.00      0.04         4
    DRINKS#STYLE_OPTIONS       0.02      1.00      0.04         4
             FOOD#PRICES       0.02      0.75      0.04         4
            FOOD#QUALITY       0.42      0.81      0.55        75
      FOOD#STYLE_OPTIONS       0.07      1.00      0.12        12
        LOCATION#GENERAL       0.12      0.33      0.18         3
      RESTAURANT#GENERAL       0.26      0.96      0.41        50
RESTAURANT#MISCELLANEOUS       0.04      0.17      0.06         6
       RESTAURANT#PRICES       0.00      0.00      0.00         6
         SERVICE#GENERAL       0.33      0.16      0.22        44

               micro avg       0.11      0.70      0.19       225
               macro avg       0.12      0.60      0.15       225
        

Processing Rows:  37%|███▋      | 250/676 [06:13<09:57,  1.40s/it]

                          precision    recall  f1-score   support

        AMBIENCE#GENERAL       0.10      1.00      0.18        23
           DRINKS#PRICES       0.00      0.00      0.00         0
          DRINKS#QUALITY       0.05      1.00      0.09        10
    DRINKS#STYLE_OPTIONS       0.03      1.00      0.05         6
             FOOD#PRICES       0.03      0.83      0.05         6
            FOOD#QUALITY       0.38      0.80      0.51        86
      FOOD#STYLE_OPTIONS       0.07      1.00      0.12        15
        LOCATION#GENERAL       0.10      0.25      0.14         4
      RESTAURANT#GENERAL       0.28      0.94      0.43        67
RESTAURANT#MISCELLANEOUS       0.03      0.14      0.05         7
       RESTAURANT#PRICES       0.00      0.00      0.00         6
         SERVICE#GENERAL       0.32      0.17      0.22        53

               micro avg       0.11      0.71      0.19       283
               macro avg       0.11      0.59      0.15       283
        

Processing Rows:  44%|████▍     | 300/676 [07:25<08:36,  1.37s/it]

                          precision    recall  f1-score   support

        AMBIENCE#GENERAL       0.10      1.00      0.18        28
           DRINKS#PRICES       0.00      0.00      0.00         0
          DRINKS#QUALITY       0.04      1.00      0.08        11
    DRINKS#STYLE_OPTIONS       0.02      1.00      0.05         7
             FOOD#PRICES       0.03      0.86      0.05         7
            FOOD#QUALITY       0.38      0.77      0.51       106
      FOOD#STYLE_OPTIONS       0.05      1.00      0.10        15
        LOCATION#GENERAL       0.10      0.25      0.14         4
      RESTAURANT#GENERAL       0.27      0.91      0.42        81
RESTAURANT#MISCELLANEOUS       0.02      0.08      0.04        12
       RESTAURANT#PRICES       0.00      0.00      0.00         8
         SERVICE#GENERAL       0.29      0.15      0.20        60

               micro avg       0.11      0.69      0.19       339
               macro avg       0.11      0.59      0.15       339
        

Processing Rows:  52%|█████▏    | 350/676 [08:38<08:21,  1.54s/it]

                          precision    recall  f1-score   support

        AMBIENCE#GENERAL       0.09      1.00      0.17        32
           DRINKS#PRICES       0.00      0.00      0.00         0
          DRINKS#QUALITY       0.04      0.93      0.08        14
    DRINKS#STYLE_OPTIONS       0.02      1.00      0.04         7
             FOOD#PRICES       0.03      0.89      0.06         9
            FOOD#QUALITY       0.38      0.80      0.51       122
      FOOD#STYLE_OPTIONS       0.06      1.00      0.11        19
        LOCATION#GENERAL       0.09      0.20      0.12         5
      RESTAURANT#GENERAL       0.27      0.92      0.42        93
RESTAURANT#MISCELLANEOUS       0.06      0.21      0.10        14
       RESTAURANT#PRICES       0.00      0.00      0.00         8
         SERVICE#GENERAL       0.30      0.14      0.19        71

               micro avg       0.11      0.70      0.19       394
               macro avg       0.11      0.59      0.15       394
        

Processing Rows:  59%|█████▉    | 400/676 [09:52<06:59,  1.52s/it]

                          precision    recall  f1-score   support

        AMBIENCE#GENERAL       0.09      1.00      0.16        34
           DRINKS#PRICES       0.00      0.00      0.00         0
          DRINKS#QUALITY       0.04      0.93      0.08        15
    DRINKS#STYLE_OPTIONS       0.02      1.00      0.04         7
             FOOD#PRICES       0.03      0.90      0.06        10
            FOOD#QUALITY       0.37      0.77      0.50       137
      FOOD#STYLE_OPTIONS       0.05      1.00      0.10        19
        LOCATION#GENERAL       0.09      0.17      0.12         6
      RESTAURANT#GENERAL       0.26      0.92      0.41       103
RESTAURANT#MISCELLANEOUS       0.11      0.39      0.17        18
       RESTAURANT#PRICES       0.00      0.00      0.00         8
         SERVICE#GENERAL       0.32      0.14      0.19        87

               micro avg       0.11      0.68      0.18       444
               macro avg       0.12      0.60      0.15       444
        

Processing Rows:  67%|██████▋   | 450/676 [11:05<05:22,  1.43s/it]

                          precision    recall  f1-score   support

        AMBIENCE#GENERAL       0.09      1.00      0.16        37
           DRINKS#PRICES       0.00      0.00      0.00         0
          DRINKS#QUALITY       0.04      0.94      0.07        16
    DRINKS#STYLE_OPTIONS       0.02      1.00      0.04         8
             FOOD#PRICES       0.03      0.91      0.06        11
            FOOD#QUALITY       0.38      0.78      0.51       156
      FOOD#STYLE_OPTIONS       0.05      1.00      0.10        22
        LOCATION#GENERAL       0.09      0.17      0.12         6
      RESTAURANT#GENERAL       0.24      0.93      0.38       107
RESTAURANT#MISCELLANEOUS       0.10      0.37      0.16        19
       RESTAURANT#PRICES       0.00      0.00      0.00        12
         SERVICE#GENERAL       0.33      0.13      0.19        99

               micro avg       0.10      0.68      0.18       493
               macro avg       0.11      0.60      0.15       493
        

Processing Rows:  74%|███████▍  | 500/676 [12:17<04:05,  1.40s/it]

                          precision    recall  f1-score   support

        AMBIENCE#GENERAL       0.08      1.00      0.15        39
           DRINKS#PRICES       0.00      0.00      0.00         0
          DRINKS#QUALITY       0.04      0.94      0.07        17
    DRINKS#STYLE_OPTIONS       0.02      1.00      0.04        10
             FOOD#PRICES       0.03      0.92      0.06        13
            FOOD#QUALITY       0.38      0.79      0.51       172
      FOOD#STYLE_OPTIONS       0.06      1.00      0.11        27
        LOCATION#GENERAL       0.08      0.11      0.10         9
      RESTAURANT#GENERAL       0.22      0.92      0.36       110
RESTAURANT#MISCELLANEOUS       0.11      0.40      0.17        20
       RESTAURANT#PRICES       0.00      0.00      0.00        13
         SERVICE#GENERAL       0.36      0.13      0.19       119

               micro avg       0.10      0.66      0.18       549
               macro avg       0.12      0.60      0.15       549
        

Processing Rows:  81%|████████▏ | 550/676 [13:29<02:52,  1.37s/it]

                          precision    recall  f1-score   support

        AMBIENCE#GENERAL       0.08      1.00      0.15        44
           DRINKS#PRICES       0.00      0.00      0.00         0
          DRINKS#QUALITY       0.04      0.95      0.07        19
    DRINKS#STYLE_OPTIONS       0.02      1.00      0.04        11
             FOOD#PRICES       0.03      0.81      0.06        16
            FOOD#QUALITY       0.37      0.79      0.50       188
      FOOD#STYLE_OPTIONS       0.06      0.97      0.11        30
        LOCATION#GENERAL       0.08      0.11      0.10         9
      RESTAURANT#GENERAL       0.23      0.93      0.36       121
RESTAURANT#MISCELLANEOUS       0.10      0.36      0.16        22
       RESTAURANT#PRICES       0.05      0.06      0.05        16
         SERVICE#GENERAL       0.36      0.12      0.18       126

               micro avg       0.10      0.66      0.18       602
               macro avg       0.12      0.59      0.15       602
        

Processing Rows:  89%|████████▉ | 600/676 [14:43<01:53,  1.49s/it]

                          precision    recall  f1-score   support

        AMBIENCE#GENERAL       0.08      1.00      0.16        49
           DRINKS#PRICES       0.00      0.00      0.00         0
          DRINKS#QUALITY       0.04      0.95      0.07        20
    DRINKS#STYLE_OPTIONS       0.02      1.00      0.04        11
             FOOD#PRICES       0.03      0.79      0.07        19
            FOOD#QUALITY       0.37      0.78      0.50       206
      FOOD#STYLE_OPTIONS       0.06      0.97      0.12        34
        LOCATION#GENERAL       0.07      0.11      0.08         9
      RESTAURANT#GENERAL       0.23      0.92      0.36       132
RESTAURANT#MISCELLANEOUS       0.10      0.32      0.15        25
       RESTAURANT#PRICES       0.04      0.06      0.05        16
         SERVICE#GENERAL       0.38      0.14      0.20       133

               micro avg       0.10      0.67      0.18       654
               macro avg       0.12      0.59      0.15       654
        

Processing Rows:  96%|█████████▌| 650/676 [15:55<00:41,  1.61s/it]

                          precision    recall  f1-score   support

        AMBIENCE#GENERAL       0.09      1.00      0.17        57
           DRINKS#PRICES       0.00      1.00      0.01         2
          DRINKS#QUALITY       0.03      0.95      0.07        21
    DRINKS#STYLE_OPTIONS       0.02      1.00      0.04        12
             FOOD#PRICES       0.03      0.79      0.06        19
            FOOD#QUALITY       0.37      0.78      0.50       218
      FOOD#STYLE_OPTIONS       0.07      0.98      0.13        41
        LOCATION#GENERAL       0.06      0.11      0.08         9
      RESTAURANT#GENERAL       0.22      0.93      0.36       138
RESTAURANT#MISCELLANEOUS       0.10      0.30      0.15        30
       RESTAURANT#PRICES       0.04      0.05      0.04        21
         SERVICE#GENERAL       0.38      0.13      0.20       141

               micro avg       0.10      0.67      0.18       709
               macro avg       0.12      0.67      0.15       709
        

Processing Rows: 100%|██████████| 676/676 [16:32<00:00,  1.47s/it]

                          precision    recall  f1-score   support

        AMBIENCE#GENERAL       0.09      1.00      0.16        57
           DRINKS#PRICES       0.01      1.00      0.01         3
          DRINKS#QUALITY       0.03      0.95      0.06        21
    DRINKS#STYLE_OPTIONS       0.02      1.00      0.04        12
             FOOD#PRICES       0.04      0.82      0.07        22
            FOOD#QUALITY       0.36      0.78      0.49       226
      FOOD#STYLE_OPTIONS       0.08      0.98      0.14        48
        LOCATION#GENERAL       0.06      0.08      0.07        13
      RESTAURANT#GENERAL       0.22      0.93      0.35       142
RESTAURANT#MISCELLANEOUS       0.09      0.27      0.14        33
       RESTAURANT#PRICES       0.04      0.05      0.04        21
         SERVICE#GENERAL       0.38      0.13      0.19       145

               micro avg       0.10      0.67      0.18       743
               macro avg       0.12      0.67      0.15       743
        


