### 1. Import libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
import jieba
import matplotlib.pyplot as plt
import seaborn as sns
import os
import joblib
import transformers
import torch

### 2. Load data and preprocess

We are using [ASAP](https://github.com/Meituan-Dianping/asap) dataset authored by Bu et. al. ASAP is a Chinese restaurant review dataset collected from Dianping App. Reviews are written in Chinese and each review is annotated with a star rating from 1 to 5 and 18 different aspects along with the sentiment. 


Each aspect category for example Location#Transportation is is labeled as 1(Positive), 0(Neutral), −1(Negative), −2(Not-Mentioned). The data is conveniently splited into train, dev, test dataset already.

[jieba](https://github.com/fxsjy/jieba) is used 


In [2]:
def preprocess_text(text):
    words = jieba.cut(text)
    return " ".join(words)


def convert_sentiment(score):
    if score == -2:
        return "not_mentioned"
    elif score == -1:
        return "negative"
    elif score == 0:
        return "neutral"
    else:  # score == 1
        return "positive"


def load_and_preprocess_data(file_path):
    df = pd.read_csv(file_path)

    # Define aspects, e.g. Food#Appearance, Service#Price, etc.
    aspect_columns = [col for col in df.columns if col not in ["id", "review", "star"]]
    y = df[aspect_columns]

    # Convert sentiment scores to categorical labels
    y = df[aspect_columns].astype("object")
    for col in y.columns:
        y.loc[:, col] = y[col].apply(convert_sentiment)

    # Data preprocessing
    df["processed_review"] = df["review"].apply(preprocess_text)

    return df["processed_review"], y, aspect_columns


train_path = "train.csv"
dev_path = "dev.csv"
test_path = "test.csv"

X_train, y_train, aspect_columns = load_and_preprocess_data(train_path)
X_dev, y_dev, _ = load_and_preprocess_data(dev_path)
X_test, y_test, _ = load_and_preprocess_data(test_path)

Building prefix dict from the default dictionary ...
Dumping model to file cache /tmp/jieba.cache
Loading model cost 0.401 seconds.
Prefix dict has been built successfully.


In [3]:
print(f"Train shape: {X_train.shape}, {y_train.shape}")
print(f"Dev shape: {X_dev.shape}, {y_dev.shape}")
print(f"Test shape: {X_test.shape}, {y_test.shape}\n")

pd.concat([X_train, y_train], axis=1).head()

Train shape: (36850,), (36850, 18)
Dev shape: (4940,), (4940, 18)
Test shape: (4940,), (4940, 18)



Unnamed: 0,processed_review,Location#Transportation,Location#Downtown,Location#Easy_to_find,Service#Queue,Service#Hospitality,Service#Parking,Service#Timely,Price#Level,Price#Cost_effective,Price#Discount,Ambience#Decoration,Ambience#Noise,Ambience#Space,Ambience#Sanitary,Food#Portion,Food#Taste,Food#Appearance,Food#Recommend
0,状元 楼 饭店 第一次 去 ， 因为 地理位置 优越 ： 在 宁波市 和 义 大道 高 、 ...,positive,positive,positive,not_mentioned,positive,not_mentioned,not_mentioned,not_mentioned,not_mentioned,not_mentioned,positive,not_mentioned,not_mentioned,not_mentioned,not_mentioned,positive,not_mentioned,not_mentioned
1,我 最 爱 他们 家 的 猪手 ， 麻辣 鸡爪 ， 肉片 口磨 ， 道 道菜 都 是 家常菜...,positive,not_mentioned,not_mentioned,not_mentioned,positive,not_mentioned,not_mentioned,not_mentioned,not_mentioned,not_mentioned,not_mentioned,not_mentioned,not_mentioned,positive,not_mentioned,positive,not_mentioned,not_mentioned
2,我 是 比较 喜欢 荣 新馆 的 ， 因为 材料 新鲜 ， 服务 又 好 ， 价格 适中 ，...,not_mentioned,not_mentioned,not_mentioned,not_mentioned,positive,not_mentioned,not_mentioned,neutral,not_mentioned,not_mentioned,not_mentioned,not_mentioned,not_mentioned,not_mentioned,not_mentioned,neutral,positive,not_mentioned
3,8.8 秒杀 的 多嘴 肉蟹 煲 ， 第一天 开业 就 去 了 ， 大众 点评 很 给 力 ...,not_mentioned,not_mentioned,not_mentioned,negative,positive,not_mentioned,not_mentioned,neutral,not_mentioned,positive,not_mentioned,not_mentioned,not_mentioned,not_mentioned,positive,positive,not_mentioned,not_mentioned
4,喜欢 KOI 好多年 了 ， 但是 看着 它 的 价格 在 一路 飙涨 ， 真心 是 有点 ...,not_mentioned,positive,negative,not_mentioned,not_mentioned,not_mentioned,not_mentioned,positive,not_mentioned,positive,not_mentioned,not_mentioned,not_mentioned,not_mentioned,positive,positive,not_mentioned,not_mentioned


### 3. Exploratory data analysis

In [4]:
# Create a subdirectory for EDA plots
OUTPUT_DIR = "eda_plots"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Bar plot colors for sentiment categories
SENTIMENTS = ["not_mentioned", "negative", "neutral", "positive"]
SENTIMENT_COLORS = {
    "not_mentioned": "#808080",  # Gray
    "negative": "#FF0000",  # Red
    "neutral": "#1F77B4",  # Blue
    "positive": "#2CA02C",  # Green
}


def plot_aspect_mention_frequency(y, dataset_name):
    """Plot the frequency of aspect mentions in the dataset."""
    mention_freq = (y != "not_mentioned").mean()
    plt.figure(figsize=(12, 6))
    ax = sns.barplot(x=mention_freq.index, y=mention_freq.values)
    plt.title(f"Aspect Mention Frequency in {dataset_name} Dataset")
    plt.xticks(rotation=45, ha="right")
    plt.ylabel("Proportion of Reviews Mentioning Aspect")
    annotate_bars(ax)
    plt.tight_layout()
    plt.savefig(
        os.path.join(OUTPUT_DIR, f"aspect_mention_frequency_{dataset_name}.png")
    )
    plt.close()


def plot_sentiment_distribution(y, dataset_name):
    """Plot the sentiment distribution for each aspect in the dataset."""
    for aspect in y.columns:
        plt.figure(figsize=(8, 6))
        ax = sns.countplot(
            data=y,
            x=aspect,
            order=SENTIMENTS,
            hue=aspect,
            palette=SENTIMENT_COLORS,
            legend=False,
        )
        plt.title(f"Sentiment Distribution for {aspect} in {dataset_name} Dataset")
        plt.xlabel("Sentiment")
        plt.ylabel("Count")
        annotate_bars(ax)
        plt.savefig(
            os.path.join(
                OUTPUT_DIR, f"sentiment_distribution_{aspect}_{dataset_name}.png"
            )
        )
        plt.close()


def annotate_bars(ax):
    """Annotate bars with their heights, formatting based on value range."""
    for p in ax.patches:
        height = p.get_height()
        if height <= 0:
            return
        if 0 < height < 1:
            annotation_text = f"{height:.2f}"
        else:
            # Format as whole number for other values
            annotation_text = f"{int(height)}"

        ax.annotate(
            annotation_text,
            (p.get_x() + p.get_width() / 2.0, height),
            ha="center",
            va="bottom",
            xytext=(0, 5),
            textcoords="offset points",
        )


def plot_aspect_mention_distribution(y, dataset_name):
    """Plot the distribution of the number of aspects mentioned per review."""
    num_aspects_mentioned_per_review = (y != "not_mentioned").sum(axis=1)
    plt.figure(figsize=(10, 6))
    ax = sns.histplot(
        num_aspects_mentioned_per_review, bins=range(0, len(y.columns) + 1), kde=False
    )
    plt.title(
        f"Distribution of Number of Aspects Mentioned per Review in {dataset_name} Dataset"
    )
    plt.xlabel("Number of Aspects Mentioned")
    plt.ylabel("Number of Reviews")
    annotate_bars(ax)
    plt.savefig(os.path.join(OUTPUT_DIR, f"num_aspects_mentioned_{dataset_name}.png"))
    plt.close()


def summarize_dataset(y, dataset_name):
    """Print summary statistics for the dataset."""
    print(f"{dataset_name.capitalize()}")
    print(f"Total reviews: {len(y)}")

    num_aspects_mentioned_per_review = (y != "not_mentioned").sum(axis=1)
    avg_mentions = num_aspects_mentioned_per_review.mean()

    most_mentioned_aspect = (y != "not_mentioned").sum().idxmax()
    most_mentioned_aspect_count = (y != "not_mentioned").sum().max()
    most_mentioned_aspect_percentage = (
        (y[most_mentioned_aspect] != "not_mentioned").sum() / len(y)
    ) * 100

    least_mentioned_aspect = (y != "not_mentioned").sum().idxmin()
    least_mentioned_aspect_count = (y != "not_mentioned").sum().min()
    least_mentioned_aspect_percentage = (
        (y[least_mentioned_aspect] != "not_mentioned").sum() / len(y)
    ) * 100

    print(f"Average number of aspects mentioned per review: {avg_mentions:.2f}")
    print(
        f"Most frequently mentioned aspect: {most_mentioned_aspect} {most_mentioned_aspect_count} ({most_mentioned_aspect_percentage:.2f}%)"
    )
    print(
        f"Least frequently mentioned aspect: {least_mentioned_aspect} {least_mentioned_aspect_count} ({least_mentioned_aspect_percentage:.2f}%)\n"
    )


def perform_eda(y, dataset_name):
    plot_aspect_mention_frequency(y, dataset_name)
    plot_sentiment_distribution(y, dataset_name)
    plot_aspect_mention_distribution(y, dataset_name)
    summarize_dataset(y, dataset_name)

In [5]:
perform_eda(y_train, "train")
perform_eda(y_dev, "dev")
perform_eda(y_test, "test")

Train
Total reviews: 36850
Average number of aspects mentioned per review: 5.79
Most frequently mentioned aspect: Food#Taste 34872 (94.63%)
Least frequently mentioned aspect: Service#Parking 2476 (6.72%)

Dev
Total reviews: 4940
Average number of aspects mentioned per review: 5.89
Most frequently mentioned aspect: Food#Taste 4672 (94.57%)
Least frequently mentioned aspect: Service#Parking 323 (6.54%)

Test
Total reviews: 4940
Average number of aspects mentioned per review: 5.74
Most frequently mentioned aspect: Food#Taste 4679 (94.72%)
Least frequently mentioned aspect: Service#Parking 326 (6.60%)



### 4. Word embedding

To perform any sort of training, we need to convert raw string (chars) into vectors so that they can be computed. There are plenty of ways to do it including Bag of Words (BoW), Word2vec, GloVe, etc... 

We shall try them and compare the results.

### 5. Bidirectional Encoder Representations from Transformers (BERT)

In [6]:
### 5. Bidirectional Encoder Representations from Transformers (BERT)

from transformers import BertModel, BertTokenizer, BertForSequenceClassification
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import joblib
from tqdm import tqdm
import os

# Hyperparameters
BATCH_SIZE = 16  # BERT-based model batch size
LEARNING_RATE = 0.00005  # BERT learning rate
EPOCHS = 3  # Number of epochs
MAX_SEQ_LENGTH = 512  # Maximum sequence length
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
NUM_LABELS_PER_ASPECT = 4  # Positive, Neutral, Negative, Not_Mentioned


# Define the custom dataset class
class RestaurantReviewDataset(Dataset):
    def __init__(self, reviews, labels, tokenizer, max_length):
        self.reviews = reviews
        self.labels = labels  # DataFrame with 18 aspect columns
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.reviews)

    def __getitem__(self, idx):
        review = str(self.reviews.iloc[idx])
        label_dict = {}

        # Convert labels to numerical values: -2 (not_mentioned) -> 0, -1 -> 1, 0 -> 2, 1 -> 3
        for aspect in self.labels.columns:
            label = self.labels[aspect].iloc[idx]
            if label == "not_mentioned":
                label_num = 0
            elif label == "negative":
                label_num = 1
            elif label == "neutral":
                label_num = 2
            elif label == "positive":
                label_num = 3
            label_dict[aspect] = label_num

        # Tokenize the review
        encoding = self.tokenizer(
            review,
            add_special_tokens=True,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )

        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "labels": torch.tensor(
                [label_dict[asp] for asp in self.labels.columns], dtype=torch.long
            ),
        }


# Load tokenizer and model
tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")


# Define a custom multi-task BERT model
class MultiTaskBert(torch.nn.Module):
    def __init__(self, num_aspects, num_labels_per_aspect):
        super(MultiTaskBert, self).__init__()
        self.bert = BertModel.from_pretrained("bert-base-chinese")
        self.dropout = torch.nn.Dropout(0.1)
        self.classifiers = torch.nn.ModuleList(
            [torch.nn.Linear(768, num_labels_per_aspect) for _ in range(num_aspects)]
        )  # 768 is BERT hidden size

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs[1]  # [CLS] token
        pooled_output = self.dropout(pooled_output)

        logits = [classifier(pooled_output) for classifier in self.classifiers]
        return logits  # List of logits for each aspect


# Initialize the model
model = MultiTaskBert(
    num_aspects=len(aspect_columns), num_labels_per_aspect=NUM_LABELS_PER_ASPECT
)
model.to(DEVICE)

# Prepare datasets
train_dataset = RestaurantReviewDataset(X_train, y_train, tokenizer, MAX_SEQ_LENGTH)
dev_dataset = RestaurantReviewDataset(X_dev, y_dev, tokenizer, MAX_SEQ_LENGTH)
test_dataset = RestaurantReviewDataset(X_test, y_test, tokenizer, MAX_SEQ_LENGTH)

# DataLoaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, betas=(0.9, 0.999))

# Loss function (one for each aspect)
loss_fn = torch.nn.CrossEntropyLoss()


# Training loop
def train_model(model, train_loader, dev_loader, epochs):
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for batch in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs}"):
            input_ids = batch["input_ids"].to(DEVICE)
            attention_mask = batch["attention_mask"].to(DEVICE)
            labels = batch["labels"].to(DEVICE)  # Shape: [batch_size, num_aspects]

            optimizer.zero_grad()
            logits = model(
                input_ids, attention_mask
            )  # List of [batch_size, num_labels]

            # Compute loss for each aspect and sum
            loss = 0
            for i, aspect_logits in enumerate(logits):
                loss += loss_fn(aspect_logits, labels[:, i])
            total_loss += loss.item()

            loss.backward()
            optimizer.step()

        avg_train_loss = total_loss / len(train_loader)
        print(
            f"Epoch {epoch + 1}/{epochs}, Average Training Loss: {avg_train_loss:.4f}"
        )

        # Validation
        evaluate_model(model, dev_loader, "Dev")


# Evaluation function
def evaluate_model(model, data_loader, dataset_name):
    model.eval()
    all_preds = [[] for _ in range(len(aspect_columns))]
    all_labels = [[] for _ in range(len(aspect_columns))]

    with torch.no_grad():
        for batch in tqdm(data_loader, desc=f"Evaluating {dataset_name}"):
            input_ids = batch["input_ids"].to(DEVICE)
            attention_mask = batch["attention_mask"].to(DEVICE)
            labels = batch["labels"].to(DEVICE)

            logits = model(input_ids, attention_mask)
            for i, aspect_logits in enumerate(logits):
                preds = torch.argmax(aspect_logits, dim=1).cpu().numpy()
                all_preds[i].extend(preds)
                all_labels[i].extend(labels[:, i].cpu().numpy())

    # Print classification report
    print(f"\n{dataset_name} Results:")
    for i, aspect in enumerate(aspect_columns):
        print(f"\n{aspect}:")
        print(
            classification_report(
                all_labels[i],
                all_preds[i],
                target_names=["not_mentioned", "negative", "neutral", "positive"],
            )
        )


# Train and evaluate
print("Training BERT model...")
train_model(model, train_loader, dev_loader, EPOCHS)

print("\nEvaluating on Test Set...")
evaluate_model(model, test_loader, "Test")

# Save the model
print("\nSaving BERT model...")
torch.save(model.state_dict(), "bert_multitask_model.pth")
print("BERT model saved successfully.")

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/110k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/269k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/624 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/412M [00:00<?, ?B/s]

Training BERT model...


Epoch 1/3: 100%|██████████| 2304/2304 [07:44<00:00,  4.96it/s]


Epoch 1/3, Average Training Loss: 9.2185


Evaluating Dev: 100%|██████████| 309/309 [00:26<00:00, 11.51it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is"


Dev Results:

Location#Transportation:
               precision    recall  f1-score   support

not_mentioned       0.96      0.98      0.97      3964
     negative       1.00      0.04      0.08        50
      neutral       0.00      0.00      0.00        42
     positive       0.89      0.86      0.88       884

     accuracy                           0.94      4940
    macro avg       0.71      0.47      0.48      4940
 weighted avg       0.94      0.94      0.94      4940


Location#Downtown:
               precision    recall  f1-score   support

not_mentioned       0.92      0.96      0.94      4061
     negative       0.00      0.00      0.00        23
      neutral       0.00      0.00      0.00        26
     positive       0.76      0.65      0.70       830

     accuracy                           0.90      4940
    macro avg       0.42      0.40      0.41      4940
 weighted avg       0.89      0.90      0.89      4940


Location#Easy_to_find:
               precision    re

Epoch 2/3: 100%|██████████| 2304/2304 [07:46<00:00,  4.93it/s]


Epoch 2/3, Average Training Loss: 6.7967


Evaluating Dev: 100%|██████████| 309/309 [00:27<00:00, 11.40it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Dev Results:

Location#Transportation:
               precision    recall  f1-score   support

not_mentioned       0.97      0.97      0.97      3964
     negative       0.53      0.52      0.53        50
      neutral       0.00      0.00      0.00        42
     positive       0.84      0.89      0.87       884

     accuracy                           0.94      4940
    macro avg       0.59      0.60      0.59      4940
 weighted avg       0.93      0.94      0.94      4940


Location#Downtown:
               precision    recall  f1-score   support

not_mentioned       0.93      0.95      0.94      4061
     negative       0.00      0.00      0.00        23
      neutral       0.00      0.00      0.00        26
     positive       0.73      0.71      0.72       830

     accuracy                           0.90      4940
    macro avg       0.41      0.41      0.41      4940
 weighted avg       0.89      0.90      0.89      4940


Location#Easy_to_find:
               precision    re

Epoch 3/3: 100%|██████████| 2304/2304 [07:47<00:00,  4.93it/s]


Epoch 3/3, Average Training Loss: 6.0337


Evaluating Dev: 100%|██████████| 309/309 [00:27<00:00, 11.43it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Dev Results:

Location#Transportation:
               precision    recall  f1-score   support

not_mentioned       0.97      0.97      0.97      3964
     negative       0.58      0.36      0.44        50
      neutral       0.00      0.00      0.00        42
     positive       0.84      0.91      0.87       884

     accuracy                           0.94      4940
    macro avg       0.60      0.56      0.57      4940
 weighted avg       0.94      0.94      0.94      4940


Location#Downtown:
               precision    recall  f1-score   support

not_mentioned       0.94      0.94      0.94      4061
     negative       0.20      0.04      0.07        23
      neutral       0.00      0.00      0.00        26
     positive       0.71      0.77      0.74       830

     accuracy                           0.90      4940
    macro avg       0.46      0.44      0.44      4940
 weighted avg       0.90      0.90      0.90      4940


Location#Easy_to_find:
               precision    re

Evaluating Test: 100%|██████████| 309/309 [00:26<00:00, 11.58it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Test Results:

Location#Transportation:
               precision    recall  f1-score   support

not_mentioned       0.97      0.97      0.97      3962
     negative       0.72      0.43      0.54        68
      neutral       0.00      0.00      0.00        47
     positive       0.83      0.90      0.87       863

     accuracy                           0.94      4940
    macro avg       0.63      0.57      0.59      4940
 weighted avg       0.93      0.94      0.94      4940


Location#Downtown:
               precision    recall  f1-score   support

not_mentioned       0.94      0.94      0.94      4070
     negative       0.33      0.08      0.13        24
      neutral       0.00      0.00      0.00        22
     positive       0.73      0.75      0.74       824

     accuracy                           0.90      4940
    macro avg       0.50      0.45      0.45      4940
 weighted avg       0.90      0.90      0.90      4940


Location#Easy_to_find:
               precision    r

In [7]:
model.eval()

MultiTaskBert(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(21128, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise