# Import Packages

In [None]:
# reference: https://lajavaness.medium.com/regression-with-text-input-using-bert-and-transformers-71c155034b13
%%time
from google.colab import drive
drive.mount('/content/drive')
!pip install transformers
!pip install datasets --upgrade
!pip install accelerate
import pandas as pd
from datasets import Dataset
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding
from torch.utils.data import DataLoader
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from transformers import TrainingArguments, Trainer
import math
import torch

Mounted at /content/drive
Collecting transformers
  Downloading transformers-4.19.0-py3-none-any.whl (4.2 MB)
[K     |████████████████████████████████| 4.2 MB 4.3 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 71.9 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 41.1 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.6.0-py3-none-any.whl (84 kB)
[K     |████████████████████████████████| 84 kB 3.7 MB/s 
Installing collected packages: pyyaml, tokenizers, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 3.13
    Uninstalling PyYAML-3.13:
      Successfully uninstalled PyYAML-3.13
Successfully in

# Prepare Datasets

In [None]:
# Read the combined review export and build the modelling frame for Bumble only.
path = '/content/drive/MyDrive/DSO 560 NLP Team Project/'
df_1 = pd.read_csv(f'{path}clean_data/bumble_hinge_review.csv')

# One chain, no in-place mutation:
#   1. keep only the Bumble rows,
#   2. drop every row that has a missing value in any column,
#   3. turn the original row index into a regular column (renamed to 'id' below),
#   4. rename to the column names used by the rest of the notebook,
#   5. keep just the four columns needed downstream.
df = (
    df_1.loc[df_1['App']=='Bumble']
    .dropna()
    .reset_index()
    .rename(columns={'Review':'text','App':'app','Rating':'score','index':'id'})
    [['id','text','score','app']]
)

In [None]:
# Hold out 20% of the reviews, stratified on the rating so both parts keep
# the same score distribution; random_state pins the split.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=2,
    stratify=df['score'],
)

# Wrap both frames as Hugging Face datasets (the pandas index is not kept).
# The held-out part is referred to as the "validation" split from here on.
raw_train_ds, raw_val_ds = (
    Dataset.from_pandas(frame, preserve_index=False)
    for frame in (train_df, test_df)
)

{'app': 'Bumble',
 'id': 37695,
 'score': 2,
 'text': 'the algorithm and ui be complete trash ! nowhere to see how many coin you have leave . you get bury and bury the more you . load of robot and instagram clout chaser . oh , and if you swipe right on those you get even more buried . need a rehaul .'}

# Set Up Model

In [None]:
# download models and set parameters
BASE_MODEL = "albert-base-v2"
LEARNING_RATE = 2e-5
MAX_LENGTH = 256
BATCH_SIZE = 32
EPOCHS = 6
SEED = 42

# The regression head (classifier.weight / classifier.bias) is not part of the
# pretrained checkpoint and is randomly initialised when the model is built.
# Seed torch *before* instantiating the model so that initialisation is
# repeatable across kernel restarts.
torch.manual_seed(SEED)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
# num_labels=1 gives a single output value per review, i.e. a regression head.
model = AutoModelForSequenceClassification.from_pretrained(BASE_MODEL, num_labels=1)

Downloading:   0%|          | 0.00/684 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/742k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.25M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/45.2M [00:00<?, ?B/s]

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertForSequenceClassification: ['predictions.LayerNorm.weight', 'predictions.LayerNorm.bias', 'predictions.bias', 'predictions.decoder.bias', 'predictions.dense.bias', 'predictions.decoder.weight', 'predictions.dense.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.weight', 'classifier.bias']
You sho

# Preprocessing

In [None]:
# make data readable by models
ds = {"train": raw_train_ds, "validation": raw_val_ds}

def preprocess_function(examples):
    """Tokenize one review and attach its rating as a float regression label.

    Called through ``Dataset.map`` without batching, so ``examples`` is a
    single record: ``examples["score"]`` must be a scalar (it is passed to
    ``float``) and ``examples["text"]`` is the review text.

    Returns the tokenizer output with an extra ``"label"`` entry holding the
    rating as a float.
    """
    label = examples["score"]
    # Pad/truncate every review to the notebook-wide MAX_LENGTH so the
    # sequence length is configured in exactly one place.
    encoded = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=MAX_LENGTH)

    # Store the rating as a real number so it is treated as a regression target.
    encoded["label"] = float(label)
    return encoded

# Tokenize both splits and drop the raw columns that the model cannot consume.
for split in ds:
    ds[split] = ds[split].map(preprocess_function, remove_columns=["id", "score", "text", "app"])

  0%|          | 0/63114 [00:00<?, ?ex/s]

  0%|          | 0/15779 [00:00<?, ?ex/s]

# Metrics

In [None]:
# define mse, mae, r2, accuracy as the evaluation metrics
def compute_metrics_for_regression(eval_pred):
    """Compute regression metrics plus a rounding-based accuracy.

    Args:
        eval_pred: object that unpacks into ``(logits, labels)`` arrays
            holding one predicted value and one true rating per example.

    Returns:
        dict with keys ``"mse"``, ``"mae"``, ``"r2"`` and ``"accuracy"``.
        ``accuracy`` is the fraction of examples whose prediction lies within
        0.5 of the true rating.
    """
    logits, labels = eval_pred

    # Flatten both arrays to 1-D so the subtraction below is strictly
    # element-wise. Mixing an (N, 1) array with an (N,) array would silently
    # broadcast to (N, N) and corrupt the accuracy.
    predictions = logits.reshape(-1)
    targets = labels.reshape(-1)

    mse = mean_squared_error(targets, predictions)
    mae = mean_absolute_error(targets, predictions)
    r2 = r2_score(targets, predictions)

    # Compute accuracy.
    # The rounded prediction matches the true score only when the absolute
    # error is below 0.5, i.e. when the squared error is below 0.25.
    squared_errors = (predictions - targets) ** 2
    accuracy = float((squared_errors < 0.25).mean())

    return {"mse": mse, "mae": mae, "r2": r2, "accuracy": accuracy}

# Loss Function

In [None]:
# define loss function
class RegressionTrainer(Trainer):
    """Trainer that optimises mean-squared error on a single-output model."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Return the MSE between the model's single output and the labels.

        Args:
            model: the model being trained or evaluated.
            inputs: batch dict; its ``"labels"`` entry is removed here so the
                model is called without labels and the loss is computed below.
            return_outputs: when true, return ``(loss, outputs)`` instead of
                just ``loss``.
            num_items_in_batch: accepted (and ignored) because newer
                transformers releases pass this argument to ``compute_loss``;
                without it the override raises ``TypeError`` on those versions.
        """
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        # First output is the logits tensor; take its only column so the
        # predictions have one value per example, like the labels.
        logits = outputs[0][:, 0]
        loss = torch.nn.functional.mse_loss(logits, labels)
        return (loss, outputs) if return_outputs else loss

# Training

In [None]:
# define training arguments
training_args = TrainingArguments(
    # `path` already ends with "/", so no extra separator is inserted here
    # (the previous f"{path}/rating..." produced a "//" in every checkpoint path).
    output_dir=f"{path}rating prediction/model/albert-fine-tuned-regression-on-bumble-0512",
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    # Evaluate and checkpoint once per epoch; keep at most two checkpoints on disk.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    # Pick the best checkpoint by the rounding-based accuracy (higher is
    # better) and reload it when training finishes.
    metric_for_best_model="accuracy",
    greater_is_better=True,
    load_best_model_at_end=True,
    weight_decay=0.01,
)

In [None]:
# Fine-tune the model: wire the tokenized splits, the training configuration
# and the regression metrics into the MSE-based trainer, then run training.
trainer = RegressionTrainer(
    compute_metrics=compute_metrics_for_regression,
    eval_dataset=ds["validation"],
    train_dataset=ds["train"],
    args=training_args,
    model=model,
)

# Last expression of the cell, so the training summary is displayed.
trainer.train()

***** Running training *****
  Num examples = 63114
  Num Epochs = 6
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 11838


Epoch,Training Loss,Validation Loss,Mse,Mae,R2,Accuracy
1,0.7557,0.774765,0.774765,0.561423,0.707397,0.635401
2,0.6355,0.64673,0.64673,0.549254,0.755752,0.616642
3,0.4828,0.677678,0.677678,0.535526,0.744064,0.631345
4,0.3758,0.673462,0.673462,0.519774,0.745656,0.659421
5,0.2843,0.702058,0.702058,0.524209,0.734856,0.645035
6,0.1984,0.722101,0.722101,0.531387,0.727287,0.640281


***** Running Evaluation *****
  Num examples = 15779
  Batch size = 32
Saving model checkpoint to /content/drive/MyDrive/DSO 560 NLP Team Project//rating prediction/model/albert-fine-tuned-regression-on-bumble-0512/checkpoint-1973
Configuration saved in /content/drive/MyDrive/DSO 560 NLP Team Project//rating prediction/model/albert-fine-tuned-regression-on-bumble-0512/checkpoint-1973/config.json
Model weights saved in /content/drive/MyDrive/DSO 560 NLP Team Project//rating prediction/model/albert-fine-tuned-regression-on-bumble-0512/checkpoint-1973/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 15779
  Batch size = 32
Saving model checkpoint to /content/drive/MyDrive/DSO 560 NLP Team Project//rating prediction/model/albert-fine-tuned-regression-on-bumble-0512/checkpoint-3946
Configuration saved in /content/drive/MyDrive/DSO 560 NLP Team Project//rating prediction/model/albert-fine-tuned-regression-on-bumble-0512/checkpoint-3946/config.json
Model weights saved in /co

TrainOutput(global_step=11838, training_loss=0.4713761673966137, metrics={'train_runtime': 17774.2443, 'train_samples_per_second': 21.305, 'train_steps_per_second': 0.666, 'total_flos': 4524467087960064.0, 'train_loss': 0.4713761673966137, 'epoch': 6.0})

# Evaluation on Test Set

In [None]:
# Model performance evaluation: score the model currently held by the trainer
# on the held-out ("validation") split and display the resulting metrics.
trainer.eval_dataset = ds["validation"]
eval_metrics = trainer.evaluate()
eval_metrics

***** Running Evaluation *****
  Num examples = 15779
  Batch size = 32


{'epoch': 6.0,
 'eval_accuracy': 0.659420749096901,
 'eval_loss': 0.6734618544578552,
 'eval_mae': 0.519773542881012,
 'eval_mse': 0.6734617948532104,
 'eval_r2': 0.7456562040047966,
 'eval_runtime': 263.1591,
 'eval_samples_per_second': 59.96,
 'eval_steps_per_second': 1.877}

In [None]:
# look at discrepancies of predicted rating and original rating
# This cell runs before the saved checkpoint is loaded (`my_model` /
# `my_tokenizer` are only created in a later cell), so it uses the in-memory
# fine-tuned `model` and its `tokenizer` to stay runnable top-to-bottom.
model.eval()
y_preds = []

# Inference only: no_grad avoids building the autograd graph for every batch.
with torch.no_grad():
    for start in range(0, len(raw_val_ds), BATCH_SIZE):
        input_texts = raw_val_ds[start:start + BATCH_SIZE]["text"]
        encoded = tokenizer(
            input_texts,
            truncation=True,
            padding="max_length",
            max_length=MAX_LENGTH,
            return_tensors="pt",
        ).to(model.device)  # follow the model's device instead of hard-coding "cuda"
        y_preds.extend(model(**encoded).logits.reshape(-1).tolist())

pd.set_option('display.max_rows', 500)
# Separate name so the review DataFrame `df` built earlier is not overwritten.
val_predictions = pd.DataFrame(
    {"Text": raw_val_ds["text"], "Score": raw_val_ds["score"], "Prediction": y_preds}
)
val_predictions["Rounded Prediction"] = val_predictions["Prediction"].round().astype(int)
# Keep only the reviews whose rounded prediction differs from the true rating.
incorrect_cases = val_predictions[val_predictions["Score"] != val_predictions["Rounded Prediction"]]
incorrect_cases

# Load Saved Model

In [None]:
# load the saved model
# Checkpoint location is built from the shared `path` root (which ends with "/")
# instead of repeating the absolute Drive path.
CHECKPOINT_DIR = f"{path}rating prediction/model/albert-fine-tuned-regression-on-bumble-0512/checkpoint-7892"

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device in use:",device)

my_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
my_model = AutoModelForSequenceClassification.from_pretrained(CHECKPOINT_DIR, num_labels=1)
# Assign the result so the cell does not end on a bare expression that dumps
# the entire module tree into the output; eval() switches to inference mode.
my_model = my_model.to(device).eval()

Device in use: cuda


AlbertForSequenceClassification(
  (albert): AlbertModel(
    (embeddings): AlbertEmbeddings(
      (word_embeddings): Embedding(30000, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0, inplace=False)
    )
    (encoder): AlbertTransformer(
      (embedding_hidden_mapping_in): Linear(in_features=128, out_features=768, bias=True)
      (albert_layer_groups): ModuleList(
        (0): AlbertLayerGroup(
          (albert_layers): ModuleList(
            (0): AlbertLayer(
              (full_layer_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (attention): AlbertAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768,

In [None]:
# make predictions on the saved model
my_model.eval()
y_preds = []

# Inference only: no_grad avoids building the autograd graph for every batch.
with torch.no_grad():
    for start in range(0, len(raw_val_ds), BATCH_SIZE):
        input_texts = raw_val_ds[start:start + BATCH_SIZE]["text"]
        encoded = my_tokenizer(
            input_texts,
            truncation=True,
            padding="max_length",
            max_length=MAX_LENGTH,
            return_tensors="pt",
        ).to(device)  # same device the model was moved to, not a hard-coded "cuda"
        y_preds.extend(my_model(**encoded).logits.reshape(-1).tolist())

pd.set_option('display.max_rows', 500)
# Separate name so the review DataFrame `df` built earlier is not overwritten.
saved_model_predictions = pd.DataFrame(
    {"Text": raw_val_ds["text"], "Score": raw_val_ds["score"], "Prediction": y_preds}
)
saved_model_predictions["Rounded Prediction"] = saved_model_predictions["Prediction"].round().astype(int)
# Keep only the reviews whose rounded prediction differs from the true rating.
incorrect_cases = saved_model_predictions[
    saved_model_predictions["Score"] != saved_model_predictions["Rounded Prediction"]
]
incorrect_cases

Unnamed: 0,Text,Score,Prediction,Rounded Prediction
3,"it be work fine last night . however , when i ...",3,2.040696,2
9,for some reason it just kick me off my account...,3,1.492613,1
10,"i love it , take the pressure of send that fir...",5,4.135255,4
11,love the app and interface so far though i not...,3,3.72521,4
12,good experience but wayyyyyy too expensive,1,3.867476,4
...,...,...,...,...
15764,they devoloped the chat area a lot which be a ...,3,3.944352,4
15765,meet real female . in less than 3 day . talk o...,4,1.342662,1
15770,this be the bad app ever,5,1.035394,1
15773,so far its a kool.app,5,4.423614,4


In [None]:
# save the reviews whose rounded predicted rating differs from the original rating
# incorrect_cases.to_excel(f"{path}/rating prediction/incorrect_cases_alBert_Regression.xlsx")