## 1. Adding nesessary libraries

In [1]:
import pandas as pd
import torch
from transformers import pipeline, BertForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from datasets import DatasetDict, Dataset
from sklearn.utils.class_weight import compute_class_weight
from torch import nn
from sklearn.metrics import f1_score
from transformers import TrainingArguments, Trainer
from sklearn.metrics import (precision_score, recall_score, f1_score, accuracy_score, confusion_matrix, 
                             matthews_corrcoef, classification_report, precision_recall_fscore_support)
import plotly.graph_objects as go
import warnings
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import plotly.figure_factory as ff
import numpy as np
warnings.filterwarnings('ignore')



In [2]:
# 
# !pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

# Check if CUDA is available
cuda_available = torch.cuda.is_available()
cuda_device= torch.cuda.get_device_name(0)

if cuda_available == True:
    print('CUDA was successfully installed and compiled on my device.')
    print('CUDA device name is:', cuda_device)
else:
    print('Cuda in not available')

CUDA was successfully installed and compiled on my device.
CUDA device name is: NVIDIA GeForce GTX 1650


## 2 preprocessing

In [3]:
# Import finalized dataset as pandas data frame.
df = pd.read_csv('data_jobads_final.csv', index_col=None)

# Replace newline characters in the 'job_description' column with a space.
df['job_description'] = df['job_description'].str.replace('\n', ' ')

# Remove None values.
df = df.dropna()

# Select and use only the last two columns for this evaluation.
df = df.iloc[:,-2:]

df.head(2)

Unnamed: 0,job_description,label
0,silver stream healthcare group offer great emp...,registered_nurse
1,create a better future for yourself !! recruit...,registered_nurse


In [4]:
labels = df['label'].unique().tolist()
print(labels)

['registered_nurse', 'electrician', 'data_analyst']


In [5]:
num_labels = len(labels)
id2label = {id:label for id, label in enumerate(labels)}
label2id = {label: id for id, label in enumerate(labels)}
print(id2label, '\n')
print(label2id)

{0: 'registered_nurse', 1: 'electrician', 2: 'data_analyst'} 

{'registered_nurse': 0, 'electrician': 1, 'data_analyst': 2}


In [6]:
df['label_encoded'] = df.label.map(lambda x: label2id[x.strip()])
df.head(2)

Unnamed: 0,job_description,label,label_encoded
0,silver stream healthcare group offer great emp...,registered_nurse,0
1,create a better future for yourself !! recruit...,registered_nurse,0


In [7]:
print(df['label_encoded'].value_counts(normalize=True).sort_index(), '\n')
print(df['label_encoded'].value_counts())

0    0.552316
1    0.125214
2    0.322470
Name: label_encoded, dtype: float64 

0    644
2    376
1    146
Name: label_encoded, dtype: int64


## 3 Fine-tuning

In [57]:
# Split the DataFrame into training and testing sets while maintaining label proportions.
train, validation_test = train_test_split(df, test_size=0.3, random_state=820, stratify=df['label'])
test, validation = train_test_split(validation_test, test_size=0.5, random_state=820, stratify=validation_test['label'])

# Step 1: Calculate the total number of rows in each DataFrame
total_train = train.shape[0]
total_test = test.shape[0]
total_validation = validation.shape[0]
total_df = df.shape[0]

# Step 2: Prepare data for plotting
dataframe_names = ['Complete DataFrame', 'Train', 'Validation', 'Test',  ]
total_values = [total_df, total_train, total_validation, total_test]

# Step 3: Create an interactive bar chart
fig = go.Figure(data=[go.Bar(x=dataframe_names, y=total_values, text=total_values, textposition='auto',)])

# Customize the layout
fig.update_layout(title='Total Number of Rows in Each DataFrame',
                  title_x=0.5,
                  title_font=dict(size=20, color='blue', family='Arial, sans-serif'),
                  xaxis_title='DataFrame Names',
                  yaxis_title='Total Rows',
                  template='plotly_dark')  # You can choose a template that fits your presentation style

fig.show()

In [9]:
# Convert DataFrames to Hugging Face Dataset.
train_dataset = Dataset.from_pandas(train)
val_dataset = Dataset.from_pandas(validation)
test_dataset = Dataset.from_pandas(test)

# Remove '__index_level_0__' feature
train_dataset = train_dataset.remove_columns('__index_level_0__')
val_dataset = val_dataset.remove_columns('__index_level_0__')
test_dataset = test_dataset.remove_columns('__index_level_0__')

# Create DatasetDict.
jobads = DatasetDict({
    'train': train_dataset,
    'validation': val_dataset,
    'test' : test_dataset
    })

In [10]:
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

def tokenize(examples):
    return tokenizer(examples['job_description'], truncation=True, max_length=512)

jobads_encoded = jobads.map(tokenize, batched=True, batch_size=None)

print(jobads_encoded)

Map:   0%|          | 0/816 [00:00<?, ? examples/s]

Map:   0%|          | 0/175 [00:00<?, ? examples/s]

Map:   0%|          | 0/175 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['job_description', 'label', 'label_encoded', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 816
    })
    validation: Dataset({
        features: ['job_description', 'label', 'label_encoded', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 175
    })
    test: Dataset({
        features: ['job_description', 'label', 'label_encoded', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 175
    })
})


In [11]:
jobads_encoded.set_format('torch', columns=['input_ids', 'attention_mask', 'label_encoded'])

In [12]:
# Assuming df is your DataFrame with a column named 'label' representing the class labels
# Replace 'label' with the actual column name in your DataFrame

# Calculate class weights
labels = train['label_encoded'].unique()
class_weights = compute_class_weight(class_weight='balanced',
                                     classes=labels,
                                     y=train['label_encoded'])

class_weights = torch.from_numpy(class_weights).float()
print(class_weights)

tensor([0.6031, 2.6667, 1.0342])


In [13]:
jobads_encoded = jobads_encoded.rename_column('label_encoded', 'labels')
print(jobads_encoded)

DatasetDict({
    train: Dataset({
        features: ['job_description', 'label', 'labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 816
    })
    validation: Dataset({
        features: ['job_description', 'label', 'labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 175
    })
    test: Dataset({
        features: ['job_description', 'label', 'labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 175
    })
})


In [14]:
class WeightedLossTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        outputs = model(**inputs)
        logits = outputs.get('logits')
        labels = inputs.get('labels')
        class_weights_device = class_weights.to(model.device)
        loss_func = nn.CrossEntropyLoss(weight=class_weights_device)
        loss = loss_func(logits, labels)
        return (loss, outputs) if return_outputs else loss

In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BertForSequenceClassification.from_pretrained('bert-base-uncased',
                                                      num_labels=3,
                                                      id2label=id2label,
                                                      label2id=label2id)
model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [16]:
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')
    acc = accuracy_score(labels, preds)
    return {
        'Accuracy': acc,
        'F1' : f1,
        'Precision' : precision,
        'Recall' : recall
        }
    from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(pred):
    """
    Computes accuracy, F1, precision, and recall for a given set of predictions.
    
    Args:
        pred (obj): An object containing label_ids and predictions attributes.
            - label_ids (array-like): A 1D array of true class labels.
            - predictions (array-like): A 2D array where each row represents
              an observation, and each column represents the probability of 
              that observation belonging to a certain class.
              
    Returns:
        dict: A dictionary containing the following metrics:
            - Accuracy (float): The proportion of correctly classified instances.
            - F1 (float): The macro F1 score, which is the harmonic mean of precision
              and recall. Macro averaging calculates the metric independently for
              each class and then takes the average.
            - Precision (float): The macro precision, which is the number of true
              positives divided by the sum of true positives and false positives.
            - Recall (float): The macro recall, which is the number of true positives
              divided by the sum of true positives and false negatives.
    """
    # Extract true labels from the input object
    labels = pred.label_ids
    
    # Obtain predicted class labels by finding the column index with the maximum probability
    preds = pred.predictions.argmax(-1)
    
    # Compute macro precision, recall, and F1 score using sklearn's precision_recall_fscore_support function
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')
    
    # Calculate the accuracy score using sklearn's accuracy_score function
    acc = accuracy_score(labels, preds)
    
    # Return the computed metrics as a dictionary
    return {
        'Accuracy': acc,
        'F1': f1,
        'Precision': precision,
        'Recall': recall
    }


In [17]:
batch_size = 16
logging_steps = len(jobads_encoded['train']) // batch_size
output_dir = 'ft_bert_temuulen3'
training_args = TrainingArguments(output_dir=output_dir,
                                  num_train_epochs=3,
                                  learning_rate=2e-5,
                                  per_device_eval_batch_size=batch_size,
                                  per_gpu_eval_batch_size=batch_size,
                                  weight_decay=0.01,
                                  evaluation_strategy='epoch',
                                  logging_steps=logging_steps,
                                  save_strategy='epoch',
                                  fp16=True,
                                  load_best_model_at_end=True)

In [18]:
trainer = WeightedLossTrainer(model=model,
                              args=training_args,
                              train_dataset=jobads_encoded['train'],
                              eval_dataset=jobads_encoded['validation'],
                              tokenizer=tokenizer,
                              compute_metrics= compute_metrics)

In [19]:
trainer.train()

  0%|          | 0/306 [00:00<?, ?it/s]

{'loss': 0.6005, 'learning_rate': 1.6732026143790852e-05, 'epoch': 0.5}


Using deprecated `--per_gpu_eval_batch_size` argument which will be removed in a future version. Using `--per_device_eval_batch_size` is preferred.
Using deprecated `--per_gpu_eval_batch_size` argument which will be removed in a future version. Using `--per_device_eval_batch_size` is preferred.


{'loss': 0.0934, 'learning_rate': 1.3398692810457516e-05, 'epoch': 1.0}


  0%|          | 0/11 [00:00<?, ?it/s]

Using deprecated `--per_gpu_eval_batch_size` argument which will be removed in a future version. Using `--per_device_eval_batch_size` is preferred.


{'eval_loss': 0.011694742366671562, 'eval_Accuracy': 1.0, 'eval_F1': 1.0, 'eval_Precision': 1.0, 'eval_Recall': 1.0, 'eval_runtime': 65.87, 'eval_samples_per_second': 2.657, 'eval_steps_per_second': 0.167, 'epoch': 1.0}
{'loss': 0.0154, 'learning_rate': 1.0065359477124184e-05, 'epoch': 1.5}


Using deprecated `--per_gpu_eval_batch_size` argument which will be removed in a future version. Using `--per_device_eval_batch_size` is preferred.
Using deprecated `--per_gpu_eval_batch_size` argument which will be removed in a future version. Using `--per_device_eval_batch_size` is preferred.


{'loss': 0.0061, 'learning_rate': 6.732026143790851e-06, 'epoch': 2.0}


  0%|          | 0/11 [00:00<?, ?it/s]

Using deprecated `--per_gpu_eval_batch_size` argument which will be removed in a future version. Using `--per_device_eval_batch_size` is preferred.


{'eval_loss': 0.005921968724578619, 'eval_Accuracy': 1.0, 'eval_F1': 1.0, 'eval_Precision': 1.0, 'eval_Recall': 1.0, 'eval_runtime': 64.6275, 'eval_samples_per_second': 2.708, 'eval_steps_per_second': 0.17, 'epoch': 2.0}
{'loss': 0.0056, 'learning_rate': 3.398692810457517e-06, 'epoch': 2.5}


Using deprecated `--per_gpu_eval_batch_size` argument which will be removed in a future version. Using `--per_device_eval_batch_size` is preferred.
Using deprecated `--per_gpu_eval_batch_size` argument which will be removed in a future version. Using `--per_device_eval_batch_size` is preferred.


{'loss': 0.0031, 'learning_rate': 6.535947712418302e-08, 'epoch': 3.0}


  0%|          | 0/11 [00:00<?, ?it/s]

Using deprecated `--per_gpu_eval_batch_size` argument which will be removed in a future version. Using `--per_device_eval_batch_size` is preferred.


{'eval_loss': 0.0028546287212520838, 'eval_Accuracy': 1.0, 'eval_F1': 1.0, 'eval_Precision': 1.0, 'eval_Recall': 1.0, 'eval_runtime': 66.5842, 'eval_samples_per_second': 2.628, 'eval_steps_per_second': 0.165, 'epoch': 3.0}
{'train_runtime': 3122.6535, 'train_samples_per_second': 0.784, 'train_steps_per_second': 0.098, 'train_loss': 0.12066403518315234, 'epoch': 3.0}


TrainOutput(global_step=306, training_loss=0.12066403518315234, metrics={'train_runtime': 3122.6535, 'train_samples_per_second': 0.784, 'train_steps_per_second': 0.098, 'train_loss': 0.12066403518315234, 'epoch': 3.0})

In [5]:
trainer.model.save_pretrained('ft_bert_temuulen3')
tokenizer.save_pretrained('ft_bert_temuulen3_tokenizer')

NameError: name 'trainer' is not defined

## 4 Testing the model

In [22]:
# Load the fine-tuned model and tokenizer
model_path = 'ft_bert_temuulen3'
tokenizer_path = 'ft_bert_temuulen3_tokenizer'

model = BertForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

  0%|          | 0/22 [00:00<?, ?it/s]

In [None]:
# Prepare the test dataset (ensure it's tokenized in the same way as training/validation)
test_dataset = jobads_encoded['test']  # Assuming jobads_encoded['test'] is already prepared

# Create a Trainer instance
trainer = Trainer(model=model, tokenizer=tokenizer)

# Make predictions on the test dataset
predictions = trainer.predict(test_dataset)

# Convert predictions to numpy for further analysis
labels = predictions.label_ids
preds = predictions.predictions.argmax(-1)

In [56]:
# Interactive Confusion Matrix with Plotly
cm_matrix = confusion_matrix(labels, preds)
fig = ff.create_annotated_heatmap(z=cm_matrix, 
                                  x=['registered nurse', 'electrician', 'data analyst'],
                                  y=['registered nurse', 'electrician', 'data analyst'], 
                                  colorscale='Viridis', 
                                  showscale=True)
fig.update_layout(width=600, 
                  height=600, 
                  title='Confusion Matrix', 
                  xaxis=dict(title='Predicted Value', side='bottom'), 
                  yaxis_title='True Value')
fig.update_yaxes(tickangle=-90)  
fig.show()

# Detailed classification report
report = classification_report(labels, preds, output_dict=True)
print("CLASSIFICATION REPORT:")
print(classification_report(labels, preds))

CLASSIFICATION REPORT:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        96
           1       1.00      1.00      1.00        22
           2       1.00      1.00      1.00        57

    accuracy                           1.00       175
   macro avg       1.00      1.00      1.00       175
weighted avg       1.00      1.00      1.00       175



In [27]:
# Assuming `cm_matrix` and `labels` are already defined as shown previously
fig = ff.create_annotated_heatmap(
    z=cm_matrix, 
    x=np.unique(labels).tolist(), 
    y=np.unique(labels).tolist(), 
    colorscale='Viridis', 
    showscale=True,
    annotation_text=cm_matrix,  # Ensure annotations are the confusion matrix values
)

# Update layout to adjust titles and labels
fig.update_layout(
    width=600, 
    height=600, 
    title='Confusion Matrix', 
    title_x=0.5, # Title at the top
    xaxis=dict(title='Predicted Value',  # Ensure x-axis title is at the bottom
               side='bottom'),  # This is default but added for clarity
    yaxis_title='True Value',  # Y-axis title
)

# Optionally, center the annotations if they are not already centered by default
for annotation in fig['layout']['annotations']:
    annotation['xanchor'] = 'center'
    annotation['yanchor'] = 'middle'

fig.show()

In [20]:
y_pred

array([0, 0, 0, 2, 2, 0, 0, 2, 2, 2, 0, 2, 0, 0, 2, 0, 2, 0, 0, 2, 2, 2,
       2, 1, 0, 1, 2, 0, 0, 0, 2, 0, 0, 0, 2, 1, 2, 0, 1, 2, 0, 0, 0, 2,
       0, 2, 0, 0, 2, 1, 2, 1, 0, 1, 1, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0,
       2, 0, 0, 0, 0, 2, 0, 2, 2, 2, 0, 0, 0, 2, 1, 2, 0, 2, 2, 0, 2, 0,
       1, 1, 0, 2, 0, 1, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 2,
       0, 0, 2, 0, 0, 0, 0, 2, 0, 2, 0, 0, 1, 2, 1, 0, 2, 0, 1, 0, 2, 0,
       0, 0, 2, 2, 0, 2, 0, 0, 2, 0, 2, 2, 2, 1, 0, 2, 0, 0, 0, 2, 0, 0,
       0, 0, 2, 0, 1, 0, 0, 2, 0, 2, 0, 1, 2, 2, 0, 1, 0, 0, 1, 0, 0],
      dtype=int64)

In [None]:
# Choose average method
average_method = input("Choose average method for multi-class metrics (macro/weighted): ").strip().lower()
while average_method not in ['macro', 'weighted']:
    average_method = input("Invalid input. Please enter 'macro' or 'weighted': ").strip().lower()

# Calculate metrics
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average=average_method)
recall = recall_score(y_true, y_pred, average=average_method)
f1 = f1_score(y_true, y_pred, average=average_method)
mcc = matthews_corrcoef(y_true, y_pred)

# Print metrics
print("\nEvaluation Metrics:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print(f"Matthews Correlation Coefficient: {mcc:.2f}")

# Classification Report
print("\nClassification Report:")
report = classification_report(y_true, y_pred, target_names=model.config.id2label.values(), output_dict=False)  # Adjust target_names as per your labels
print(report)

In [None]:
# Confusion Matrix
cm = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=model.config.id2label.values(), yticklabels=model.config.id2label.values())
plt.title("Confusion Matrix")
plt.ylabel('Actual label')
plt.xlabel('Predicted label')
plt.show()

# Bar Chart for Precision, Recall, F1-Score
metrics_summary = classification_report(y_true, y_pred, output_dict=True, target_names=model.config.id2label.values())
class_precisions = [metrics_summary[c]['precision'] for c in model.config.id2label.values()]
class_recalls = [metrics_summary[c]['recall'] for c in model.config.id2label.values()]
class_f1s = [metrics_summary[c]['f1-score'] for c in model.config.id2label.values()]

bar_width = 0.25
r1 = np.arange(len(class_precisions))
r2 = [x + bar_width for x in r1]
r3 = [x + bar_width for x in r2]

plt.figure(figsize=(10, 6))
plt.bar(r1, class_precisions, color='blue', width=bar_width, edgecolor='grey', label='Precision')
plt.bar(r2, class_recalls, color='red', width=bar_width, edgecolor='grey', label='Recall')
plt.bar(r3, class_f1s, color='green', width=bar_width, edgecolor='grey', label='F1-Score')

plt.xlabel('Classes', fontweight='bold')
plt.xticks([r + bar_width for r in range(len(class_precisions))], model.config.id2label.values())
plt.title('Class-wise Precision, Recall, and F1-Score')
plt.legend()
plt.show()

In [None]:
# Choose average method
average_method = input("Choose average method for multi-class metrics (macro/weighted): ").strip().lower()
while average_method not in ['macro', 'weighted']:
    average_method = input("Invalid input. Please enter 'macro' or 'weighted': ").strip().lower()

# Calculate metrics
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average=average_method)
recall = recall_score(y_true, y_pred, average=average_method)
f1 = f1_score(y_true, y_pred, average=average_method)
mcc = matthews_corrcoef(y_true, y_pred)

# Print metrics
print("\nEvaluation Metrics:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print(f"Matthews Correlation Coefficient: {mcc:.2f}")

# Classification Report
print("\nClassification Report:")
print(classification_report(y_true, y_pred, target_names=['Class 0', 'Class 1', 'Class 2']))

# Confusion Matrix
cm = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=['Class 0', 'Class 1', 'Class 2'], yticklabels=['Class 0', 'Class 1', 'Class 2'])
plt.title("Confusion Matrix")
plt.ylabel('Actual label')
plt.xlabel('Predicted label')
plt.show()

# Bar Chart for Precision, Recall, F1-Score
metrics_summary = classification_report(y_true, y_pred, output_dict=True, target_names=['Class 0', 'Class 1', 'Class 2'])
class_precisions = [metrics_summary[c]['precision'] for c in ['Class 0', 'Class 1', 'Class 2']]
class_recalls = [metrics_summary[c]['recall'] for c in ['Class 0', 'Class 1', 'Class 2']]
class_f1s = [metrics_summary[c]['f1-score'] for c in ['Class 0', 'Class 1', 'Class 2']]

bar_width = 0.25
r1 = np.arange(len(class_precisions))
r2 = [x + bar_width for x in r1]
r3 = [x + bar_width for x in r2]

plt.figure(figsize=(10, 6))
plt.bar(r1, class_precisions, color='blue', width=bar_width, edgecolor='grey', label='Precision')
plt.bar(r2, class_recalls, color='red', width=bar_width, edgecolor='grey', label='Recall')
plt.bar(r3, class_f1s, color='green', width=bar_width, edgecolor='grey', label='F1-Score')

plt.xlabel('Classes', fontweight='bold')
plt.xticks([r + bar_width for r in range(len(class_precisions))], ['Class 0', 'Class 1', 'Class 2'])
plt.title('Class-wise Precision, Recall and F1-Score')
plt.legend()
plt.show()

In [None]:
classifier = pipeline('text-classification', model='ft_bert_temuulen3', tokenizer='ft_bert_temuulen3_tokenizer')

In [None]:
test_nurse = classifier("I promote health, prevent disease, and help people who is sick")
test_el = classifier("If you don't have any light at home, I'm here to help.")
test_da = classifier("All day, I'm sitting in front of the screen, solving problems with my mouse and keyboard")
print(test_nurse)
print(test_el)
print(test_da)

In [None]:
df_jobads = pd.read_csv('data_jobseeker.csv', index_col=None)

In [None]:
df_jobads