In [None]:
!pip install transformers datasets peft pandas scikit-learn -q

In [28]:
from datasets import load_dataset, Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    RobertaTokenizerFast,
    RobertaForSequenceClassification,
    TrainingArguments,
    Trainer,
    AutoConfig,
    pipeline
)
from evaluate import evaluator
import evaluate
from peft import LoraConfig, get_peft_model, PeftConfig, PeftModel
import peft
import pandas as pd
import random
from sklearn.model_selection import train_test_split

# Demystifying ChatGPT
## Use-Case: Text Classification
Text Classification is the task of assigning a label or class to a given text. Some use cases are sentiment analysis, natural language inference, and assessing grammatical correctness.

LLMs that are capable of performing text classification tasks are, in general, explicitly trained on the requested labels.

For example, the model roberta-base_ag_news is a fine-tuned version of roberta-base on the ag_news dataset and can map a text to the classes World, Sports, Business, and Sci/Tech.

In [7]:
# To use such a model you can simply set it up
# Build a ready-to-use text-classification pipeline from the checkpoint
# 'achimoraites/roberta-base_ag_news'. The resulting `classifier` is called
# directly on raw strings in the following cells.
# NOTE(review): the checkpoint is presumably fetched from the Hugging Face Hub on
# first use, so this cell needs network access the first time - confirm.
classifier = pipeline('text-classification','achimoraites/roberta-base_ag_news')

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [8]:
# Four sample news snippets to classify.
# The original snippets contained stray backslashes ("dwindling\band",
# "turned\into", "brain\compound"). Inside a normal Python string literal "\b" is
# the backspace character and "\i" / "\c" are invalid escape sequences, so the
# model received corrupted text. They are replaced by plain spaces here
# (presumably the backslashes were line-break artifacts of the source data).
news = [
    "Talks End With No U.S. Climate Deal A U.N. conference ended early Saturday with a vague plan for informal new talks on how to slow global warming but without a U.S. commitment to multilateral negotiations on next steps, including emissions controls.",
    "Texas' Johnson, Benson Go Out With Win (AP) AP - Their final games will be remembered for the plays others made. Still, Texas tailback Cedric Benson and linebacker Derrick Johnson went out the way they wanted to: with a Rose Bowl win.",
    "Wall St. Bears Claw Back Into the Black (Reuters) Reuters - Short-sellers, Wall Street's dwindling band of ultra-cynics, are seeing green again.",
    "Gene Blocker Turns Monkeys Into Workaholics - Study (Reuters) Reuters - Procrastinating monkeys were turned into workaholics using a gene treatment to block a key brain compound, U.S. researchers reported on Wednesday.",
]
# Classify one snippet at a time so that each prediction is printed on its own line.
for nw in news:
    r = classifier(nw)
    print(r)

[{'label': 'Sci/Tech', 'score': 0.9806562662124634}]
[{'label': 'Sports', 'score': 0.9885386824607849}]
[{'label': 'Business', 'score': 0.9916352033615112}]
[{'label': 'Sci/Tech', 'score': 0.9826299548149109}]


In [9]:
# Classify a single headline that is not part of the `news` list above.
# The bare last expression makes the notebook display the prediction.
text = "Netherlands star Lineth Beerensteyn delighted by ‘big mouth’ US’ elimination from Women’s World Cup"
classifier(text)

[{'label': 'Sports', 'score': 0.9885388016700745}]

## Text Classification in Automotive Industry

Field data analysis in the automotive industry is crucial for several reasons, primarily centered around improving vehicle design, performance, safety, and overall customer satisfaction. One of the most important statistics is the distribution of the failure modes of the vehicles. The main failure modes are:

- **Engine and Transmission Failures**: Problems with the engine or transmission can lead to significant performance issues, reduced fuel efficiency, and even complete breakdowns.
- **Electrical System Failures**: Malfunctions in the electrical system can cause issues with lights, sensors, controls, and other critical components.
- **Brake System Failures**: Brake failure can be extremely dangerous, compromising vehicle safety and leading to accidents.
- **Suspension and Steering Failures**: Faulty suspension or steering systems can cause handling problems and compromise the vehicle's stability.
- **Airbag and Safety System Failures**: Failure of airbags and other safety systems can result in severe injuries during accidents.
- **Fuel System Failures**: Issues with the fuel system can lead to fuel leaks, reduced efficiency, and even fires.
- **Exhaust System Failures**: Problems in the exhaust system can result in increased emissions, decreased performance, and environmental issues.
- **Cooling System Failures**: Failure of the cooling system can lead to engine overheating and potential damage.
- **Tire Failures**: Defective or worn-out tires can result in blowouts and accidents.
- **Electronic Component Failures**: Problems with electronic components, such as the infotainment system or computer modules, can lead to malfunctions and inconvenience for the driver.


This information can be gathered from customer and/or technician comments, but it is stored as unstructured text data, e.g.:

- The exhaust system on my car started to rattle and vibrate while driving - Exhaust System Failures
- The engine failed due to a faulty fuel pump. - Engine and Transmission Failures

In [14]:
# Let's first take a look at a dataset containing customer and technician comments.
# Load dataset
df = pd.read_parquet('automotive_failure_mode_comments_01.parquet')

# Always show rows 18 and 624 (the two example comments quoted in the text above),
# followed by ten further rows drawn at random.
# The sample is seeded so the displayed table is the same on every run, and it is
# drawn without replacement from the remaining rows so no row is shown twice.
pinned_rows = [18, 624]
sampled_rows = df.drop(index=pinned_rows).sample(n=10, random_state=1909).index.tolist()
idx = pinned_rows + sampled_rows
df.loc[idx, :]

Unnamed: 0,comment,failure_mode,failed_component,ground_truth,creator
18,The engine failed due to a faulty fuel pump.,Engine Failure,Fuel Pump,Engine and Transmission Failures,technician
624,The exhaust system on my car started to rattle...,Exhaust System Failures,Exhaust Hanger,Exhaust System Failures,customer
208,I had to replace my brake shoes because they w...,Brake System Failures,Brake Shoes,Brake System Failures,customer
947,The car's fuel gauge stopped working and then ...,Electronic Component Failures,Fuel Pump Control Module,Electronic Component Failures,customer
795,"The radiator cap is not sealing properly, caus...",Cooling System Failures,Radiator Cap,Cooling System Failures,technician
520,My car suddenly stopped in the middle of the r...,Fuel System Failures,Fuel Pump,Fuel System Failures,customer
682,The exhaust pipe on my car rusted through and ...,Exhaust System Failures,Exhaust Pipe,Exhaust System Failures,customer
368,My car's suspension felt very loose and wobbly...,Suspension and Steering Failures,Control Arm Bushing,Suspension and Steering Failures,customer
433,"The airbag deployed unexpectedly, causing inju...",Airbag and Safety System Failures,Airbag Inflator,Airbag and Safety System Failures,technician
966,The car's ABS warning light came on and the br...,Electronic Component Failures,ABS Control Module,Electronic Component Failures,customer


In [17]:
# Take the ground truth labels
# Distinct values of the `ground_truth` column, in order of first appearance
# (pandas `unique()` does not sort). The array is reused further down as the
# candidate label set for the zero-shot pipeline.
failure_mode_labels = df["ground_truth"].unique()
print(failure_mode_labels)

['Engine and Transmission Failures' 'Electrical System Failures'
 'Brake System Failures' 'Suspension and Steering Failures'
 'Airbag and Safety System Failures' 'Fuel System Failures'
 'Exhaust System Failures' 'Cooling System Failures' 'Tire Failures'
 'Electronic Component Failures']


## Custom Text Classification with RoBERTa

Using a text classification model is the obvious approach. However, the main problem is that such models have no knowledge of custom data and must first be trained on it.

We will use the model RoBERTa (Robust optimized BERT approach), which is based on Google’s BERT model released in 2018. RoBERTa is a transformers model (123 million parameters) pretrained on a large corpus of English data in a self-supervised fashion. This means it was pretrained on the raw texts only, with no humans labelling them in any way (which is why it can use lots of publicly available data) with an automatic process to generate inputs and labels from those texts.

In [19]:
# Without training, RoBERTa is just guessing some labels

# Plain `roberta-base` checkpoint wrapped in a text-classification pipeline.
# As the warning printed below this cell states, the classification head weights
# are newly initialized, so the prediction carries no meaning.
# NOTE(review): `device=0` requests the first GPU; on a machine without one this
# call presumably fails - confirm before running on CPU-only hardware.
model_id = "roberta-base"
base_pipe = pipeline("text-classification", model=model_id, device=0)
# Same example sentence as row 18 of the dataset shown earlier.
text = "The engine failed due to a faulty fuel pump."
base_pipe(text)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[{'label': 'LABEL_1', 'score': 0.5000951886177063}]

# Fine Tune RoBERTa

Jump to the notebook *HandsOn - Fine Tuning*.

### After fine-tuning the model, we can now use it for text classification in our use-case

In [40]:
# Use-case configuration
model_id = "roberta-base"
repository_id = "roberta-base-fine-tuned"
label_column = "ground_truth"

# Read the comments and keep only what the classifier needs:
# the comment itself ("text") and its class ("label").
df = pd.read_parquet('automotive_failure_mode_comments_01.parquet')
df_fine_tuning = pd.DataFrame({"text": df["comment"], "label": df[label_column]})

# Build both directions of the label <-> integer id mapping. Ids follow the order
# in which the label strings first appear in the data.
labels = df_fine_tuning["label"].unique()
label2id = {label: i for i, label in enumerate(labels)}
id2label = {i: label for i, label in enumerate(labels)}
df_fine_tuning["label"] = df_fine_tuning["label"].map(label2id)

# 80/20 train/validation split with a fixed seed.
# NOTE(review): presumably this must match the split used in the fine-tuning
# notebook so that the validation comments are unseen by the model - confirm.
train_text, val_text, train_labels, val_labels = train_test_split(
    df_fine_tuning["text"].tolist(),
    df_fine_tuning["label"].tolist(),
    test_size=0.2,
    random_state=1909,
)

# Wrap both partitions as `datasets.Dataset` objects that return torch-formatted values.
train_data = {"text": train_text, "label": train_labels}
val_data = {"text": val_text, "label": val_labels}
train_dataset = Dataset.from_dict(train_data).with_format("torch")
val_dataset = Dataset.from_dict(val_data).with_format("torch")

In [41]:
# Load the fine-tuned model
# `repository_id` points at the saved PEFT adapter; its config records the base
# checkpoint the adapter belongs to, which is loaded first.
peft_model_id = repository_id
config = PeftConfig.from_pretrained(peft_model_id)
# Pass both directions of the label mapping so the model config is self-consistent
# instead of pairing the custom `id2label` with a default `label2id`.
inference_model = AutoModelForSequenceClassification.from_pretrained(
    config.base_model_name_or_path, id2label=id2label, label2id=label2id
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# model evaluation
metric = evaluate.load("accuracy")
task_evaluator = evaluator("text-classification")


def _evaluate_on_validation_set(pipe):
    """Return the evaluator's result dict for `pipe` scored on `val_dataset`.

    The accuracy metric is used, and `label2id` translates the label strings
    emitted by the pipeline back into the integer ids stored in the dataset.
    """
    return task_evaluator.compute(
        model_or_pipeline=pipe,
        data=val_dataset,
        metric=metric,
        label_mapping=label2id,
    )


# original model (base checkpoint with an untrained classification head)
pipe1 = pipeline("text-classification", model=inference_model, tokenizer=tokenizer, device=0)
results1 = _evaluate_on_validation_set(pipe1)

# fine-tuned model
# NOTE(review): the adapter is attached to the very same `inference_model` object
# that `pipe1` holds, so the baseline above has to be scored before this line -
# keep the order of these two sections.
model_finetuned = PeftModel.from_pretrained(inference_model, peft_model_id)
# Make sure dropout layers are disabled while scoring.
model_finetuned.eval()
pipe2 = pipeline("text-classification", tokenizer=tokenizer, model=model_finetuned, device=0)
results2 = _evaluate_on_validation_set(pipe2)

print(f"Evaluation result for {inference_model.name_or_path}: {results1}")
print(f"Evaluation result for {peft_model_id}: {results2}")


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The model 'PeftModelForSequenceClassification' is not supported for text-classification. Supported models are ['AlbertForSequenceClassification', 'BartForSequenceClassification', 'BertForSequenceClassification', 'BigBirdForSequenceClassification', 'BigBirdPegasusForSequenceClassification', 'BioGptForSequenceClassification', 'BloomForSequenceClassification', 'CamembertForSequenceClassification', 'CanineForSequenceClassification', 'ConvBertForSequenceClassification', 'CTRLForSequenceClassification', 'Data2VecTextForSequenceClassification', 'DebertaForSequenceClassification', 'DebertaV2ForSequenceClassification', 'DistilBertForSequenc

Evaluation result for roberta-base: {'accuracy': 0.08, 'total_time_in_seconds': 4.596960440998373, 'samples_per_second': 43.507009156807925, 'latency_in_seconds': 0.022984802204991862}
Evaluation result for roberta-base-fine-tuned: {'accuracy': 0.99, 'total_time_in_seconds': 4.937323142999958, 'samples_per_second': 40.50778006773897, 'latency_in_seconds': 0.024686615714999787}


Evaluation result for roberta-base: {'accuracy': 0.08, 'total_time_in_seconds': 4.596960440998373, 'samples_per_second': 43.507009156807925, 'latency_in_seconds': 0.022984802204991862}


Evaluation result for roberta-base-fine-tuned: {'accuracy': 0.99, 'total_time_in_seconds': 4.937323142999958, 'samples_per_second': 40.50778006773897, 'latency_in_seconds': 0.024686615714999787}

## Zero-Shot Classification

However, there already exist models that can handle custom labels, e.g. bart-large-mnli (406 million parameters).

In [35]:
# zero-shot model
# Zero-shot classification pipeline backed by the "facebook/bart-large-mnli"
# checkpoint. The candidate labels are supplied at call time (see the cells
# below), not fixed when the pipeline is built.
# NOTE(review): `device=0` requests the first GPU - confirm one is available.
zero_shot_pipe = pipeline("zero-shot-classification", model="facebook/bart-large-mnli", device=0)

In [37]:
# First example comment; the trailing comment names its expected failure mode.
# The pipeline is given the comment plus all failure-mode labels as candidates,
# and the displayed result lists every label with its score.
c1 = "The exhaust system on my car started to rattle and vibrate while driving" #Exhaust System Failures
zero_shot_pipe(c1, failure_mode_labels)

{'sequence': 'The exhaust system on my car started to rattle and vibrate while driving',
 'labels': ['Exhaust System Failures',
  'Electronic Component Failures',
  'Fuel System Failures',
  'Cooling System Failures',
  'Airbag and Safety System Failures',
  'Suspension and Steering Failures',
  'Engine and Transmission Failures',
  'Electrical System Failures',
  'Brake System Failures',
  'Tire Failures'],
 'scores': [0.5082904696464539,
  0.09095343202352524,
  0.08623526245355606,
  0.08465725928544998,
  0.055397819727659225,
  0.04951249063014984,
  0.044453009963035583,
  0.034748297184705734,
  0.02875005453824997,
  0.017001882195472717]}

In [38]:
# Second example comment; the trailing comment names its expected failure mode.
# In the output shown below, the top-ranked label is 'Fuel System Failures' and
# the expected label comes second, i.e. this example is misclassified.
c2 = "The engine failed due to a faulty fuel pump." # Engine and Transmission Failures
zero_shot_pipe(c2, failure_mode_labels)

{'sequence': 'The engine failed due to a faulty fuel pump.',
 'labels': ['Fuel System Failures',
  'Engine and Transmission Failures',
  'Exhaust System Failures',
  'Suspension and Steering Failures',
  'Electronic Component Failures',
  'Brake System Failures',
  'Cooling System Failures',
  'Airbag and Safety System Failures',
  'Electrical System Failures',
  'Tire Failures'],
 'scores': [0.7950432896614075,
  0.07396596670150757,
  0.036834169179201126,
  0.01830694079399109,
  0.018107235431671143,
  0.014119613915681839,
  0.012575920671224594,
  0.012344281189143658,
  0.011518999002873898,
  0.007183573208749294]}

In [39]:
# Run the zero-shot classifier over every validation comment, offering all
# failure-mode labels as candidates for each one.
res = zero_shot_pipe(val_dataset["text"], candidate_labels=failure_mode_labels)

# Fetch the ground-truth column once. Indexing `val_dataset["label"]` inside the
# loop would rebuild the whole column on every iteration.
gt_labels = val_dataset["label"]

# A prediction counts as correct when the top-ranked label (index 0 of the
# returned "labels" list) maps to the same integer id as the ground truth.
num_matching = 0
for r, gt_label in zip(res, gt_labels):
    predicted_label = label2id[r["labels"][0]]
    # int() turns the torch-formatted label into a plain Python int for comparison.
    num_matching += int(predicted_label == int(gt_label))

print(f"Evaluation result for zero-shot bart-large-mnli: {num_matching/len(val_dataset)}")

Evaluation result for zero-shot bart-large-mnli: 0.875


Evaluation result for zero-shot bart-large-mnli: 0.875