In [None]:
!pip install transformers datasets peft evaluate pandas scikit-learn xformers -q

In [None]:
from datasets import load_dataset, Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    RobertaTokenizerFast,
    RobertaForSequenceClassification,
    TrainingArguments,
    Trainer,
    AutoConfig,
    pipeline
)
from evaluate import evaluator
import evaluate
from peft import LoraConfig, get_peft_model, PeftConfig, PeftModel
import peft
import pandas as pd
import random
from sklearn.model_selection import train_test_split

# Demystifying Chat-GPT
## Use-Case: Text Classification
Text Classification is the task of assigning a label or class to a given text. Some use cases are sentiment analysis, natural language inference, and assessing grammatical correctness.

LLMs that are capable of performing text classification tasks are, in general, explicitly trained on the requested labels.

E.g. the model roberta-base_ag_news is a fine-tuned version of roberta-base on the ag_news dataset and can map a text to the classes:

- World
- Sports
- Business
- Sci/Tech

We can easily set up this model and use it to assign a given text or headline to one of the above classes.

In [None]:
# Create a text-classification pipeline backed by the roberta-base_ag_news
# checkpoint; the resulting object can be called directly on a text.
classifier = pipeline(
    task='text-classification',
    model='achimoraites/roberta-base_ag_news',
)

In [None]:
# A few example texts from the ag_news dataset for the model to classify.
# The backslashes inside the texts are written as "\\" so that they stay
# literal characters: unescaped, "\b" is parsed as a backspace control
# character (silently corrupting the text) and "\i" / "\c" are invalid escape
# sequences that trigger a warning.
# NOTE(review): assumes the backslashes are part of the dataset text - confirm.
news = [
    "Talks End With No U.S. Climate Deal A U.N. conference ended early Saturday with a vague plan for informal new talks on how to slow global warming but without a U.S. commitment to multilateral negotiations on next steps, including emissions controls.",
    "Texas' Johnson, Benson Go Out With Win (AP) AP - Their final games will be remembered for the plays others made. Still, Texas tailback Cedric Benson and linebacker Derrick Johnson went out the way they wanted to: with a Rose Bowl win.",
    "Wall St. Bears Claw Back Into the Black (Reuters) Reuters - Short-sellers, Wall Street's dwindling\\band of ultra-cynics, are seeing green again.",
    "Gene Blocker Turns Monkeys Into Workaholics - Study (Reuters) Reuters - Procrastinating monkeys were turned\\into workaholics using a gene treatment to block a key brain\\compound, U.S. researchers reported on Wednesday.",
]

# Classify every example and print the input next to the model's answer.
for nw in news:
    r = classifier(nw)
    print(f"Text to classify: {nw}")
    print(f"Classification result:{r}")
    print("--------------")

In [None]:
# The classifier is not restricted to texts from the dataset: here it is
# applied to a more recent headline taken from CNN (https://edition.cnn.com/).
text = (
    "India becomes the fourth country ever to land a spacecraft on the moon"
)
classifier(text)

In [None]:
# Second headline to classify; the cell displays the classifier's answer.
text = (
    "Laulauga Tausaga-Collins wins US’ first women’s discus world championship"
)
classifier(text)

## Text Classification in Automotive Industry

Field data analysis in the automotive industry is crucial for several reasons, primarily centered around improving vehicle design, performance, safety, and overall customer satisfaction. One of the most important statistics is the distribution of the failure modes of the vehicles. The main failure modes are:

- **Engine and Transmission Failures**: Problems with the engine or transmission can lead to significant performance issues, reduced fuel efficiency, and even complete breakdowns.
- **Electrical System Failures**: Malfunctions in the electrical system can cause issues with lights, sensors, controls, and other critical components.
- **Brake System Failures**: Brake failure can be extremely dangerous, compromising vehicle safety and leading to accidents.
- **Suspension and Steering Failures**: Faulty suspension or steering systems can cause handling problems and compromise the vehicle's stability.
- **Airbag and Safety System Failures**: Failure of airbags and other safety systems can result in severe injuries during accidents.
- **Fuel System Failures**: Issues with the fuel system can lead to fuel leaks, reduced efficiency, and even fires.
- **Exhaust System Failures**: Problems in the exhaust system can result in increased emissions, decreased performance, and environmental issues.
- **Cooling System Failures**: Failure of the cooling system can lead to engine overheating and potential damage.
- **Tire Failures**: Defective or worn-out tires can result in blowouts and accidents.
- **Electronic Component Failures**: Problems with electronic components, such as the infotainment system or computer modules, can lead to malfunctions and inconvenience for the driver.


This information can be gathered from customer and/or technician comments, but it is stored as unstructured text data, e.g.:

- The exhaust system on my car started to rattle and vibrate while driving - Exhaust System Failures
- The engine failed due to a faulty fuel pump. - Engine and Transmission Failures

We have prepared a dataset containing customer/technician comments with the corresponding labels. Let's take a look at it...

In [None]:
# Read the comment dataset from its parquet file.
df = pd.read_parquet('automotive_failure_mode_comments.parquet')

# Preview: two hand-picked rows followed by ten rows drawn at random
# (random.choices samples with replacement, so rows may repeat).
fixed_rows = [624, 18]
random_rows = random.choices(df.index, k=10)
idx = fixed_rows + random_rows
df.loc[idx]

In [None]:
# The failure-mode labels of interest are the distinct values of the
# ground-truth column of the dataset.
ground_truth_column = df["ground_truth"]
failure_mode_labels = ground_truth_column.unique()
print(failure_mode_labels)

## Custom Text Classification with RoBERTa

Using a text classification model is the obvious approach. However, the main problem is that such models have no knowledge of custom data and must first be trained on it.

We will use the model RoBERTa (Robustly Optimized BERT Pretraining Approach), which is based on Google’s BERT model released in 2018. RoBERTa is a transformer model (roughly 125 million parameters in its base version) pretrained on a large corpus of English data in a self-supervised fashion. This means it was pretrained on the raw texts only, with no humans labelling them in any way (which is why it can use lots of publicly available data), using an automatic process to generate inputs and labels from those texts.

In [None]:
# Baseline: the plain pre-trained checkpoint has not been trained on our
# labels, so whatever label it returns for this comment is only a guess.
model_id = "roberta-base"
text = "The engine failed due to a faulty fuel pump."

# device=0 places the pipeline on the first GPU.
base_pipe = pipeline("text-classification", model=model_id, device=0)
base_pipe(text)

# Fine Tune RoBERTa

We can see from the output of the model evaluation above that the model itself tells us that we need to train it!

Let's do it and jump to the notebook __HandsOn - Fine Tuning__

If you don't want to fine-tune the model right now, the model is already available in the tar archive __roberta-base-fine-tuned__

To extract it, go to the launcher --> terminal --> type __tar -xf roberta-base-fine-tuned.tar__

### After fine-tuning the model, we can now use it for text classification in our use-case

In [None]:
# --- Use-case configuration -------------------------------------------------
model_id = "roberta-base"
repository_id = "roberta-base-fine-tuned"
label_column = "ground_truth"

# --- Training and validation data --------------------------------------------
# Read the raw data and keep only the two columns needed for fine-tuning:
# the comment text and its ground-truth label.
df = pd.read_parquet('automotive_failure_mode_comments.parquet')
df_fine_tuning = pd.DataFrame({"text": df["comment"], "label": df[label_column]})

# Two-way mapping between label names and integer ids; ids are assigned in the
# order in which the labels are returned by unique().
labels = df_fine_tuning["label"].unique()
label2id = {label_name: label_id for label_id, label_name in enumerate(labels)}
id2label = {label_id: label_name for label_name, label_id in label2id.items()}
df_fine_tuning["label"] = df_fine_tuning["label"].map(label2id)

# 80/20 split with a fixed seed so that the split is reproducible.
all_texts = df_fine_tuning["text"].tolist()
all_label_ids = df_fine_tuning["label"].tolist()
train_text, val_text, train_labels, val_labels = train_test_split(
    all_texts, all_label_ids, test_size=0.2, random_state=1909
)

# Wrap both splits as datasets that return torch-formatted values.
train_data = {"text": train_text, "label": train_labels}
val_data = {"text": val_text, "label": val_labels}
train_dataset = Dataset.from_dict(train_data).with_format("torch")
val_dataset = Dataset.from_dict(val_data).with_format("torch")

In [None]:
# Look up the base checkpoint recorded in the adapter configuration and load
# that checkpoint together with its tokenizer.
peft_model_id = repository_id
config = PeftConfig.from_pretrained(peft_model_id)
base_checkpoint = config.base_model_name_or_path
tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)
inference_model = AutoModelForSequenceClassification.from_pretrained(
    base_checkpoint, id2label=id2label
)

# Both models are scored with the same metric, data and label mapping.
metric = evaluate.load("accuracy")
task_evaluator = evaluator("text-classification")
shared_eval_args = {
    "data": val_dataset,
    "metric": metric,
    "label_mapping": label2id,
}

# 1) Baseline: the base model without the fine-tuned adapter.
# NOTE(review): this evaluation is deliberately kept before the PeftModel call
# below; PEFT presumably wraps the same model object instead of copying it -
# confirm before reordering.
pipe1 = pipeline("text-classification", model=inference_model, tokenizer=tokenizer, device=0)
results1 = task_evaluator.compute(model_or_pipeline=pipe1, **shared_eval_args)

# 2) Fine-tuned: the base model with the adapter from `peft_model_id` applied.
model_finetuned = PeftModel.from_pretrained(inference_model, peft_model_id)
pipe2 = pipeline("text-classification", tokenizer=tokenizer, model=model_finetuned, device=0)
results2 = task_evaluator.compute(model_or_pipeline=pipe2, **shared_eval_args)

# Report both results side by side.
print(f"Evaluation result for {inference_model.name_or_path}: {results1}")
print(f"Evaluation result for {peft_model_id}: {results2}")


## Previously evaluated model accuracy

Evaluation result for roberta-base: {'accuracy': 0.08, 'total_time_in_seconds': 4.596960440998373, 'samples_per_second': 43.507009156807925, 'latency_in_seconds': 0.022984802204991862}


Evaluation result for roberta-base-fine-tuned: {'accuracy': 0.99, 'total_time_in_seconds': 4.937323142999958, 'samples_per_second': 40.50778006773897, 'latency_in_seconds': 0.024686615714999787}

__Because some parameters are randomly initialized during fine-tuning, some fine-tuned models may perform better than others!__

## Zero-Shot Classification

However, there already exist models that can handle custom labels, e.g. bart-large-mnli (406 million parameters).

In [None]:
# Zero-shot classification pipeline based on facebook/bart-large-mnli,
# placed on the first GPU (device=0).
zero_shot_pipe = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=0,
)

In [None]:
# Example comment; expected failure mode: Exhaust System Failures
c1 = "The exhaust system on my car started to rattle and vibrate while driving"
zero_shot_pipe(c1, candidate_labels=failure_mode_labels)

In [None]:
# Example comment; expected failure mode: Engine and Transmission Failures
c2 = "The engine failed due to a faulty fuel pump."
zero_shot_pipe(c2, candidate_labels=failure_mode_labels)

In [None]:
# Accuracy of the zero-shot model on the validation split.
# Both columns are read from the dataset once, up front, so that the label
# column is not fetched again for every single sample inside the loop.
val_texts = val_dataset["text"]
val_gt_labels = val_dataset["label"]

res = zero_shot_pipe(val_texts, candidate_labels=failure_mode_labels)

num_matching = 0
for r, gt_label in zip(res, val_gt_labels):
    # The first entry of r["labels"] is taken as the model's prediction and
    # converted to its integer id for comparison with the ground truth.
    predicted_label = label2id[r["labels"][0]]
    num_matching += int(predicted_label == gt_label)

print(f"Evaluation result for zero-shot bart-large-mnli: {num_matching/len(val_texts)}")

## Previously evaluated model accuracy

Evaluation result for zero-shot bart-large-mnli: 0.875