<a href="https://colab.research.google.com/github/sweta98/Warranty-Classification-/blob/main/brembo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers




In [None]:
import pandas as pd

data = pd.read_csv('challenge2_data.csv')

In [None]:
data['fm'].value_counts()

LEAKAGE                                         2643
NOISE                                           1517
VIBRATION                                        957
DRAG FORCE                                       617
BREAKAGE/DEFORMATION                             237
AESTHETICAL DEFECT                               226
SPONGY/NOT BLEEDABLE                             156
CORROSION                                        111
WEAR / ABRASION                                  103
DEGRADATION OF PISTON OR CALIPER BODY STROKE     100
ELECTRICAL SYSTEM FAULT                           56
Name: fm, dtype: int64

In [None]:
classes = data['fm'].unique().tolist()

In [None]:
classes

['DRAG FORCE',
 'NOISE',
 'LEAKAGE',
 'SPONGY/NOT BLEEDABLE',
 'VIBRATION',
 'BREAKAGE/DEFORMATION',
 'WEAR / ABRASION',
 'CORROSION',
 'AESTHETICAL DEFECT',
 'DEGRADATION OF PISTON OR CALIPER BODY STROKE',
 'ELECTRICAL SYSTEM FAULT']

In [None]:
data['labels'] = data['fm']
data['text'] = data['claim']

In [None]:
batch_size=32
num_epochs=10

In [None]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import numpy as np

texts = data['claim'].to_list()
labels = data['fm'].to_list()

# Initialize a label encoder to convert text labels to numerical labels
label_encoder = LabelEncoder()
numerical_labels = label_encoder.fit_transform(labels)

# Initialize a one-hot encoder to encode numerical labels as one-hot vectors
one_hot_encoder = OneHotEncoder(sparse=False)
one_hot_labels = one_hot_encoder.fit_transform(numerical_labels.reshape(-1, 1))

# Split the data into train and test sets
texts_train, texts_test, labels_train, labels_test = train_test_split(texts, one_hot_labels, test_size=0.2)

# Initialize the BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(label_encoder.classes_))

# Tokenize the text data
train_encodings = tokenizer(texts_train, truncation=True, padding=True, return_tensors='pt', max_length=64)
test_encodings = tokenizer(texts_test, truncation=True, padding=True, return_tensors='pt', max_length=64)

# Convert one-hot labels to PyTorch tensors
train_labels = torch.tensor(labels_train, dtype=torch.float32)
test_labels = torch.tensor(labels_test, dtype=torch.float32)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:


# Train the model
from torch.utils.data import DataLoader, TensorDataset

train_dataset = TensorDataset(train_encodings.input_ids, train_encodings.attention_mask, train_labels)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Define the optimizer and loss function
from torch.optim import Adam
import torch.nn as nn

optimizer = Adam(model.parameters(), lr=1e-5)
criterion = nn.BCEWithLogitsLoss()

from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training loop
model.train()
for epoch in tqdm(range(num_epochs)):
    for batch in train_loader:
        input_ids, attention_mask, labels = batch

        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()


model.save_pretrained("./model/")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 10/10 [09:32<00:00, 57.29s/it]


In [None]:
# Evaluate the model

model = BertForSequenceClassification.from_pretrained("./model")

model.eval()
test_dataset = TensorDataset(test_encodings.input_ids, test_encodings.attention_mask, test_labels)
test_loader = DataLoader(test_dataset, batch_size=32)

predictions = []
true_labels = []

with torch.no_grad():
    for batch in test_loader:
        input_ids, attention_mask, labels = batch

        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        labels = labels.to(device)
        model.to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        predictions.extend(logits.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())


In [None]:
pred = np.argmax(predictions, axis=1)
true = np.argmax(true_labels, axis=1)
accuracy = np.mean(pred == true)

print("Accuracy:", accuracy)

Accuracy: 0.9620817843866171


In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
print(confusion_matrix(pred, true))
print()
print(classification_report(pred, true))

[[ 42   0   0   0   0   0   0   0   0   1   0]
 [  2  43   1   2   0   4   0   0   0   0   0]
 [  0   0  18   0   0   0   0   0   0   0   0]
 [  0   0   0   3   0   0   0   0   0   0   0]
 [  1   0   0  10 120   0   1   0   0   0   0]
 [  0   0   0   0   0   4   0   0   0   0   0]
 [  0   2   1   1   8   0 515   0   2   1   0]
 [  0   1   0   1   0   0   0 316   0   1   0]
 [  0   0   0   2   1   0   0   1  33   0   0]
 [  1   1   0   2   0   0   0   2   0 175   0]
 [  1   0   0   0   0   0   0   0   0   0  25]]

              precision    recall  f1-score   support

           0       0.89      0.98      0.93        43
           1       0.91      0.83      0.87        52
           2       0.90      1.00      0.95        18
           3       0.14      1.00      0.25         3
           4       0.93      0.91      0.92       132
           5       0.50      1.00      0.67         4
           6       1.00      0.97      0.98       530
           7       0.99      0.99      0.99     

In [None]:
!pip install keytotext --upgrade
!sudo apt-get install git-lfs

Collecting keytotext
  Downloading keytotext-2.3.2-py3-none-any.whl (11 kB)
Collecting sentencepiece (from keytotext)
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting wandb (from keytotext)
  Downloading wandb-0.15.12-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m24.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pytorch_lightning (from keytotext)
  Downloading pytorch_lightning-2.1.0-py3-none-any.whl (774 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m774.6/774.6 kB[0m [31m33.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets (from keytotext)
  Downloading datasets-2.14.5-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.6/519.6 kB[0m [31m42.0 MB/s[0m eta [3

In [None]:
from keytotext import trainer, make_dataset

train_df = make_dataset('common_gen', split='train')
test_df = make_dataset('common_gen', split='test')

INFO:lightning_fabric.utilities.seed:Seed set to 42


Downloading builder script:   0%|          | 0.00/4.10k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/2.40k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/6.99k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.85M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67389 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/4018 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1497 [00:00<?, ? examples/s]

In [None]:
train_df

Unnamed: 0,keywords,text
0,ski mountain skier,Skier skis down the mountain
1,ski mountain skier,A skier is skiing down a mountain.
2,ski mountain skier,Three skiers are skiing on a snowy mountain.
3,wag tail dog,The dog is wagging his tail.
4,wag tail dog,A dog wags his tail at the boy.
...,...,...
67384,aspect worship temple feature type,type of place of worship of artwork featuring ...
67385,feature aspect temple place artwork,type of place of worship of artwork featuring ...
67386,aspect feature place artwork type,type of place of worship of artwork featuring ...
67387,type temple aspect artwork feature,type of place of worship of artwork featuring ...


In [None]:
df = data.copy()
df['keywords'] = df['fm']
df['text'] = df['claim']
df.drop('fm', axis=1, inplace=True)
df.drop('claim', axis=1, inplace=True)
df.drop('labels', axis=1, inplace=True)

train_df, test_df = train_test_split(df, test_size=0.2)

In [None]:
model = trainer()
model.from_pretrained(model_name="t5-small")
model.train(train_df=train_df, test_df=test_df, batch_size=2, max_epochs=3, use_gpu=False)
model.save_model()

TypeError: ignored

In [None]:
import torch
from transformers import BertTokenizer, BertForConditionalGeneration

# Load a pre-trained BERT model and tokenizer
model = BertForConditionalGeneration.from_pretrained("./model")

# Set the seed text
seed_text = "LEAKAGE"

# Tokenize the seed text
input_ids = tokenizer.encode(seed_text, return_tensors="pt")

# Generate text based on the seed text
output_ids = model.generate(input_ids, max_length=50, num_return_sequences=1, no_repeat_ngram_size=2, do_sample=True, top_k=50, top_p=0.95)

# Decode the generated text
generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

print(generated_text)

ImportError: ignored

In [None]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from torch.utils.data import Dataset, DataLoader
from transformers import AdamW

# Sample dataset of labels and responses
data = [
    ("Greeting", "Hello! How can I assist you today?"),
    ("Question", "Sure, I can help with that. What's your question?"),
    ("Farewell", "Goodbye! Have a great day!"),
]

# Custom dataset
class LabelTextDataset(Dataset):
    def __init__(self, data, tokenizer, max_length=50):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        label, text = self.data[index]
        input_text = label + " " + text
        encoding = self.tokenizer.encode_plus(
            input_text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        return encoding

# Add a padding token to the tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.add_special_tokens({'pad_token': '[PAD]'})  # Add a padding token

model = GPT2LMHeadModel.from_pretrained("gpt2")

# Prepare the dataset
dataset = LabelTextDataset(data, tokenizer)

# Data loader
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

# Fine-tuning parameters
optimizer = AdamW(model.parameters(), lr=1e-5)

# Fine-tuning loop
num_epochs = 3
for epoch in range(num_epochs):
    for batch in dataloader:
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = input_ids.clone()

        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

# Save the fine-tuned model
model.save_pretrained("fine_tuned_gpt2")

# Inference
prompt = "Greeting"
input_text = prompt + " "  # Initialize with a label
for _ in range(50):  # Generate a sequence of text
    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    output = model.generate(input_ids, max_length=100, num_return_sequences=1, pad_token_id=50256)
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    input_text += " " + generated_text

print(input_text)


IndexError: ignored

In [None]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from torch.utils.data import Dataset, DataLoader
from transformers import AdamW

# Sample dataset of labels and responses
data = df

# Custom dataset
class LabelTextDataset(Dataset):
    def __init__(self, data, tokenizer, max_length=50):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        label, text = self.data[index]
        input_text = label + " " + text
        encoding = self.tokenizer.encode_plus(
            input_text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        return encoding

# Add a padding token to the tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # Set the padding token

model = GPT2LMHeadModel.from_pretrained("gpt2")

# Prepare the dataset
dataset = LabelTextDataset(data, tokenizer)

# Data loader
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

# Fine-tuning parameters
optimizer = AdamW(model.parameters(), lr=1e-5)

# Fine-tuning loop
num_epochs = 3
for epoch in range(num_epochs):
    for batch in dataloader:
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = input_ids.clone()

        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

# Save the fine-tuned model
model.save_pretrained("fine_tuned_gpt2")

# Inference
prompt = "Greeting"
input_text = prompt + " "  # Initialize with a label
for _ in range(50):  # Generate a sequence of text
    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    output = model.generate(input_ids, max_length=100, num_return_sequences=1, pad_token_id=50256)
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    input_text += " " + generated_text

print(input_text)


Epoch 1, Loss: 6.893852233886719
Epoch 2, Loss: 5.722869396209717
Epoch 3, Loss: 5.270862579345703


Token indices sequence length is longer than the specified maximum sequence length for this model (1935 > 1024). Running this sequence through the model will result in indexing errors


IndexError: ignored

In [None]:

# Function to generate text based on labels
def generate_text_with_label(label, max_length=100):
    input_text = label + " "  # Initialize with a label

    # Set the maximum length for the generated text
    max_length = len(tokenizer.encode(input_text)) + max_length

    while len(tokenizer.encode(input_text)) < max_length:
        input_ids = tokenizer.encode(input_text, return_tensors="pt")
        output = model.generate(input_ids, max_length=max_length, num_return_sequences=1, pad_token_id=50256)
        generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
        input_text += " " + generated_text

    return input_text

# Example usage
prompt = "Greeting"
generated_response = generate_text_with_label(prompt, max_length=100)
print(generated_response)

Greeting  Greeting  from the 
                                                                                                Greeting  Greeting  from the 
                                                                                               
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            

In [None]:
df

Unnamed: 0,text,keywords
0,Rigid Pipe Brake Caliper Front Remove & Replac...,DRAG FORCE
1,CUSTOMER STATES BRAKES SQUEAK INTERMITTENTLY W...,NOISE
2,THE CUSTOMER HAS BROUGHT THE AND FOLLOW ANNUAL...,LEAKAGE
3,Brake Caliper Rear Remove & Replace Customers ...,NOISE
4,Rear Brake Jam,DRAG FORCE
...,...,...
6718,SCREWS WERE RUSTY..,CORROSION
6719,O Ton customer: Brake rear blocked >compl. Rep...,DRAG FORCE
6720,Rattle Squeak The front strut continues make a...,NOISE
6721,The shakes when braking;Air conditioning panel...,LEAKAGE


In [None]:
import pandas as pd
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from torch.utils.data import Dataset, DataLoader
from transformers import AdamW


# Custom dataset
class LabelTextDataset(Dataset):
    def __init__(self, data, tokenizer, max_length=50):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        text, label = self.data.iloc[index]
        input_text = label + " " + text
        encoding = self.tokenizer.encode_plus(
            input_text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        return encoding

# Add a padding token to the tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # Set the padding token

model = GPT2LMHeadModel.from_pretrained("gpt2")

# Prepare the dataset
dataset = LabelTextDataset(df, tokenizer)

# Data loader
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Fine-tuning parameters
optimizer = AdamW(model.parameters(), lr=1e-5)

# Fine-tuning loop
num_epochs = 3
for epoch in tqdm(range(num_epochs)):
    for batch in tqdm(dataloader):
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = input_ids.clone()
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        labels = labels.to(device)
        model.to(device)

        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

# Save the fine-tuned model
model.save_pretrained("fine_tuned_gpt2")


  0%|          | 0/3 [00:00<?, ?it/s]
  0%|          | 0/211 [00:00<?, ?it/s][A
  0%|          | 1/211 [00:00<02:16,  1.53it/s][A
  1%|          | 2/211 [00:01<01:49,  1.91it/s][A
  1%|▏         | 3/211 [00:01<01:41,  2.06it/s][A
  2%|▏         | 4/211 [00:01<01:36,  2.15it/s][A
  2%|▏         | 5/211 [00:02<01:33,  2.20it/s][A
  3%|▎         | 6/211 [00:02<01:32,  2.22it/s][A
  3%|▎         | 7/211 [00:03<01:30,  2.24it/s][A
  4%|▍         | 8/211 [00:03<01:29,  2.26it/s][A
  4%|▍         | 9/211 [00:04<01:31,  2.22it/s][A
  5%|▍         | 10/211 [00:04<01:35,  2.10it/s][A
  5%|▌         | 11/211 [00:05<01:31,  2.18it/s][A
  6%|▌         | 12/211 [00:05<01:30,  2.20it/s][A
  6%|▌         | 13/211 [00:05<01:27,  2.26it/s][A
  7%|▋         | 14/211 [00:06<01:27,  2.26it/s][A
  7%|▋         | 15/211 [00:06<01:25,  2.29it/s][A
  8%|▊         | 16/211 [00:07<01:25,  2.29it/s][A
  8%|▊         | 17/211 [00:07<01:24,  2.31it/s][A
  9%|▊         | 18/211 [00:08<01:23,  2.32i

Epoch 1, Loss: 1.4398090839385986



  0%|          | 0/211 [00:00<?, ?it/s][A
  0%|          | 1/211 [00:00<01:29,  2.35it/s][A
  1%|          | 2/211 [00:00<01:30,  2.31it/s][A
  1%|▏         | 3/211 [00:01<01:30,  2.29it/s][A
  2%|▏         | 4/211 [00:01<01:30,  2.29it/s][A
  2%|▏         | 5/211 [00:02<01:30,  2.27it/s][A
  3%|▎         | 6/211 [00:02<01:30,  2.27it/s][A
  3%|▎         | 7/211 [00:03<01:30,  2.26it/s][A
  4%|▍         | 8/211 [00:03<01:29,  2.27it/s][A
  4%|▍         | 9/211 [00:03<01:29,  2.25it/s][A
  5%|▍         | 10/211 [00:04<01:29,  2.26it/s][A
  5%|▌         | 11/211 [00:04<01:28,  2.25it/s][A
  6%|▌         | 12/211 [00:05<01:28,  2.25it/s][A
  6%|▌         | 13/211 [00:05<01:27,  2.25it/s][A
  7%|▋         | 14/211 [00:06<01:27,  2.25it/s][A
  7%|▋         | 15/211 [00:06<01:26,  2.26it/s][A
  8%|▊         | 16/211 [00:07<01:26,  2.25it/s][A
  8%|▊         | 17/211 [00:07<01:26,  2.24it/s][A
  9%|▊         | 18/211 [00:07<01:25,  2.25it/s][A
  9%|▉         | 19/211 [00:0

Epoch 2, Loss: 3.7212612628936768



  0%|          | 0/211 [00:00<?, ?it/s][A
  0%|          | 1/211 [00:00<01:28,  2.38it/s][A
  1%|          | 2/211 [00:00<01:31,  2.30it/s][A
  1%|▏         | 3/211 [00:01<01:31,  2.27it/s][A
  2%|▏         | 4/211 [00:01<01:31,  2.27it/s][A
  2%|▏         | 5/211 [00:02<01:31,  2.25it/s][A
  3%|▎         | 6/211 [00:02<01:30,  2.26it/s][A
  3%|▎         | 7/211 [00:03<01:31,  2.24it/s][A
  4%|▍         | 8/211 [00:03<01:30,  2.24it/s][A
  4%|▍         | 9/211 [00:03<01:30,  2.24it/s][A
  5%|▍         | 10/211 [00:04<01:29,  2.24it/s][A
  5%|▌         | 11/211 [00:04<01:28,  2.25it/s][A
  6%|▌         | 12/211 [00:05<01:28,  2.25it/s][A
  6%|▌         | 13/211 [00:05<01:27,  2.25it/s][A
  7%|▋         | 14/211 [00:06<01:27,  2.24it/s][A
  7%|▋         | 15/211 [00:06<01:27,  2.25it/s][A
  8%|▊         | 16/211 [00:07<01:26,  2.24it/s][A
  8%|▊         | 17/211 [00:07<01:26,  2.25it/s][A
  9%|▊         | 18/211 [00:07<01:25,  2.25it/s][A
  9%|▉         | 19/211 [00:0

Epoch 3, Loss: 1.8249884843826294


In [None]:
model

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [None]:

# Load the fine-tuned GPT-2 model and tokenizer
model = GPT2LMHeadModel.from_pretrained("./fine_tuned_gpt2")

def generate_text_with_label(label, max_length=100, temperature=0.7, top_k=50):
    input_text = label + " "  # Initialize with a label

    # Generate text with temperature and top-k sampling
    output = model.generate(
        input_ids=tokenizer.encode(input_text, return_tensors="pt"),
        max_length=max_length,
        num_return_sequences=1,
        pad_token_id=50256,  # The EOS token
        temperature=temperature,
        top_k=top_k
    )

    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text

for _ in range(10):
    # Example usage
    prompt = "DRAG FORCE"
    generated_response = generate_text_with_label(prompt, max_length=100)
    print(generated_response)

DRAG FORCE _______ FRONT BRAKE CALIPER LEVER, REPLACED CALIPER, AND REAR BRAKE CALIPER.
DRAG FORCE _______ FRONT BRAKE CALIPER LEVER, REPLACED CALIPER, AND REAR BRAKE CALIPER.
DRAG FORCE _______ FRONT BRAKE CALIPER LEVER, REPLACED CALIPER, AND REAR BRAKE CALIPER.
DRAG FORCE _______ FRONT BRAKE CALIPER LEVER, REPLACED CALIPER, AND REAR BRAKE CALIPER.
DRAG FORCE _______ FRONT BRAKE CALIPER LEVER, REPLACED CALIPER, AND REAR BRAKE CALIPER.
DRAG FORCE _______ FRONT BRAKE CALIPER LEVER, REPLACED CALIPER, AND REAR BRAKE CALIPER.
DRAG FORCE _______ FRONT BRAKE CALIPER LEVER, REPLACED CALIPER, AND REAR BRAKE CALIPER.
DRAG FORCE _______ FRONT BRAKE CALIPER LEVER, REPLACED CALIPER, AND REAR BRAKE CALIPER.
DRAG FORCE _______ FRONT BRAKE CALIPER LEVER, REPLACED CALIPER, AND REAR BRAKE CALIPER.
DRAG FORCE _______ FRONT BRAKE CALIPER LEVER, REPLACED CALIPER, AND REAR BRAKE CALIPER.
