# In [61]:
from sklearn.model_selection import train_test_split
from datasets import load_dataset
from transformers import AutoTokenizer
from transformers import DataCollatorWithPadding
import evaluate
import numpy as np
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

def preprocess_function(examples):
    """Tokenize the ``"text"`` field of ``examples`` with truncation enabled.

    Uses the module-level ``tokenizer`` and returns whatever it produces.
    """
    texts = examples["text"]
    return tokenizer(texts, truncation=True)
def compute_metrics(eval_pred):
    """Score one evaluation pass with the module-level ``accuracy`` metric.

    ``eval_pred`` is unpacked into ``(predictions, labels)``. The predicted
    class for each row is the index of the largest value along axis 1, and
    those indices are compared against ``labels``.
    """
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=1)
    return accuracy.compute(
        predictions=predicted_classes,
        references=references,
    )

# Checkpoint shared by the tokenizer and the model so the two cannot drift apart.
MODEL_CHECKPOINT = "bert-base-uncased"
# Directory used both for training checkpoints and for the final saved model.
OUTPUT_DIR = "intent_recognition_model"
# Fixed seed for the train/test split so the held-out set is the same on every
# run; without it the evaluation accuracy is not comparable across runs.
SPLIT_SEED = 42

# Load the labelled CSV as a single split and rename the target column to "label".
dataset = load_dataset("csv", data_files="intent_recognition_training.csv", split="train")
dataset = dataset.rename_column("is_recommend", "label")
# Hold out 20% of the rows for evaluation (reproducibly, see SPLIT_SEED).
dataset = dataset.train_test_split(test_size=0.2, seed=SPLIT_SEED)

tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
# Tokenize both splits in batches; preprocess_function reads `tokenizer`.
tokenized = dataset.map(preprocess_function, batched=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
# compute_metrics reads this module-level metric object.
accuracy = evaluate.load('accuracy')

# Class index <-> human-readable label; the inverse map is derived so the two
# always agree.
id2label = {0: "NO", 1: "YES"}
label2id = {name: idx for idx, name in id2label.items()}

model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_CHECKPOINT,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    weight_decay=0.01,
    # Evaluate and checkpoint once per epoch; the two strategies match, which
    # load_best_model_at_end relies on.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()
# Persist the final model to the directory the inference cell loads from.
trainer.save_model(OUTPUT_DIR)


# In [14]:
import pandas as pd
from transformers import pipeline
# Load the fine-tuned classifier from disk. Truncation is requested explicitly
# so that over-long comments are cut to max_length by design, rather than
# relying on the tokenizer's fallback when only max_length is supplied.
classifier = pipeline(
    'text-classification',
    model='./intent_recognition_model',
    truncation=True,
    max_length=512,
)
df = pd.read_csv('comments.csv')
# Rows without text cannot be classified, so drop them before predicting.
df = df.dropna(subset=['text'])
comments = df['text'].tolist()
x = classifier(comments)
# Encode each prediction as 1 when the predicted label is "YES", otherwise 0.
res = [1 if prediction['label'] == 'YES' else 0 for prediction in x]
# `res` has one entry per remaining row, in the same order as `comments`.
df['is_recommend'] = res
df.to_csv('laptop_found_intent.csv', index=False)
