In [None]:
from transformers import BertTokenizer, BertModel, BertForSequenceClassification
import torch
import numpy as np
from .autonotebook import tqdm as notebook_tqdm 

# Pick the pretrained checkpoint; the tokenizer and the model are both loaded from it.
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
# num_labels=1 requests a single-output head (transformers treats that as regression).
model = BertForSequenceClassification.from_pretrained(
    model_name,
    num_labels=1,
)

# Free-text description of the user's preferences, tokenized into PyTorch tensors.
input_text = (
    "The user prefers a red, 2023, automatic transmission, electric car "
    "with a budget of $50,000."
)
inputs = tokenizer(input_text, return_tensors="pt")

# Fine-tune the model with the Hugging Face Trainer.
# NOTE: `train_dataset` and `val_dataset` are not created in this cell; they
# must already exist (defined earlier in the notebook) before this runs.
from transformers import Trainer, TrainingArguments

# Training configuration: 3 epochs, checkpoints/logs under ./results,
# evaluation on `val_dataset` at the end of every epoch.
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
)

trainer = Trainer(
    args=training_args,
    model=model,
    eval_dataset=val_dataset,
    train_dataset=train_dataset,
)

# Run the fine-tuning loop (updates `model` in place).
trainer.train()

# Get embeddings and make recommendations.
# NOTE: `car_texts` (an iterable of car description strings) is not created in
# this cell; it must already exist before this runs.

# Switch to inference mode first: after `trainer.train()` the model is still in
# training mode, so dropout would make the embeddings noisy and non-repeatable.
model.eval()

# Run inference on whatever device the model's weights currently live on, so
# the inputs can never end up on a different device than the model.
model_device = next(model.parameters()).device

with torch.no_grad():
    # The tokenizer output is a mapping (input_ids, attention_mask, ...), so it
    # has to be unpacked into keyword arguments, not passed positionally.
    user_input = inputs.to(model_device)
    user_embedding = model.bert(**user_input)['pooler_output']

    # Calculate car embeddings for all cars in the dataset.
    # Each text is encoded on its own (batch of 1); `truncation=True` keeps
    # overly long descriptions within the model's maximum sequence length.
    car_embedding_rows = []
    for car_text in car_texts:
        car_input = tokenizer(car_text, return_tensors="pt", truncation=True).to(model_device)
        car_embedding_rows.append(model.bert(**car_input)['pooler_output'])
    if not car_embedding_rows:
        raise ValueError("car_texts is empty; there is nothing to recommend.")
    # One row per car, kept on the model's device so the matmul below does not
    # mix CPU and GPU tensors.
    car_embedding_matrix = torch.cat(car_embedding_rows, dim=0)

    # Calculate similarity scores (dot product) between the user preferences
    # and every car embedding; one row of scores, one column per car.
    similarity_scores = torch.matmul(user_embedding, car_embedding_matrix.T)

    # Sort cars by similarity scores, best match first. `flatten()` (rather than
    # `squeeze()`) guarantees a list even when there is only a single car.
    recommended_cars = torch.argsort(similarity_scores, dim=-1, descending=True).flatten().tolist()

# Keep the NumPy copy of the car embeddings available under the original name.
car_embeddings = car_embedding_matrix.cpu().numpy()

# Print top-k recommended cars (fewer if the dataset has fewer than top_k cars).
top_k = 5
scores = similarity_scores.flatten().tolist()
for rank, car_index in enumerate(recommended_cars[:top_k], start=1):
    print(f"Rank {rank}: Car {car_index}, Similarity Score: {scores[car_index]:.4f}")


User dataset

http://users.cecs.anu.edu.au/~u4940058/CarPreferences.html

User preference
https://www.kaggle.com/datasets/steventaylor11/stated-preferences-for-car-choice?resource=download

Methodology
https://nycdatascience.com/blog/student-works/data-study-on-car-brand-preferences/



