# In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

# Load pre-trained LLM and tokenizer.
# NOTE(review): "Llama3.1" only resolves if a local directory of that name
# holds the checkpoint; a Hugging Face Hub download presumably needs a full
# repo id (e.g. "meta-llama/Llama-3.1-8B") -- confirm the intended source.
model_name = "Llama3.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Llama tokenizers ship without a padding token, and the sequence-classification
# head needs `pad_token_id` to find the last real token of each sequence once a
# batch holds more than one example. Reuse EOS as padding when none is set.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
if model.config.pad_token_id is None:
    model.config.pad_token_id = tokenizer.pad_token_id

# Define custom dataset class for transcripts
class TranscriptDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing raw transcripts with integer class labels.

    Each item is tokenized on access and padded/truncated to ``max_length``,
    so every example has the same shape and the default ``DataLoader``
    collation can stack examples into a batch.

    Args:
        transcripts: Sized, indexable collection of transcript strings.
        labels: Sized, indexable collection of integer class ids, aligned
            with ``transcripts``.
        tokenizer: Callable tokenizer. When ``None`` (the default) the
            module-level ``tokenizer`` is looked up at item-access time.
        max_length: Token length every returned example is padded or
            truncated to.

    Raises:
        ValueError: If ``transcripts`` and ``labels`` differ in length.
    """

    def __init__(self, transcripts, labels, tokenizer=None, max_length=512):
        if len(transcripts) != len(labels):
            raise ValueError(
                f"transcripts and labels must have the same length "
                f"(got {len(transcripts)} and {len(labels)})"
            )
        self.transcripts = transcripts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def _resolve_tokenizer(self):
        """Return the tokenizer to use, making sure it is able to pad."""
        tok = self.tokenizer if self.tokenizer is not None else tokenizer
        # Tokenizers without a padding token refuse to pad; fall back to EOS.
        # Objects that expose no `pad_token` attribute at all are left alone.
        if (
            getattr(tok, "pad_token", "") is None
            and getattr(tok, "eos_token", None) is not None
        ):
            tok.pad_token = tok.eos_token
        return tok

    def __getitem__(self, idx):
        """Return the tokenized example at ``idx`` as a dict of 1-D tensors."""
        tok = self._resolve_tokenizer()
        encoding = tok(
            self.transcripts[idx],
            return_tensors="pt",
            max_length=self.max_length,
            # Fixed-width padding keeps all examples stackable in a batch.
            padding="max_length",
            truncation=True,
        )
        return {
            # The tokenizer returns a leading batch axis of size 1; drop it.
            "input_ids": encoding["input_ids"].flatten(),
            "attention_mask": encoding["attention_mask"].flatten(),
            # Class-index targets for CrossEntropyLoss must be int64.
            "labels": torch.tensor(self.labels[idx], dtype=torch.long),
        }

    def __len__(self):
        """Return the number of transcripts."""
        return len(self.transcripts)

# Load and prepare dataset
transcripts = ["Transcript 1", "Transcript 2"]
labels = [0, 1]  # Example labels for product features
dataset = TranscriptDataset(transcripts, labels)

# Fine-tune the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

# Build the loader once; `shuffle=True` still draws a new order every epoch.
train_loader = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=True)

for epoch in range(5):
    model.train()
    epoch_loss = 0.0
    num_batches = 0
    for batch in train_loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        # Distinct name so the module-level `labels` list is not overwritten.
        batch_labels = batch["labels"].to(device)

        optimizer.zero_grad()

        # Labels are deliberately not passed to the model: doing so would make
        # it compute its own loss on top of the one from `criterion`.
        outputs = model(input_ids, attention_mask=attention_mask)
        loss = criterion(outputs.logits, batch_labels)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        num_batches += 1

    # Report the mean over the epoch rather than only the last batch; an empty
    # loader yields NaN instead of raising on an undefined `loss`.
    mean_loss = epoch_loss / num_batches if num_batches else float("nan")
    print(f"Epoch {epoch+1}, Loss: {mean_loss}")

# Use the fine-tuned model for product description generation
# NOTE(review): `model` is loaded through AutoModelForSequenceClassification;
# a classification head has no language-model head, so `generate` is expected
# to fail for it. Text generation presumably needs a causal-LM checkpoint --
# confirm which model this function is meant to use.
def generate_product_description(transcript):
    """Generate text from ``transcript`` with the module-level model.

    Args:
        transcript: Prompt text the generation is conditioned on.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed.
    """
    # Tokenizer output is created on CPU; move it to wherever the model lives.
    inputs = tokenizer(transcript, return_tensors="pt").to(model.device)
    # Leave training mode so dropout does not perturb the output.
    model.eval()
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model.generate(**inputs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example usage: run one sample prompt through the generator and show the result.
transcript = "Discuss the new smartwatch features."
description = generate_product_description(transcript=transcript)
print(description)