In [None]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
import re
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch
from torch.utils.data import DataLoader, TensorDataset, random_split
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, AdamW, get_scheduler
from sklearn.metrics import accuracy_score


In [None]:
# Configuration
num_epochs = 25  # Number of epochs for training
batch_size = 32  # Batch size
learning_rate = 3e-5  # Learning rate
seed = 42  # Fixed seed so batch shuffling and classifier-head init are repeatable

# Seed torch's global RNG before any model or DataLoader is created
torch.manual_seed(seed)

# Load the question/answer pairs from the Excel workbook
df = pd.read_excel('final.xlsx')

In [None]:

# Prepare training data
# Tokenizer loaded from the same checkpoint name the classifier is built from
tokenizer = DistilBertTokenizer.from_pretrained(
    'distilbert-base-uncased',
)
# Cached English stop-word set; filled on the first preprocess_text call.
_STOP_WORDS = None


def _english_stop_words():
  """Return the NLTK English stop words, loading them at most once."""
  global _STOP_WORDS
  if _STOP_WORDS is None:
    try:
      words = stopwords.words('english')
    except LookupError:
      # Corpus is not on disk yet: fetch it once, then retry.
      nltk.download('stopwords', quiet=True)
      words = stopwords.words('english')
    _STOP_WORDS = frozenset(words)
  return _STOP_WORDS


def preprocess_text(text):
  """Normalise one question before tokenization.

  The text is lowercased, every character that is neither a word character
  nor whitespace is removed, and English stop words are dropped. Stop words
  are filtered *after* punctuation removal, so a contraction such as
  "don't" becomes "dont" and is kept.

  Args:
    text: The raw question string.

  Returns:
    The remaining words joined by single spaces (may be an empty string).
  """
  # Lowercase text
  text = text.lower()

  # Remove punctuation
  text = re.sub(r'[^\w\s]', '', text)

  # The stop-word set is loaded once and reused, instead of calling
  # nltk.download() and rebuilding the set for every row.
  stop_words = _english_stop_words()
  return ' '.join(word for word in text.split() if word not in stop_words)

# Clean every question, then tokenize the whole set into padded tensors.
train_questions = df['Question'].apply(preprocess_text).tolist()
# max_length=16 caps each (already stop-word-filtered) question at 16 tokens.
train_inputs = tokenizer(train_questions, padding=True, truncation=True, max_length=16, return_tensors="pt")
# Show a small sample and the tensor shapes instead of dumping the full dataset.
print(train_questions[:5])
print({name: tuple(tensor.shape) for name, tensor in train_inputs.items()})



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_

['place order', 'steps need follow place order', 'make order', 'guide make purchase', 'order product', 'go ordering something', 'whats process placing order', 'could tell order item', 'order arrive', 'soon expect receive order', 'estimated delivery time order', 'could let know order delivered', 'much time delivery take', 'many days delivery take', 'anticipate arrival order', 'receive order', 'much delivery cost', 'ho much money delivery', 'delivery price', 'fee delivery', 'much charged delivery', 'could inform delivery cost', 'encountered problem shopping website', 'faced issue trying make purchase website', 'theres problem encountered shopping online site', 'ran difficulty browsing website shopping', 'experienced problem shopping session website', 'shopping website encountered problem', 'want cancel order', 'dont want order anymore', 'would like cancel order', 'cancel order please', 'need cancel order placed', 'possible cancel order', 'id like request cancellation order', 'estimated t

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package stopwords to /ro

In [None]:
# Encode labels: give every distinct answer an integer class id
unique_responses = df['Answer '].unique().tolist()
print(unique_responses)
response_to_label = dict(zip(unique_responses, range(len(unique_responses))))
print(response_to_label)
# Translate each row's answer into its class id (-1 when the answer is not in the mapping)
train_labels = [response_to_label.get(answer, -1) for answer in df['Answer ']]
print(train_labels)

train_labels = torch.tensor(train_labels)


['Welcome! You can place your order on our website giftedstore.tn or send us a direct message on Instagram.  If you choose Instagram, please include your name, phone number, address, and the desired product', 'You will receive your product within 2 to 4 working days maximum', 'The delivery price is 7 TND.', "We're so sorry to hear you encountered a problem while shopping on our website! To help us resolve the issue quickly, could you please tell us a little more about the problem you experienced? In the meantime, you can still place your order through our Instagram DMs! Just send us a message with the following information: Your Name Phone Number Delivery Address List of Items You'd Like to Order (including any specific details like color or personalization options, if applicable)", 'Hello! Your order is already out for delivery. If you still wish to cancel, you can inform the delivery company when they call you. However, if you prefer to keep your order, it should arrive soon! Can we 

In [None]:
# Bundle token ids, attention masks and labels into a shuffled, batched loader
train_dataset = TensorDataset(
    *(train_inputs[key] for key in ('input_ids', 'attention_mask')),
    train_labels,
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)


In [None]:
# Initialize the model: one output class per distinct answer
model = DistilBertForSequenceClassification.from_pretrained(
    'distilbert-base-uncased',
    num_labels=len(unique_responses),
)
# torch.optim.AdamW is used in place of transformers.AdamW, which is deprecated
# upstream. eps and weight_decay are pinned to the old transformers defaults
# (1e-6 and 0.0) so the optimisation stays as close as possible to before.
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, eps=1e-6, weight_decay=0.0)
# The scheduler is stepped once per batch, so it decays over batches * epochs steps.
num_training_steps = len(train_loader) * num_epochs
lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Fine-tune the classifier; report mean batch loss and training accuracy per epoch.
model.train()
for epoch in range(num_epochs):
    total_loss = 0
    correct_predictions = 0
    total_predictions = 0

    for inputs, masks, labels in train_loader:
        # Forward pass; passing labels makes the model return the loss as well.
        optimizer.zero_grad()
        outputs = model(input_ids=inputs, attention_mask=masks, labels=labels)
        loss, logits = outputs.loss, outputs.logits

        # Backward pass, then advance the optimizer and the per-step LR schedule.
        loss.backward()
        optimizer.step()
        lr_scheduler.step()

        # Running statistics for this epoch.
        total_loss += loss.item()
        preds = logits.argmax(dim=1)
        correct_predictions += (preds == labels).sum().item()
        total_predictions += labels.shape[0]

    avg_train_loss = total_loss / len(train_loader)
    train_accuracy = correct_predictions / total_predictions

    print(f'Epoch {epoch + 1}: Average Loss = {avg_train_loss:.4f}, Accuracy = {train_accuracy:.4f}')


Epoch 1: Average Loss = 3.5887, Accuracy = 0.0396
Epoch 2: Average Loss = 3.5197, Accuracy = 0.0941
Epoch 3: Average Loss = 3.4290, Accuracy = 0.2178
Epoch 4: Average Loss = 3.2926, Accuracy = 0.3713
Epoch 5: Average Loss = 3.1446, Accuracy = 0.5990
Epoch 6: Average Loss = 2.9876, Accuracy = 0.7475
Epoch 7: Average Loss = 2.8449, Accuracy = 0.7723
Epoch 8: Average Loss = 2.6962, Accuracy = 0.8168
Epoch 9: Average Loss = 2.5688, Accuracy = 0.8614
Epoch 10: Average Loss = 2.4403, Accuracy = 0.8861
Epoch 11: Average Loss = 2.2837, Accuracy = 0.8960
Epoch 12: Average Loss = 2.1702, Accuracy = 0.9010
Epoch 13: Average Loss = 2.0713, Accuracy = 0.9208
Epoch 14: Average Loss = 1.9966, Accuracy = 0.9554
Epoch 15: Average Loss = 1.8971, Accuracy = 0.9505
Epoch 16: Average Loss = 1.8260, Accuracy = 0.9505
Epoch 17: Average Loss = 1.8115, Accuracy = 0.9505
Epoch 18: Average Loss = 1.6809, Accuracy = 0.9455
Epoch 19: Average Loss = 1.6555, Accuracy = 0.9505
Epoch 20: Average Loss = 1.6135, Accurac

In [None]:
# Prediction function
def predict(question, model, tokenizer, label_to_response, preprocess=None, max_length=16):
    """Return the canned answer whose class the model scores highest.

    The question goes through the same cleaning and the same token cap as the
    training questions, so the model sees inference text in the form it was
    trained on.

    Args:
        question: Raw user question.
        model: Sequence-classification model; called with the tokenizer
            output and expected to expose ``.logits`` on its result.
        tokenizer: Callable that turns text into model inputs.
        label_to_response: Mapping from class id to answer text.
        preprocess: Optional callable applied to ``question`` before
            tokenization. Defaults to this notebook's ``preprocess_text``.
        max_length: Token cap passed to the tokenizer; 16 mirrors the value
            used when the training questions were tokenized.

    Returns:
        The answer text mapped to the arg-max class.

    Raises:
        KeyError: If the predicted class id is missing from ``label_to_response``.
    """
    if preprocess is None:
        # Resolved at call time so the notebook's own cleaning function is used.
        preprocess = preprocess_text

    model.eval()
    cleaned_question = preprocess(question)
    inputs = tokenizer(
        cleaned_question,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_length,
    )

    # Keep the inputs on the same device as the model when it reports one.
    device = getattr(model, "device", None)
    if device is not None:
        inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_label = torch.argmax(logits, dim=1).item()
    return label_to_response[predicted_label]

# Invert the label mapping: class id -> answer text
label_to_response = {label: response for response, label in response_to_label.items()}

# Interactive chat function
def chat(model, tokenizer, label_to_response):
    """Run a console question/answer loop until the user types 'exit'.

    Each non-empty line read from stdin is passed to ``predict`` and the
    returned answer is printed. The loop ends when the user types 'exit'
    (any casing, surrounding whitespace ignored), when stdin is closed, or
    when the user interrupts the prompt.

    Args:
        model: Classification model forwarded to ``predict``.
        tokenizer: Tokenizer forwarded to ``predict``.
        label_to_response: Mapping from class id to answer text, forwarded
            to ``predict``.
    """
    print("Vous pouvez commencer à poser des questions. Tapez 'exit' pour quitter la discussion.")
    while True:
        try:
            question = input("Vous: ").strip()
        except (EOFError, KeyboardInterrupt):
            # stdin closed or prompt interrupted: end the session without a traceback.
            print("Discussion terminée.")
            break
        if not question:
            # Nothing to classify; ask again instead of sending an empty string to the model.
            continue
        if question.lower() == 'exit':
            print("Discussion terminée.")
            break
        response = predict(question, model, tokenizer, label_to_response)
        print(f"Bot: {response}")

# Example usage: start the interactive session with the fine-tuned model
chat(model=model, tokenizer=tokenizer, label_to_response=label_to_response)

Vous pouvez commencer à poser des questions. Tapez 'exit' pour quitter la discussion.
Vous: bye
Bot: see you soon 
Vous: thank you
Bot: you're welcome , see you soon 
Vous: how much the delivery cost
Bot: The delivery price is 7 TND.
Vous: how many days the delivery
Bot: You will receive your product within 2 to 4 working days maximum
Vous: can you talk xith the delivery to have it tommorow
Bot: We will get in touch with them to see how we can best assist you.
Vous: exit
Discussion terminée.
