Import necessary libraries

In [None]:
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import joblib  


Dataset Preparation

In [None]:
# Load the customer-service sample; the columns shown by the preview below are
# flags, utterance, category and intent.
file_path = 'Bitext_Sample_Customer_Service_Training_Dataset.csv'
df = pd.read_csv(file_path)

# Preview the first rows.
print(df.head())

# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted a stray "None" line.
df.info()

# Count missing values per column.
print(df.isnull().sum())


  flags                                          utterance category  \
0    BM            I have problems with canceling an order    ORDER   
1   BIM  how can I find information about canceling ord...    ORDER   
2     B          I need help with canceling the last order    ORDER   
3   BIP  could you help me cancelling the last order I ...    ORDER   
4     B            problem with cancelling an order I made    ORDER   

         intent  
0  cancel_order  
1  cancel_order  
2  cancel_order  
3  cancel_order  
4  cancel_order  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8175 entries, 0 to 8174
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   flags      8175 non-null   object
 1   utterance  8175 non-null   object
 2   category   8175 non-null   object
 3   intent     8175 non-null   object
dtypes: object(4)
memory usage: 255.6+ KB
None
flags        0
utterance    0
category     0
intent       0
dtype: int64


Data Preprocessing: Text Cleaning, Train/Test Split, and TF-IDF Vectorization

In [None]:
def clean_text(text):
    """Normalize a value into lowercase text separated by single spaces.

    The value is first converted with ``str()``. Every non-word character
    (anything other than a letter, digit or underscore) is replaced by a
    space, each run of whitespace is collapsed to one space, and the result
    is lowercased. Leading and trailing spaces are not stripped.

    Args:
        text: Value to normalize; non-string values are stringified.

    Returns:
        The normalized string.
    """
    # Inner substitution blanks out punctuation; outer one squeezes the gaps.
    collapsed = re.sub(r'\s+', ' ', re.sub(r'\W', ' ', str(text)))
    return collapsed.lower()

# Normalize both the model input and the label column with the same cleaner.
for column in ('utterance', 'intent'):
    df[column] = df[column].map(clean_text)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df['utterance'],
    df['intent'],
    test_size=0.2,
    random_state=42,
)

# Fit the TF-IDF vocabulary (capped at 1000 features) on the training split
# only, then convert both splits to dense arrays.
vectorizer = TfidfVectorizer(max_features=1000)
X_train_tfidf = vectorizer.fit_transform(X_train).toarray()
X_test_tfidf = vectorizer.transform(X_test).toarray()


Model Selection and Training

In [None]:
# Fit a logistic-regression intent classifier on the TF-IDF features, allowing
# up to 1000 solver iterations. fit() returns the estimator itself, so the
# construction and training can be chained.
model = LogisticRegression(max_iter=1000).fit(X_train_tfidf, y_train)
# Trailing expression keeps the fitted estimator as the cell's displayed result.
model


Model Evaluation

In [None]:
# Predict intents for the held-out utterances.
y_pred = model.predict(X_test_tfidf)

# Compute both summaries first, then print them.
report_text = classification_report(y_test, y_pred)
test_accuracy = accuracy_score(y_test, y_pred)

print("Classification Report:\n", report_text)
print("Accuracy:", test_accuracy)


Classification Report:
                           precision    recall  f1-score   support

            cancel_order       0.98      0.98      0.98        62
            change_order       1.00      1.00      1.00        70
 change_shipping_address       1.00      0.98      0.99        60
  check_cancellation_fee       0.99      1.00      0.99        66
           check_invoice       1.00      1.00      1.00        63
   check_payment_methods       1.00      0.96      0.98        68
     check_refund_policy       0.98      1.00      0.99        59
               complaint       1.00      1.00      1.00        52
contact_customer_service       0.98      1.00      0.99        61
     contact_human_agent       1.00      1.00      1.00        57
          create_account       0.98      0.94      0.96        62
          delete_account       1.00      1.00      1.00        53
        delivery_options       0.96      1.00      0.98        55
         delivery_period       1.00      1.00      

Dialog Management

In [None]:
# Map every intent label to a reply the bot can send back.
# The earlier mapping, df.set_index('intent')['utterance'].to_dict(), stored a
# customer utterance from the training data for each intent (the last row wins
# for a repeated key), so the bot answered users with another customer's
# question instead of a response.
# NOTE(review): these are templated placeholders -- replace them with curated
# replies per intent once those are available.
intent_responses = {
    intent: (
        f"I understand you need help with: {intent.replace('_', ' ')}. "
        "Let me assist you with that."
    )
    for intent in sorted(df['intent'].unique())
}


Chatbot Response Function


In [None]:
def chatbot_response(user_input):
    """Return the chatbot's reply for a single user message.

    The message is normalized with ``clean_text``, vectorized with the fitted
    TF-IDF ``vectorizer``, classified by ``model``, and the predicted intent is
    looked up in ``intent_responses``.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        The reply string for the predicted intent, or a fallback message when
        the input contains no term from the fitted vocabulary or the predicted
        intent has no entry in ``intent_responses``.
    """
    fallback = "I'm sorry, I didn't understand that. Could you please rephrase?"

    features = vectorizer.transform([clean_text(user_input)])

    # An all-zero vector means none of the words are in the fitted vocabulary
    # (e.g. empty input or gibberish). predict() would still return some
    # intent for it, so answer with the fallback instead of a guess.
    if features.nnz == 0:
        return fallback

    # Densify to match the array format the model was trained on.
    intent = model.predict(features.toarray())[0]

    return intent_responses.get(intent, fallback)


Deployment

In [None]:
# Persist the fitted classifier and its TF-IDF vectorizer to disk; both are
# needed together to score new text later.
joblib.dump(value=model, filename='chatbot_model.pkl')
joblib.dump(value=vectorizer, filename='tfidf_vectorizer.pkl')


['tfidf_vectorizer.pkl']

Continuous Improvement: Logging Interactions for Later Review

In [None]:
import datetime

def log_interaction(user_input, response, log_path="interaction_log.txt"):
    """Append one timestamped user/chatbot exchange to a log file.

    Args:
        user_input: Text the user typed.
        response: Reply the chatbot produced.
        log_path: File to append to; defaults to ``interaction_log.txt`` in
            the current working directory.
    """
    timestamp = datetime.datetime.now()
    # Explicit UTF-8: users may type non-ASCII text, which the platform's
    # default encoding is not guaranteed to be able to write.
    with open(log_path, "a", encoding="utf-8") as file:
        file.write(f"{timestamp}: User: {user_input} | Chatbot: {response}\n")


Documentation and Reporting

In [None]:
# Request the evaluation metrics as a dictionary rather than formatted text.
report = classification_report(y_test, y_pred, output_dict=True)

# Transpose so the dictionary's top-level keys become rows, then export.
report_df = pd.DataFrame(report).T
report_df.to_csv("chatbot_evaluation_report.csv")

print("Report saved as chatbot_evaluation_report.csv")


Report saved as chatbot_evaluation_report.csv


In [None]:
# Interactive console loop: read a message, answer it, and log the exchange
# until the user types 'quit'.
print("Chatbot: Type 'quit' to exit")
while True:
    try:
        user_input = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # A closed input stream or an interrupt ends the session cleanly
        # instead of surfacing a traceback.
        print("Chatbot: Goodbye!")
        break
    # Ignore surrounding whitespace so inputs such as " quit " still exit.
    if user_input.strip().lower() == 'quit':
        print("Chatbot: Goodbye!")
        break
    response = chatbot_response(user_input)
    print(f"Chatbot: {response}")
    log_interaction(user_input, response)


Chatbot: Type 'quit' to exit
Chatbot: help recovering my password
Chatbot: help recovering my password
Chatbot: help recovering my password
Chatbot: help recovering my password
Chatbot: tell me more about user account deletions
Chatbot: help using another user profile
Chatbot: what do i have to do to check my order eta 
Chatbot: what do i have to do to check my order eta 
Chatbot: could you help me to cancel my last order 
Chatbot: help recovering my password
Chatbot: where do i check how soon can i expect the item 
Chatbot: i am trying to buy something
Chatbot: help recovering my password
Chatbot: Goodbye!
