In [2]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import Trainer, TrainingArguments

In [106]:
from sklearn.utils.class_weight import compute_class_weight

In [3]:
df = pd.read_csv("Final_AB_Complaint_Classification.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,Cleaned_Review,Cleaned_Name,Topics,Category,Is_Repeated,Sentiment_Score,Customer_Satisfaction,Review_Days,Response_Days,...,Food Options,Food Quality,Atmosphere,Value for Money,Hygiene,Others,Severity_Label,Urgency_Label,Frequency_Label,Unique_Customer_Label
0,0,stop eating place visited bangalores nd punes ...,pramod kumar,-1.0,['Others'],False,-0.35,No Response,180.0,-1,...,0,0,0,0,0,1,0,1,1,1
1,1,food service ambience,abhinav deep,9.0,"['Food Quality', 'Atmosphere', 'Service Issue']",False,0.0,High Satisfaction,365.0,365,...,0,1,1,0,0,0,2,0,1,0
2,2,idiotic varieties price charged varieties boil...,vijay nammi,-1.0,['Others'],False,-0.388889,No Response,30.0,-1,...,0,0,0,0,0,1,0,1,1,0
3,3,posting live one worst places dont visit pathe...,surya ajay,34.0,"['Service Issue', 'Food Quality']",False,-0.772727,High Satisfaction,365.0,365,...,0,1,0,0,0,0,0,1,1,0
4,4,pure vegetarians ordered veg biryani swiggy go...,sai hithesh,5.0,['Food Options'],False,0.214286,No Response,180.0,-1,...,1,0,0,0,0,0,1,0,1,0


In [108]:
df.shape

(4205, 21)

In [109]:
import torch
print("CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)
print("PyTorch version:", torch.__version__)
print("Number of GPUs:", torch.cuda.device_count())


CUDA available: True
CUDA version: 12.1
PyTorch version: 2.5.0+cu121
Number of GPUs: 1


In [110]:
df.columns

Index(['Unnamed: 0', 'Cleaned_Review', 'Cleaned_Name', 'Topics', 'Category',
       'Is_Repeated', 'Sentiment_Score', 'Customer_Satisfaction',
       'Review_Days', 'Response_Days', 'Service Issue', 'Food Options',
       'Food Quality', 'Atmosphere', 'Value for Money', 'Hygiene', 'Others',
       'Severity_Label', 'Urgency_Label', 'Frequency_Label',
       'Unique_Customer_Label'],
      dtype='object')

In [111]:
# Split the data into training and testing sets
X = df['Cleaned_Review']
y = df[['Service Issue', "Food Options", "Food Quality", "Atmosphere", "Value for Money", "Hygiene", "Others"]]
y = y.values  # Convert DataFrame to numpy array

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [112]:
# Initialize the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


In [113]:
# Tokenize the input text
train_encodings = tokenizer(list(X_train), truncation=True, padding=True, max_length=128)
test_encodings = tokenizer(list(X_test), truncation=True, padding=True, max_length=128)

In [114]:
# Create a PyTorch dataset class
class ComplaintDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)  # Use float for multi-label
        return item

    def __len__(self):
        return len(self.labels)

# Create datasets
train_dataset = ComplaintDataset(train_encodings, y_train)
test_dataset = ComplaintDataset(test_encodings, y_test)

In [115]:
# Load the BERT model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=y.shape[1])

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [116]:
# Compute class weights to handle class imbalance
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train.flatten())
class_weights = torch.tensor(class_weights, dtype=torch.float).to('cuda' if torch.cuda.is_available() else 'cpu')

In [117]:
# Define a custom loss function
class WeightedBCEWithLogitsLoss(torch.nn.Module):
    def __init__(self, weights):
        super().__init__()
        self.loss_fn = torch.nn.BCEWithLogitsLoss(pos_weight=weights)

    def forward(self, logits, labels):
        return self.loss_fn(logits, labels)

In [118]:
# Define training arguments with adjustments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,  # Increase the number of epochs
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    fp16=True,
    eval_strategy="epoch",  # Change this line from evaluation_strategy to eval_strategy
)

In [119]:
def compute_metrics(p):
    # Thresholding predictions for multi-label accuracy
    predictions = (p.predictions > 0).astype(int)  # Binary predictions
    accuracy = (predictions == p.label_ids).mean(axis=1).mean()  # Mean accuracy across samples
    return {
        'accuracy': accuracy
    }

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics  # Use the defined function
)


In [120]:
# Train the model
trainer.train()


  0%|          | 0/1263 [00:00<?, ?it/s]

{'loss': 0.7146, 'grad_norm': 2.9257380962371826, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.02}
{'loss': 0.6922, 'grad_norm': 2.5693769454956055, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.05}
{'loss': 0.6836, 'grad_norm': 2.2068824768066406, 'learning_rate': 3e-06, 'epoch': 0.07}
{'loss': 0.6621, 'grad_norm': 2.092958927154541, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.1}
{'loss': 0.6515, 'grad_norm': 1.8742966651916504, 'learning_rate': 5e-06, 'epoch': 0.12}
{'loss': 0.6262, 'grad_norm': 1.9232337474822998, 'learning_rate': 6e-06, 'epoch': 0.14}
{'loss': 0.5995, 'grad_norm': 2.1151862144470215, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.17}
{'loss': 0.5596, 'grad_norm': 2.0005648136138916, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.19}
{'loss': 0.5186, 'grad_norm': 1.6371268033981323, 'learning_rate': 9e-06, 'epoch': 0.21}
{'loss': 0.5038, 'grad_norm': 1.0715683698654175, 'learning_rate': 1e-05, 'epoch': 0.24}
{'loss': 0.5185, 'grad_norm': 

  0%|          | 0/106 [00:00<?, ?it/s]

{'eval_loss': 0.36681947112083435, 'eval_accuracy': 0.8235094275522337, 'eval_runtime': 175.2018, 'eval_samples_per_second': 4.8, 'eval_steps_per_second': 0.605, 'epoch': 1.0}
{'loss': 0.3436, 'grad_norm': 7.286369323730469, 'learning_rate': 4.3e-05, 'epoch': 1.02}
{'loss': 0.3715, 'grad_norm': 1.7943238019943237, 'learning_rate': 4.4000000000000006e-05, 'epoch': 1.05}
{'loss': 0.3326, 'grad_norm': 1.9986101388931274, 'learning_rate': 4.5e-05, 'epoch': 1.07}
{'loss': 0.3612, 'grad_norm': 1.4151784181594849, 'learning_rate': 4.600000000000001e-05, 'epoch': 1.09}
{'loss': 0.3661, 'grad_norm': 2.319204568862915, 'learning_rate': 4.7e-05, 'epoch': 1.12}
{'loss': 0.3697, 'grad_norm': 1.7289930582046509, 'learning_rate': 4.8e-05, 'epoch': 1.14}
{'loss': 0.3839, 'grad_norm': 1.7256921529769897, 'learning_rate': 4.9e-05, 'epoch': 1.16}
{'loss': 0.3693, 'grad_norm': 2.247877836227417, 'learning_rate': 5e-05, 'epoch': 1.19}
{'loss': 0.346, 'grad_norm': 1.6607941389083862, 'learning_rate': 4.9344

  0%|          | 0/106 [00:00<?, ?it/s]

{'eval_loss': 0.32727956771850586, 'eval_accuracy': 0.8462714455580091, 'eval_runtime': 179.1888, 'eval_samples_per_second': 4.693, 'eval_steps_per_second': 0.592, 'epoch': 2.0}
{'loss': 0.2957, 'grad_norm': 3.009728193283081, 'learning_rate': 2.7064220183486238e-05, 'epoch': 2.02}
{'loss': 0.2766, 'grad_norm': 2.741769313812256, 'learning_rate': 2.6408912188728702e-05, 'epoch': 2.04}
{'loss': 0.2591, 'grad_norm': 1.2283294200897217, 'learning_rate': 2.575360419397117e-05, 'epoch': 2.07}
{'loss': 0.2396, 'grad_norm': 2.654691219329834, 'learning_rate': 2.5098296199213634e-05, 'epoch': 2.09}
{'loss': 0.2696, 'grad_norm': 4.424205780029297, 'learning_rate': 2.4442988204456098e-05, 'epoch': 2.11}
{'loss': 0.2466, 'grad_norm': 4.38755464553833, 'learning_rate': 2.378768020969856e-05, 'epoch': 2.14}
{'loss': 0.2571, 'grad_norm': 2.9645440578460693, 'learning_rate': 2.3132372214941023e-05, 'epoch': 2.16}
{'loss': 0.2582, 'grad_norm': 1.682331919670105, 'learning_rate': 2.2477064220183487e-05

  0%|          | 0/106 [00:00<?, ?it/s]

{'eval_loss': 0.3148002624511719, 'eval_accuracy': 0.859860710039069, 'eval_runtime': 178.0788, 'eval_samples_per_second': 4.723, 'eval_steps_per_second': 0.595, 'epoch': 3.0}
{'train_runtime': 8005.2001, 'train_samples_per_second': 1.261, 'train_steps_per_second': 0.158, 'train_loss': 0.3473458559571611, 'epoch': 3.0}


TrainOutput(global_step=1263, training_loss=0.3473458559571611, metrics={'train_runtime': 8005.2001, 'train_samples_per_second': 1.261, 'train_steps_per_second': 0.158, 'total_flos': 409725472825800.0, 'train_loss': 0.3473458559571611, 'epoch': 3.0})

In [121]:
# Evaluate the model
predictions = trainer.predict(test_dataset)
preds = np.argmax(predictions.predictions, axis=1)

  0%|          | 0/106 [00:00<?, ?it/s]

In [122]:
# # Get classification report
# print(classification_report(y_test, preds, target_names=[
#     'Service Issue', 'Technical Issue', 'Food Quality', 
#     'Atmosphere', 'Value for Money', 'Others', 'Hygiene'
# ]))

In [127]:
# Evaluate the model
predictions = trainer.predict(test_dataset)

# Convert predictions to binary format
preds = (predictions.predictions > 0.5).astype(int)

# Get classification report
print(classification_report(y_test, preds, target_names=[
    'Service Issue', 'Food Options', 'Food Quality', 
    'Atmosphere', 'Value for Money', 'Others', 'Hygiene'
]))


                 precision    recall  f1-score   support

  Service Issue       0.67      0.55      0.61       173
   Food Options       0.77      0.71      0.74       188
   Food Quality       0.77      0.45      0.56       191
     Atmosphere       0.75      0.70      0.72       191
Value for Money       0.56      0.23      0.33        39
         Others       0.88      0.43      0.58        70
        Hygiene       0.59      0.42      0.49       346

      micro avg       0.70      0.53      0.60      1198
      macro avg       0.71      0.50      0.57      1198
   weighted avg       0.70      0.53      0.59      1198
    samples avg       0.54      0.51      0.52      1198



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [128]:
predictions

PredictionOutput(predictions=array([[-3.7128906 , -1.8447266 , -0.09204102, ..., -4.4179688 ,
        -4.6484375 ,  0.5004883 ],
       [-4.5390625 ,  1.4921875 ,  0.6582031 , ..., -2.6933594 ,
        -4.1796875 , -1.9072266 ],
       [-0.640625  , -4.9140625 , -2.3671875 , ..., -5.3398438 ,
        -4.1640625 ,  0.9921875 ],
       ...,
       [-1.5078125 , -3.7675781 , -4.4296875 , ..., -5.390625  ,
        -4.078125  ,  0.30566406],
       [-3.3378906 , -3.0566406 , -4.09375   , ..., -4.3945312 ,
        -4.1679688 , -0.36083984],
       [ 0.66748047, -4.9570312 , -3.2832031 , ..., -5.4140625 ,
        -3.2324219 , -0.16320801]], dtype=float32), label_ids=array([[0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32), metrics={'test_loss': 0.3148002624511719, 'test_accuracy': 0.85986071003906

In [130]:
import pickle
from sklearn.metrics import classification_report

# Evaluate the model
predictions = trainer.predict(test_dataset)

# Convert predictions to binary format
preds = (predictions.predictions > 0.5).astype(int)

# Get the classification report
report = classification_report(y_test, preds, target_names=[
    'Service Issue', "Food Options", 'Food Quality',
    'Atmosphere', 'Value for Money', 'Others', 'Hygiene'
], output_dict=True)  # Use output_dict=True to get the report as a dictionary

# Save the predictions and classification report to a dictionary
output_data = {
    'predictions': preds,
    'classification_report': report
}

# Save the dictionary as a .pkl file
with open('model_evaluation_output.pkl', 'wb') as file:
    pickle.dump(output_data, file)

print("Output saved as 'model_evaluation_output.pkl'.")


  0%|          | 0/106 [00:00<?, ?it/s]

Output saved as 'model_evaluation_output.pkl'.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [132]:
# Save the fine-tuned model and tokenizer
save_directory = './results'  # Path where your fine-tuned model is saved
model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)

print(f"Model and tokenizer saved to {save_directory}.")


Model and tokenizer saved to ./results.


In [134]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification

# Load the fine-tuned BERT model and tokenizer
model_path = './results' # Path where the saved model and tokenizer are stored
model = BertForSequenceClassification.from_pretrained(model_path)
tokenizer = BertTokenizer.from_pretrained(model_path)

# Move model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()  # Set model to evaluation mode



In [153]:
# Function to classify a new review
def classify_review(review):
    # Tokenize the review
    inputs = tokenizer(review, truncation=True, padding=True, max_length=128, return_tensors='pt')

    # Move tensors to GPU if available
    inputs = {key: val.to(device) for key, val in inputs.items()}

    # Get the model's prediction
    with torch.no_grad():
        outputs = model(**inputs)

    # Apply sigmoid to get probabilities for multi-label classification
    logits = outputs.logits
    probs = torch.sigmoid(logits).cpu().numpy()

    # Convert probabilities to binary predictions (threshold of 0.5)
    predictions = (probs > 0.5).astype(int)

    # Define the categories
    categories = ['Service Issue', "Food Options", 'Food Quality', 
                  'Atmosphere', 'Value for Money', 'Others', 'Hygiene']

    # Extract the categories where predictions are True
    predicted_categories = [category for category, pred in zip(categories, predictions[0]) if pred]

    return predicted_categories

In [154]:
# Example usage
new_review = "Service and food not good; I don't like the ambience."
classification_result = classify_review(new_review)

In [155]:
# Print only the categories that are relevant
print("Relevant Categories:")
print(classification_result)


Relevant Categories:
['Service Issue', 'Food Quality', 'Atmosphere']


In [156]:
# Save the function to a pickle file
with open('classification_function.pkl', 'wb') as f:
    pickle.dump(classify_review, f)

print("Classification function has been saved to classification_function.pkl")

Classification function has been saved to classification_function.pkl


In [157]:
import pickle

# Load the classification function from the pickle file
with open('classification_function.pkl', 'rb') as f:
    classify_review = pickle.load(f)

# Example review
new_review = "I had a great time, but the food was subpar."
classification_result = classify_review(new_review)

print("Relevant Categories from loaded function:")
print(classification_result)


Relevant Categories from loaded function:
['Hygiene']


In [140]:
from datetime import datetime, timedelta

In [150]:


# Function to get complaint counts, categories, and total complaints for the specified days
def get_complaint_counts(df, days):
    # Filter the DataFrame to get rows where Review_Days is less than or equal to the specified days
    filtered_df = df[df['Review_Days'] <= days]

    # Define the category columns explicitly
    category_columns = [
        'Service Issue',
        'Food Options',
        'Food Quality',
        'Atmosphere',
        'Value for Money',
        'Others',
        'Hygiene'
    ]

    # Count occurrences for each specified category
    category_counts = filtered_df[category_columns].sum()  # Use the list of column names

    # Create a dictionary to hold the categories and their counts
    complaint_info = {
        'categories': category_counts.index.tolist(),
        'counts': category_counts.values.tolist(),
        'total_complaints': filtered_df.shape[0]  # Count of total complaints
    }
    
    return complaint_info




In [151]:
days = 30  # Change this to 30, 60, or 90 as needed
complaint_info = get_complaint_counts(df, days)


In [152]:
print(f"\nComplaint Counts for the Last {days} Days:")
for category, count in zip(complaint_info['categories'], complaint_info['counts']):
        print(f"{category}: {count}")
print(f"Total Complaints for the Last {days} Days: {complaint_info['total_complaints']}")


Complaint Counts for the Last 30 Days:
Service Issue: 15
Food Options: 11
Food Quality: 17
Atmosphere: 14
Value for Money: 1
Others: 22
Hygiene: 5
Total Complaints for the Last 30 Days: 64


In [4]:
import pandas as pd
import pickle

# Function to get complaint counts, categories, and total complaints for the specified days
def get_complaint_counts(df, days):
    # Filter the DataFrame to get rows where Review_Days is less than or equal to the specified days
    filtered_df = df[df['Review_Days'] <= days]

    # Define the category columns explicitly
    category_columns = [
        'Service Issue',
        'Food Options',
        'Food Quality',
        'Atmosphere',
        'Value for Money',
        'Others',
        'Hygiene'
    ]

    # Count occurrences for each specified category
    category_counts = filtered_df[category_columns].sum()  # Use the list of column names

    # Create a dictionary to hold the categories and their counts
    complaint_info = {
        'categories': category_counts.index.tolist(),
        'counts': category_counts.values.tolist(),
        'total_complaints': filtered_df.shape[0]  # Count of total complaints
    }
    
    return complaint_info

# Specify the days you want to check
days = 30  # Change this to 30, 60, or 90 as needed
complaint_info = get_complaint_counts(df, days)

# Print the complaint counts
print(f"\nComplaint Counts for the Last {days} Days:")
for category, count in zip(complaint_info['categories'], complaint_info['counts']):
    print(f"{category}: {count}")
print(f"Total Complaints for the Last {days} Days: {complaint_info['total_complaints']}")

# Save the function to a pickle file
functions = {
    'get_complaint_counts': get_complaint_counts
}

with open('complaint_functions.pkl', 'wb') as f:
    pickle.dump(functions, f)

print("Complaint counting function has been saved to complaint_functions.pkl")



Complaint Counts for the Last 30 Days:
Service Issue: 15
Food Options: 11
Food Quality: 17
Atmosphere: 14
Value for Money: 1
Others: 22
Hygiene: 5
Total Complaints for the Last 30 Days: 64
Complaint counting function has been saved to complaint_functions.pkl


In [160]:
# import pickle
# from transformers import GPT2Tokenizer, GPT2LMHeadModel

# # Load the DistilGPT-2 model and tokenizer
# model_name = 'distilgpt2'
# tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# model = GPT2LMHeadModel.from_pretrained(model_name)

# # Save the model and tokenizer in a single pickle file
# with open('distil_gpt_model.pkl', 'wb') as f:
#     pickle.dump({'model': model, 'tokenizer': tokenizer}, f)

# print("DistilGPT-2 model and tokenizer saved successfully!")


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

DistilGPT-2 model and tokenizer saved successfully!


In [166]:
# import pickle
# import torch

# # Load DistilGPT-2 model and tokenizer from a single pickle file
# with open('distil_gpt_model.pkl', 'rb') as f:
#     gpt_data = pickle.load(f)
#     model = gpt_data['model']
#     tokenizer = gpt_data['tokenizer']

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model.to(device)
# model.eval()

# def generate_response(categories, severity, urgency, complaint_type):
#     prompt = (
#         f"You are a customer service representative responding to a complaint.\n"
#         f"Categories: {', '.join(categories)}\n"
#         f"Severity: {severity}\n"
#         f"Urgency: {urgency}\n"
#         f"Complaint Type: {complaint_type}\n"
#         "Imagine the customer feels frustrated due to this issue. "
#         "Respond in a way that shows understanding and offers a resolution."
#     )
    
#     # Encode the prompt
#     inputs = tokenizer.encode(prompt, return_tensors='pt').to(device)

#     # Set pad_token_id and attention_mask
#     pad_token_id = tokenizer.eos_token_id
#     attention_mask = torch.ones(inputs.shape, device=device)

#     # Generate response
#     with torch.no_grad():
#         outputs = model.generate(
#             inputs,
#             max_length=150,
#             num_return_sequences=1,
#             no_repeat_ngram_size=2,
#             early_stopping=True,
#             num_beams=5,
#             pad_token_id=pad_token_id,
#             attention_mask=attention_mask,
#             do_sample=True,
#             temperature=0.7,
#             top_k=50,
#             top_p=0.95
#         )

#     # Decode and return the generated response
#     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
#     return response.replace(prompt, "").strip()




# # Example usage
# categories = ["Service Issue", "Food Quality"]
# severity = "High"
# urgency = "Urgent"
# complaint_type = "First Time"

# auto_response = generate_response(categories, severity, urgency, complaint_type)
# print("Auto-Response:")
# print(auto_response)


Auto-Response:
If you have a problem with your service, you can contact Customer Service at 1-800-222-8477.


In [171]:
def generate_response(categories, severity, urgency, complaint_type, previous_complaint):
    # Initialize the base response
    base_response = (
        "Hi,\n"
        "We regret the inconveniences you have faced."
    )

    # Dynamic sentence parts based on categories
    category_sentences = {
        'Service Issue': "We understand that service issues can be frustrating, and we are committed to resolving this.",
        'Food Options': "We appreciate your feedback on our food options, and we are continually working to enhance our menu.",
        'Food Quality': "We take food quality seriously, and we apologize for not meeting your expectations.",
        'Atmosphere': "Creating a pleasant atmosphere is important to us, and we are sorry that we fell short.",
        'Value for Money': "We strive to provide value for money, and we appreciate your input on this matter.",
        'Hygiene': "Hygiene is our top priority, and we are dedicated to maintaining the highest standards.",
        'Others': "Thank you for sharing your thoughts; we take all feedback seriously."
    }

    # Construct the response based on categories
    category_responses = [category_sentences[category] for category in categories if category in category_sentences]
    
    # Add severity and urgency
    if severity == "High":
        severity_response = "We recognize the urgency of this matter."
    elif severity == "Medium":
        severity_response = "We acknowledge the issues you've raised."
    else:
        severity_response = "We appreciate your feedback."

    if urgency == "Urgent":
        urgency_response = "We will prioritize your concern and address it immediately."
    else:
        urgency_response = "We will take your feedback into consideration and work on improvements."

    # Append the responses to the base response
    full_response = f"{base_response}"+" " + " ".join(category_responses) + f"{severity_response}\n{urgency_response}"
    
    # Add repeated or first-time complaint information based on the type of complaint
    if previous_complaint:
        full_response += "We recognize that this type of complaint has been registered before, and we are committed to resolving it to prevent future occurrences."
    else:
        full_response += "This is the first time we have received this type of complaint, and we assure you it will be addressed promptly."

    # Closing statement without the restaurant name
    full_response += "\n\nWe request you to share your contact information at wecare@restaurant.com. We shall connect with you in no time and assist you with the issue raised. We assure you that this won't happen again next time.\n\nTeam Restaurant"

    return full_response

# Example usage
categories = ["Service Issue", "Food Quality"]
severity = "High"
urgency = "Urgent"
complaint_type = "First Time"
previous_complaint = True  # Set to True if this type of complaint has been registered before

auto_response = generate_response(categories, severity, urgency, complaint_type, previous_complaint)
print("Auto-Response:")
print(auto_response)


Auto-Response:
Hi,
We regret the inconveniences you have faced. We understand that service issues can be frustrating, and we are committed to resolving this. We take food quality seriously, and we apologize for not meeting your expectations.We recognize the urgency of this matter.
We will prioritize your concern and address it immediately.We recognize that this type of complaint has been registered before, and we are committed to resolving it to prevent future occurrences.

We request you to share your contact information at wecare@restaurant.com. We shall connect with you in no time and assist you with the issue raised. We assure you that this won't happen again next time.

Team Restaurant


In [172]:
# Create a pickle file
with open('response_generator.pkl', 'wb') as file:
    pickle.dump(generate_response, file)

print("Pickle file 'response_generator.pkl' created successfully.")

Pickle file 'response_generator.pkl' created successfully.


In [5]:
import pickle

# Example of loading a pickle file
with open('complaint_functions.pkl', 'rb') as f:
    obj = pickle.load(f)
print(obj)


{'get_complaint_counts': <function get_complaint_counts at 0x000001B0255660E0>}
