In [2]:
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from scipy.special import softmax
from tqdm import tqdm
import numpy as np

# Checkpoint directories: each one provides both the model weights and the
# matching tokenizer, so the path is written once and reused.
clf_checkpoint_dir = r'C:\Users\sumeet4.singh\Desktop\senti_project\1. Feedback_Project\Scripts\fine_tuned_bart_model'
senti_checkpoint_dir = r"C:\Users\sumeet4.singh\Desktop\senti_project\1. Feedback_Project\Scripts\finetuned_trained_roberta(BASE)"

# Multi-label (category) classifier and its tokenizer
model_clf = AutoModelForSequenceClassification.from_pretrained(clf_checkpoint_dir)
tokenizer_clf = AutoTokenizer.from_pretrained(clf_checkpoint_dir)

# Sentiment classifier and its tokenizer
tokenizer_senti = AutoTokenizer.from_pretrained(senti_checkpoint_dir)
model_senti = AutoModelForSequenceClassification.from_pretrained(senti_checkpoint_dir)

# Use CUDA when it is available, otherwise fall back to the CPU, and place
# both models on that device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_clf.to(device)
model_senti.to(device)

# Feedback data to be categorized and scored (read from Excel)
df = pd.read_excel(r'C:\Users\sumeet4.singh\Desktop\senti_project\1. Feedback_Project\Data\Benchmarking data.xlsx')

# Category names for the multi-label classifier; position i in this list is
# paired with output i of the classifier when predictions are decoded.
candidate_labels = [
    "Service Quality",
    "Doctor Experience",
    "Nursing Experience",
    "Interaction with Staff",
    "Lab & Radiology Services",
    "Facilities and Infrastructure",
    "Billing and Payments",
    "Appointment Process",
    "Admission & Discharge",
    "Food & Beverages",
    "Housekeeping/PCA/PTA",
    "Others",
]

# Function for multi-label classification (categorization)
def preprocess_text(text, tokenizer, max_length=128):
    """Tokenize ``text`` into PyTorch tensors.

    Args:
        text: The input forwarded to ``tokenizer`` as its first argument.
        tokenizer: Callable tokenizer; it is invoked with padding and
            truncation enabled and ``return_tensors='pt'``.
        max_length: Maximum sequence length passed to the tokenizer.

    Returns:
        Whatever ``tokenizer`` returns for these arguments.
    """
    encode_options = {
        'padding': True,
        'truncation': True,
        'max_length': max_length,
        'return_tensors': 'pt',
    }
    return tokenizer(text, **encode_options)

def predict_category_pt(text, model, tokenizer, candidate_labels, threshold=0.5):
    """Predict which candidate labels apply to one piece of text.

    The text is tokenized with ``preprocess_text``, run through ``model``
    with gradient tracking disabled, and every output logit is turned into an
    independent probability with a sigmoid (multi-label decoding).

    Args:
        text: The text to classify.
        model: Model whose output exposes ``.logits``; it is switched to
            evaluation mode before the forward pass.
        tokenizer: Tokenizer handed to ``preprocess_text``.
        candidate_labels: Label names; entry ``i`` is paired with output
            ``i`` of the model.
        threshold: A label is selected when its probability is strictly
            greater than this value.

    Returns:
        tuple: ``(predicted_categories, probabilities)`` - the selected label
        names and the list of per-label probabilities.
    """
    inputs = preprocess_text(text, tokenizer)
    # Move the encoded tensors to the module-level ``device``
    inputs = {key: value.to(device) for key, value in inputs.items()}
    model.eval()

    with torch.no_grad():
        outputs = model(**inputs)

    # Remove only the leading batch dimension. An unqualified squeeze() would
    # also collapse the label dimension when the model has a single output,
    # so tolist() would return a scalar and the indexing below would fail.
    probabilities = torch.sigmoid(outputs.logits).squeeze(0)
    predicted_labels = (probabilities > threshold).int().tolist()

    predicted_categories = [
        label
        for i, label in enumerate(candidate_labels)
        if predicted_labels[i] == 1
    ]

    return predicted_categories, probabilities.tolist()

def categorize_feedback_pt(df, model, tokenizer, candidate_labels, threshold=0.2):
    """Categorize every row of ``df['feedback']`` and attach the results.

    Each comment is classified with ``predict_category_pt``. The DataFrame is
    modified in place: a ``'Categories'`` column receives the list of selected
    labels per row, and one ``'<label>_score'`` column per candidate label
    receives that label's probability.

    Args:
        df: DataFrame with a ``'feedback'`` column.
        model: Classification model forwarded to ``predict_category_pt``.
        tokenizer: Tokenizer forwarded to ``predict_category_pt``.
        candidate_labels: Label names forwarded to ``predict_category_pt``.
        threshold: Selection threshold forwarded to ``predict_category_pt``.

    Returns:
        The same ``df`` object, with the new columns added.
    """
    # Missing values (NaN) and other non-string cells are not valid tokenizer
    # input. Normalise them the same way ``process_in_batches`` does so both
    # passes over the data see identical text. The stored 'feedback' column
    # itself is left untouched.
    comments = df['feedback'].fillna('').astype(str)

    all_predictions = []
    all_probabilities = {label: [] for label in candidate_labels}

    for comment in tqdm(comments, desc="Categorizing Feedback"):
        predicted_categories, probabilities = predict_category_pt(
            comment, model, tokenizer, candidate_labels, threshold
        )
        all_predictions.append(predicted_categories)

        for label, probability in zip(candidate_labels, probabilities):
            all_probabilities[label].append(probability)

    df['Categories'] = all_predictions
    for label in candidate_labels:
        df[f'{label}_score'] = all_probabilities[label]

    return df

# Function for sentiment analysis
def analyze_sentiment_batch(comments_batch):
    """Score one batch of comments with the sentiment model.

    Non-string entries are converted with ``str``. The batch is tokenized with
    the module-level ``tokenizer_senti`` (padding and truncation enabled,
    at most 512 tokens), moved to the module-level ``device`` and passed
    through ``model_senti`` without gradient tracking.

    Args:
        comments_batch: Iterable of comments.

    Returns:
        NumPy array of softmax-normalised scores, normalised along the last
        axis of the model's first output.
    """
    texts = [
        item if isinstance(item, str) else str(item)
        for item in comments_batch
    ]

    tokenized = tokenizer_senti(
        texts,
        max_length=512,
        padding=True,
        truncation=True,
        return_tensors='pt',
    )
    model_inputs = {name: tensor.to(device) for name, tensor in tokenized.items()}

    with torch.no_grad():
        result = model_senti(**model_inputs)

    # First element of the model output, brought back to the CPU as NumPy
    raw_scores = result[0].detach().cpu().numpy()
    return softmax(raw_scores, axis=-1)

def process_in_batches(df, batch_size=64):
    """Run sentiment scoring over ``df['feedback']`` in fixed-size batches.

    Each slice of ``batch_size`` rows has missing values replaced by ``''``
    and is cast to ``str`` before being sent to ``analyze_sentiment_batch``.

    Args:
        df: DataFrame with a ``'feedback'`` column.
        batch_size: Number of rows scored per call.

    Returns:
        The per-batch score arrays stacked vertically with ``np.vstack``.
    """
    batch_starts = range(0, len(df), batch_size)
    score_chunks = [
        analyze_sentiment_batch(
            df['feedback'].iloc[start:start + batch_size].fillna('').astype(str).values
        )
        for start in tqdm(batch_starts)
    ]
    return np.vstack(score_chunks)

# Function to add new comments to the DataFrame
def add_new_comments_to_df(df, new_comments):
    """Return a new DataFrame with ``new_comments`` appended as rows.

    The comments become the ``'feedback'`` column of the appended rows; the
    result is re-indexed from zero and the input ``df`` is not modified.

    Args:
        df: Existing DataFrame.
        new_comments: Comments to append.

    Returns:
        The concatenation of ``df`` and the new rows.
    """
    extra_rows = pd.DataFrame(new_comments, columns=['feedback'])
    return pd.concat([df, extra_rows], ignore_index=True)

# Function to get comments from the terminal
def get_comments_from_terminal():
    """Interactively collect feedback comments from standard input.

    Prompts repeatedly until the user enters ``done`` (case-insensitive,
    surrounding whitespace ignored) or the input stream ends. Blank entries
    are skipped; every other entry is stored exactly as typed.

    Returns:
        list[str]: The collected comments, in the order they were entered.
    """
    new_comments = []
    print("Enter comments. Type 'done' when you're finished.")

    while True:
        try:
            comment = input("Enter a comment: ")
        except EOFError:
            # Input stream closed (Ctrl-D or exhausted piped input): finish
            # with what was collected instead of crashing.
            break

        entry = comment.strip()
        if entry.lower() == 'done':
            # Compared on the stripped text so "done " or " Done" also ends
            # the loop instead of being recorded as feedback.
            break
        if not entry:
            # An accidental empty Enter would otherwise add an empty row
            continue

        # Keep the text as typed; only the checks above use the stripped form
        new_comments.append(comment)

    return new_comments

# Collect extra comments interactively and append them to the loaded data
new_comments = get_comments_from_terminal()
df = add_new_comments_to_df(df, new_comments)

# Multi-label categorization: adds 'Categories' and one '<label>_score'
# column per candidate label
df = categorize_feedback_pt(df, model_clf, tokenizer_clf, candidate_labels)

# Sentiment analysis: one row of class scores per feedback entry
scores = process_in_batches(df)

# Expose the first three score columns as negative / neutral / positive
for column_index, column_name in enumerate(['roberta_neg', 'roberta_neu', 'roberta_pos']):
    df[column_name] = scores[:, column_index]

# Final sentiment is the label whose score is highest among those three
sentiment_labels = ['Negative', 'Neutral', 'Positive']
df['Sentiment ROBERTA'] = [
    sentiment_labels[best_index] for best_index in scores[:, :3].argmax(axis=1)
]

# Write the enriched data to a new Excel file
df.to_excel(r'C:\Users\sumeet4.singh\Desktop\senti_project\1. Feedback_Project\Data\Benchmarking data(CAT & SENTI).xlsx', index=False)

print("Categorization and Sentiment analysis complete! Results saved.")

You passed along `num_labels=3` with an incompatible id to label map: {'0': 'LABEL_0', '1': 'LABEL_1', '2': 'LABEL_2', '3': 'LABEL_3', '4': 'LABEL_4', '5': 'LABEL_5', '6': 'LABEL_6', '7': 'LABEL_7', '8': 'LABEL_8', '9': 'LABEL_9', '10': 'LABEL_10', '11': 'LABEL_11'}. The number of labels wil be overwritten to 12.


Enter comments. Type 'done' when you're finished.


Enter a comment:  i love this hospital and the staff was very friendly
Enter a comment:  i dont linke the food as it was not that fresh
Enter a comment:  every think was good except, i want more beds in the icu 
Enter a comment:  done


Categorizing Feedback: 100%|█████████████████████████████████████████████████████████| 348/348 [00:38<00:00,  9.10it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:43<00:00,  7.19s/it]

Categorization and Sentiment analysis complete! Results saved.



