SENTIMENT ANALYSIS

In [None]:
# Importing required modules
import pandas as pd
from openai import AzureOpenAI
import re

In [None]:
# Azure OpenAI connection settings. Both values are intentionally left blank
# here and must be filled in before the client below can be used.
# NOTE(review): avoid committing real credentials; consider reading them from
# environment variables instead of hard-coding them in the notebook.
AZURE_OPENAI_API_KEY = ""
AZURE_OPENAI_ENDPOINT = ""

In [None]:
# Create the Azure OpenAI client from the settings defined above.
# `api_version` is an empty placeholder and must be set to the API version
# of the target Azure OpenAI resource before any request is made.
llm = AzureOpenAI(
    api_key=AZURE_OPENAI_API_KEY,
    api_version='',
    azure_endpoint=AZURE_OPENAI_ENDPOINT
)

In [None]:
# Load the dataset from a user-provided Excel file ("your_excel_file.xlsx" is
# a placeholder path to be replaced).
# Note: The dataset should contain columns with client feedback.
# The first column is assumed to be an ID or index column, and the remaining
# columns are feature columns with feedback.
data = pd.read_excel("your_excel_file.xlsx")

# Print the column names and the first rows (DataFrame.head()) so the
# structure of the data can be checked before running the analysis
print("Columns in the dataset:", data.columns)
print("Sample data:\n", data.head())

In [None]:
# Define a function to clean text data
def clean_text(text):
    """Normalize a feedback value into a single line of printable ASCII text.

    Steps, in order:
      1. Replace every run of non-ASCII characters with a space.
      2. Collapse every run of whitespace (tabs, newlines, ...) to one space.
      3. Delete the remaining ASCII control characters.
      4. Collapse the spaces left behind by step 3 and trim both ends.

    Control characters are removed *before* the final collapse/trim so that
    deleting one between two spaces (or at an edge) cannot leave a double
    space or a leading/trailing space in the result.

    Args:
        text: The value to clean. Non-string values (for example numbers read
            from a spreadsheet cell) are converted with ``str()``; ``None``
            is treated as empty text.

    Returns:
        The cleaned string, possibly empty.
    """
    if text is None:
        return ""
    if not isinstance(text, str):
        # Spreadsheet cells may hold numbers or dates; re.sub() needs a str.
        text = str(text)

    # Replace non-ASCII characters with a space
    text = re.sub(r'[^\x00-\x7F]+', ' ', text)
    # Turn tabs, newlines and any other whitespace run into a single space
    text = re.sub(r'\s+', ' ', text)
    # Remove control characters (everything below 0x20 except \t, \n, \r)
    text = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', '', text)
    # Removing control characters can leave adjacent or edge spaces behind
    return re.sub(r' {2,}', ' ', text).strip()

In [None]:
# Define a function to analyze the sentiment, plus the helpers it uses to
# interpret the model's reply.

# Instructions sent to the model as the system message.
_SENTIMENT_PROMPT = """The data provided consists of feedback given by the client. Your task is to analyze the sentiment of the feedback and categorize it as either "progressive," "lagging," "stagnant," or "unclear."

- The sentiment is considered "progressive" if the feedback consists of one or more words including positive words or indicates that progress has been made. Positive feedback that shows approval, satisfaction, or any sign of advancement falls under this category.

- The sentiment is considered "lagging" if the feedback consists of one or more words including negative words or indicates that the competitor is performing better. Negative feedback that highlights limitations, dissatisfaction, or points out that a competitor's performance or features are superior should be categorized as lagging.

- The sentiment is considered "stagnant" if the feedback consists of one or more words that suggest no growth or no progress. Feedback that indicates a pause, uncertainty, or a lack of improvement should be categorized as stagnant. When the feedback includes both positive and negative aspects, it needs to be categorized as stagnant.

- The sentiment is considered "unclear" if the feedback is a single word or if it does not provide enough context to categorize it as "progressive," "lagging," or "stagnant".

For "unclear" feedback, simply return the word "Unclear". For other categories, provide the sentiment, a score between 0-100, and the reason.

The output should be in the following format:
Sentiment: [progressive/lagging/stagnant/unclear]
Score: [0-100]
Reason: [explanation]"""

# Matches the three labelled fields in order; tolerant of letter case, of
# extra whitespace, and of the fields being on one line or several.
_SENTIMENT_RESPONSE_RE = re.compile(
    r"Sentiment\s*:\s*(?P<sentiment>.*?)\s*"
    r"Score\s*:\s*(?P<score>.*?)\s*"
    r"Reason\s*:\s*(?P<reason>.*)",
    re.IGNORECASE | re.DOTALL,
)

# Decoration the model sometimes wraps around a field value (brackets copied
# from the format template, quotes, markdown asterisks).
_FIELD_PADDING = " \t\r\n[]\"'*"


def _unclear_result(feedback):
    """Build the result dictionary used for feedback classified as unclear."""
    return {"Text": feedback, "Sentiment": "Unclear", "Score": None, "Reason": ""}


def _parse_sentiment_response(feedback, response_content):
    """Convert the model's raw reply into a result dictionary.

    Args:
        feedback: The feedback text the reply refers to (stored under "Text").
        response_content: The reply text; may be None or empty.

    Returns:
        A dict with the keys "Text", "Sentiment", "Score" and "Reason".
        "Score" is the score text as returned by the model (a string) or
        None when no score is available. Replies that are neither in the
        requested format nor an "unclear" answer yield Sentiment "Unknown".
    """
    content = (response_content or "").strip()
    match = _SENTIMENT_RESPONSE_RE.search(content)

    if match is None:
        # The prompt asks for the bare word "Unclear" in that case, so an
        # unstructured reply mentioning it is taken as an unclear verdict.
        if "unclear" in content.lower():
            return _unclear_result(feedback)
        return {
            "Text": feedback,
            "Sentiment": "Unknown",
            "Score": None,
            "Reason": f"Unexpected response format. Response: {content!r}",
        }

    sentiment = match.group("sentiment").strip(_FIELD_PADDING)
    # Decide "unclear" from the Sentiment field only: the word may legitimately
    # appear inside the Reason of a progressive/lagging/stagnant verdict.
    if sentiment.lower() == "unclear":
        return _unclear_result(feedback)
    if not sentiment:
        return {
            "Text": feedback,
            "Sentiment": "Unknown",
            "Score": None,
            "Reason": f"Unexpected response format. Response: {content!r}",
        }

    return {
        "Text": feedback,
        "Sentiment": sentiment,
        "Score": match.group("score").strip(_FIELD_PADDING),
        "Reason": match.group("reason").strip(),
    }


def analyze_sentiment(feedback):
    """Classify one piece of client feedback using the Azure OpenAI client.

    Sends the classification instructions as the system message and the
    feedback as the user message through the module-level ``llm`` client,
    then parses the reply.

    Args:
        feedback: The (already cleaned) feedback text.

    Returns:
        A dict with the keys "Text", "Sentiment", "Score" and "Reason".
        Empty or whitespace-only feedback is reported as "Unclear" without
        calling the model.

    Raises:
        Any exception raised by the client request is propagated unchanged.
    """
    if feedback is None or (isinstance(feedback, str) and not feedback.strip()):
        # Nothing to classify; skip the request entirely.
        return _unclear_result(feedback)

    raw_response = llm.chat.completions.create(
        model="",  # placeholder: set to the deployment name to use
        messages=[
            {"role": "system", "content": _SENTIMENT_PROMPT},
            {"role": "user", "content": feedback}
        ]
    )

    # The message content can be None (no text returned); the parser treats
    # that the same as an empty reply.
    response_content = raw_response.choices[0].message.content
    return _parse_sentiment_response(feedback, response_content)

In [None]:
# Cache of previously analyzed sentiments, keyed by cleaned feedback text, so
# identical feedback is only sent to the model once
sentiment_cache = {}

# List to store one flat result dictionary per input row
results = []

# The first column is treated as the ID column (whatever its name is); all
# remaining columns are treated as feedback columns
id_column = data.columns[0]
feature_columns = data.columns[1:]

# Iterate over each row in the DataFrame
for _, row in data.iterrows():
    row_result = {id_column: row[id_column]}  # Start each result with the row's ID
    for feature in feature_columns:
        feedback = row[feature]
        if pd.isna(feedback):  # Empty cell: nothing to analyze
            continue

        # Cells may hold numbers or dates; the cleaner works on text
        cleaned_feedback = clean_text(str(feedback))
        if cleaned_feedback not in sentiment_cache:
            sentiment_cache[cleaned_feedback] = analyze_sentiment(cleaned_feedback)
        analysis = sentiment_cache[cleaned_feedback]

        row_result[feature] = cleaned_feedback  # Store cleaned feedback
        # Store the analysis as separate scalar columns. A whole dictionary in
        # one cell cannot be written to Excel (openpyxl rejects dict values).
        row_result[f'{feature}_sentiment'] = analysis["Sentiment"]
        row_result[f'{feature}_score'] = analysis["Score"]
        row_result[f'{feature}_reason'] = analysis["Reason"]
    results.append(row_result)  # Append the result for the current row to the list

# Create a DataFrame from the results list
final_df = pd.DataFrame(results)

In [None]:
# Save the results to an Excel file in the current working directory.
# index=False leaves the DataFrame's row index out of the sheet; the file is
# written with the openpyxl engine.
final_df.to_excel('sentiment_analysis_results.xlsx', index=False, engine='openpyxl')
print("Sentiment analysis results saved to 'sentiment_analysis_results.xlsx'.")