In [1]:
import pandas as pd
!pip install pyarrow

# Read the parquet file
try:
    df = pd.read_parquet('train-00000-of-00001-a5a7c6e4bb30b016.parquet')
    print("Parquet file loaded successfully.")
except Exception as e:
    print(f"Error loading parquet file: {e}")

Parquet file loaded successfully.


In [2]:
!pip install -q -U google-generativeai

In [3]:
import os
import json
import google.generativeai as genai

def analyze_sentiment_gemini(text, api_key):
    """
    Analyzes the sentiment of a given text using the Gemini API with function calling simulation.

    Args:
        text: The text to analyze.
        api_key: Your Gemini API key.

    Returns:
        A dictionary containing the sentiment analysis results, or None if an error occurs.
    """
    # Configure Gemini API
    # genai.configure(api_key=api_key)
    model = genai.GenerativeModel("gemini-pro")

    # Define the function schema
    function_schema = {
        "name": "analyze_sentiment",
        "description": "Analyzes the sentiment of a given text.",
        "parameters": {
            "type": "object",
            "properties": {
                "sentence": {
                    "type": "string",
                    "description": "The original sentence provided for sentiment analysis."
                },
                "sentiment": {
                    "type": "int",
                    "description": "The sentiment of the text (e.g., Positive(1), Negative(-1), Neutral(0), frustrated(-2))."
                },
                "explanation": {
                    "type": "string",
                    "description": "A brief explanation justifying the sentiment analysis result."
                }
            },
            "required": ["sentiment"]
        }
    }

    try:
        # Simulating function calling by asking Gemini to follow the schema
        prompt = f"""
        You are an assistant that uses the function {function_schema['name']} to analyze text.
        Follow this JSON schema strictly:
        {json.dumps(function_schema['parameters'],indent=3)}

        Analyze the following text:
        "{text}"
        """

        response = model.generate_content(prompt)

        # Parse JSON response from the model
        sentiment_data = json.loads(response.text)
        return sentiment_data

    except json.JSONDecodeError:
        print("Failed to decode JSON from the model's response.")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

In [4]:
API_KEY = genai.configure(api_key="AIzaSyCfMhjWIAnVNdJwamWWhP0BTTi-Y8w0H2k")

In [5]:
result = analyze_sentiment_gemini("I am Feeling horrible today.", API_KEY)
# print(result['explanation'])
print(result)

{'sentence': 'I am Feeling horrible today.', 'sentiment': -2, 'explanation': 'The sentence contains several words with negative connotations, such as "horrible" and "today". The overall tone of the sentence is negative, indicating that the speaker is feeling frustrated or unhappy.'}


In [6]:
df.head()

Unnamed: 0,issue_area,issue_category,issue_sub_category,issue_category_sub_category,customer_sentiment,product_category,product_sub_category,issue_complexity,agent_experience_level,agent_experience_level_desc,conversation
0,Login and Account,Mobile Number and Email Verification,Verification requirement for mobile number or ...,Mobile Number and Email Verification -> Verifi...,neutral,Appliances,Oven Toaster Grills (OTG),medium,junior,"handles customer inquiries independently, poss...",Agent: Thank you for calling BrownBox Customer...
1,Cancellations and returns,Pickup and Shipping,Reasons for being asked to ship the item,Pickup and Shipping -> Reasons for being asked...,neutral,Electronics,Computer Monitor,less,junior,"handles customer inquiries independently, poss...",Agent: Thank you for calling BrownBox customer...
2,Cancellations and returns,Replacement and Return Process,Inability to click the 'Cancel' button,Replacement and Return Process -> Inability to...,neutral,Appliances,Juicer/Mixer/Grinder,medium,experienced,"confidently handles complex customer issues, e...",Agent: Thank you for calling BrownBox Customer...
3,Login and Account,Login Issues and Error Messages,Error message regarding exceeded attempts to e...,Login Issues and Error Messages -> Error messa...,neutral,Appliances,Water Purifier,less,inexperienced,"may struggle with ambiguous queries, rely on c...","Customer: Hi, I am facing an issue while loggi..."
4,Order,Order Delivery Issues,Delivery not attempted again,Order Delivery Issues -> Delivery not attempte...,negative,Electronics,Bp Monitor,medium,experienced,"confidently handles complex customer issues, e...",Agent: Thank you for contacting BrownBox custo...


In [7]:
df['sentiment_encoded'] = df["customer_sentiment"].map({"negative": -1, "neutral": 0, "positive": 1, "frustrated": -2})

df.head()

Unnamed: 0,issue_area,issue_category,issue_sub_category,issue_category_sub_category,customer_sentiment,product_category,product_sub_category,issue_complexity,agent_experience_level,agent_experience_level_desc,conversation,sentiment_encoded
0,Login and Account,Mobile Number and Email Verification,Verification requirement for mobile number or ...,Mobile Number and Email Verification -> Verifi...,neutral,Appliances,Oven Toaster Grills (OTG),medium,junior,"handles customer inquiries independently, poss...",Agent: Thank you for calling BrownBox Customer...,0
1,Cancellations and returns,Pickup and Shipping,Reasons for being asked to ship the item,Pickup and Shipping -> Reasons for being asked...,neutral,Electronics,Computer Monitor,less,junior,"handles customer inquiries independently, poss...",Agent: Thank you for calling BrownBox customer...,0
2,Cancellations and returns,Replacement and Return Process,Inability to click the 'Cancel' button,Replacement and Return Process -> Inability to...,neutral,Appliances,Juicer/Mixer/Grinder,medium,experienced,"confidently handles complex customer issues, e...",Agent: Thank you for calling BrownBox Customer...,0
3,Login and Account,Login Issues and Error Messages,Error message regarding exceeded attempts to e...,Login Issues and Error Messages -> Error messa...,neutral,Appliances,Water Purifier,less,inexperienced,"may struggle with ambiguous queries, rely on c...","Customer: Hi, I am facing an issue while loggi...",0
4,Order,Order Delivery Issues,Delivery not attempted again,Order Delivery Issues -> Delivery not attempte...,negative,Electronics,Bp Monitor,medium,experienced,"confidently handles complex customer issues, e...",Agent: Thank you for contacting BrownBox custo...,-1


In [8]:

import pandas as pd
import json

# Assuming 'df' is already loaded and 'analyze_sentiment_gemini' function is defined

# Example usage: Analyze sentiment for rows specified in an array
row_numbers = [0, 1, 2]  # Replace with your desired row numbers

for i in row_numbers:
    try:
        conversation = df['conversation'].iloc[i]
        result = analyze_sentiment_gemini(conversation, API_KEY)

        if result:
            print(f"Row {i}:")
            # print(f"  Conversation: {conversation}")
            print(f"  Sentiment: {result.get('sentiment')}") # Use .get() to avoid KeyError
            print(f"  Explanation: {result.get('explanation')}") # Use .get() to avoid KeyError
            print("---")
        else:
            print(f"Sentiment analysis failed for row {i}")

    except IndexError:
        print(f"Row {i} is out of bounds.")
    except Exception as e:
        print(f"An error occurred for row {i}: {e}")

Failed to decode JSON from the model's response.
Sentiment analysis failed for row 0
Failed to decode JSON from the model's response.
Sentiment analysis failed for row 1
Row 2:
  Sentiment: 0
  Explanation: The conversation between the customer and the agent is primarily focused on resolving the issue with the 'Cancel' button and providing guidance on the replacement process. The overall tone of the conversation is polite and helpful, indicating a neutral sentiment.
---


In [9]:
import time

for i in row_numbers:
    try:
        conversation = df['conversation'].iloc[i]
        if pd.isna(conversation) or not isinstance(conversation, str) or conversation.strip() == "":
            print(f"Row {i} has invalid data: {conversation}")
            continue

        result = analyze_sentiment_gemini(conversation, API_KEY)
        if result:
            print(f"Row {i}:")
            print(f"  Sentiment: {result.get('sentiment', 'N/A')}")
            print(f"  Explanation: {result.get('explanation', 'N/A')}")
            print("---")
        else:
            print(f"Sentiment analysis failed for row {i}")

        time.sleep(1)  # Avoid hitting API rate limits

    except IndexError:
        print(f"Row {i} is out of bounds.")
    except Exception as e:
        print(f"An error occurred for row {i}: {e}")


Row 0:
  Sentiment: 1
  Explanation: The conversation mainly involves the agent efficiently resolving the customer's issue, leading to a positive sentiment.
---
Failed to decode JSON from the model's response.
Sentiment analysis failed for row 1
An error occurred: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Sentiment analysis failed for row 2


In [10]:
# prompt: modify above to predict sentiment for the entire conversation as string input, and not on the sentence of conversation

import pandas as pd
import os,pickle
import json
import google.generativeai as genai
import time

# ... (your existing code)

def analyze_sentiment_gemini(text, api_key):
    """
    Analyzes the sentiment of a given text using the Gemini API.

    Args:
        text: The text to analyze.
        api_key: Your Gemini API key.

    Returns:
        A dictionary containing the sentiment analysis results, or None if an error occurs.
    """
    genai.configure(api_key="AIzaSyCfMhjWIAnVNdJwamWWhP0BTTi-Y8w0H2k") # Replace with your actual API key
    model = genai.GenerativeModel("gemini-pro")

    prompt = f"""
    Analyze the overall sentiment of the following conversation.
    Return the result in this JSON format:
    {{
        "sentiment": 1 (for Positive), 0 (for Neutral), -1 (for Negative), -2 (for Frustrated),
        "explanation": "A brief explanation of the overall sentiment."
    }}

    Conversation:
    ```
    {text}
    ```
    """

    try:
        response = model.generate_content(prompt)
        #print(f"Raw API response: {response.text}")  # Log raw response for debugging

        sentiment_data = json.loads(response.text)
        return sentiment_data

    except json.JSONDecodeError:
        print("Failed to decode JSON response.")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

# Example usage (modified)
row_numbers = [0, 1, 2]  # Specify rows to analyze

for i in row_numbers:
    try:
        conversation = df['conversation'].iloc[i]
        if not isinstance(conversation, str) or conversation.strip() == "":
            print(f"Row {i} has invalid data: {conversation}")
            continue

        print(f"Analyzing row {i}...")
        result = analyze_sentiment_gemini(conversation, API_KEY)

        if result:
            print(f"Row {i} result:")
            #print(result)
            os.makedirs('model', exist_ok=True)
            sentiment_result = result
            with open('model/sentiment_analysis_output.pkl', 'wb') as f:
              pickle.dump(sentiment_result, f)
            print(f"  Sentiment: {result.get('sentiment', 'N/A')}")
            print(f"  Explanation: {result.get('explanation', 'N/A')}")
        else:
            print(f"Sentiment analysis failed for row {i}")

        time.sleep(1)  # Avoid rate limiting

    except IndexError:
        print(f"Row {i} is out of bounds.")
    except Exception as e:
        print(f"An error occurred for row {i}: {e}")


Analyzing row 0...
Row 0 result:
  Sentiment: 1
  Explanation: The conversation has an overall positive tone. The customer is polite and respectful, and the agent is helpful and efficient. The customer is able to resolve their issue with the agent's help, and both parties are satisfied with the interaction.
Analyzing row 1...
Row 1 result:
  Sentiment: 1
  Explanation: The conversation has an overall positive sentiment. The customer has a problem with their product, but the agent is able to resolve it quickly and efficiently. The customer is satisfied with the solution and thanks the agent for their help.
Analyzing row 2...
Row 2 result:
  Sentiment: 1
  Explanation: The overall sentiment of the conversation is positive. The customer was initially frustrated but became more positive after the agent helped resolve their issue and provided clear instructions on how to return the product and receive a replacement.


In [12]:
# prompt: calculate the f1 score for the predicted sentiment

import pickle
from sklearn.metrics import f1_score

# Load the saved sentiment analysis results
try:
    with open('model/sentiment_analysis_output.pkl', 'rb') as f:
        sentiment_results = pickle.load(f)
except FileNotFoundError:
    print("Error: 'sentiment_analysis_output.pkl' not found. Run the sentiment analysis code first.")
    exit()
except Exception as e:
    print(f"Error loading sentiment results: {e}")
    exit()


# Assuming 'df' is your DataFrame and 'sentiment_results' is a list
# of dictionaries containing the model's sentiment predictions

y_true = []  # True sentiment labels from your DataFrame
y_pred = []  # Predicted sentiment labels from the model

# Extract true labels for the analyzed rows (assuming row_numbers is defined as before)
row_numbers = [0, 1, 2]  # Replace with your actual row numbers
for i in row_numbers:
    try:
        true_sentiment = df['sentiment_encoded'].iloc[i]
        y_true.append(true_sentiment)

        # Extract the predicted sentiment label
        predicted_sentiment = sentiment_results.get('sentiment')
        if predicted_sentiment is not None:
            y_pred.append(predicted_sentiment)
        else:
            print(f"Warning: Missing 'sentiment' key in prediction for row {i}. Skipping this row.")

    except (IndexError, KeyError) as e:
        print(f"Error processing row {i}: {e}")

if len(y_true) != len(y_pred):
    print("Error: Number of true labels and predictions do not match. Cannot calculate F1 score.")
else:
    # Calculate the F1 score
    f1 = f1_score(y_true, y_pred, average='weighted')  # Use 'weighted' average for imbalanced classes

    print(f"F1 Score: {f1}")

F1 Score: 0.0


In [14]:
# prompt: calculate the f1 score for the predicted sentiment and for each sentiment positive negative and frustrtaed, give complete code

# Calculate F1 scores for each sentiment category

from sklearn.metrics import f1_score

# ... (your existing code)

# Load the saved sentiment analysis results (assuming this part is already in your code)
# ...

y_true = []
y_pred = []

row_numbers = [0, 1, 2]  # Replace with your actual row numbers

for i in row_numbers:
    try:
        true_sentiment = df['sentiment_encoded'].iloc[i]
        y_true.append(true_sentiment)

        predicted_sentiment = sentiment_results.get('sentiment')
        if predicted_sentiment is not None:
            y_pred.append(predicted_sentiment)
        else:
            print(f"Warning: Missing 'sentiment' key in prediction for row {i}. Skipping this row.")
    except (IndexError, KeyError) as e:
        print(f"Error processing row {i}: {e}")

if len(y_true) != len(y_pred):
    print("Error: Number of true labels and predictions do not match.")
else:
    # Calculate overall F1 score (weighted average)
    overall_f1 = f1_score(y_true, y_pred, average='weighted')
    print(f"Overall F1 Score: {overall_f1}")

    # Calculate F1 scores for each sentiment category
    sentiment_labels = [-2, -1, 0, 1]  # Your sentiment labels
    for label in sentiment_labels:
        true_for_label = [1 if t == label else 0 for t in y_true]
        pred_for_label = [1 if p == label else 0 for p in y_pred]

        try:
            f1_for_label = f1_score(true_for_label, pred_for_label, zero_division=1)  # Handle cases where there are no true or predicted instances of a class
            print(f"F1 Score for sentiment {label}: {f1_for_label}")
        except ValueError as e:
            print(f"Error calculating F1 score for sentiment {label}: {e}.")

Overall F1 Score: 0.0
F1 Score for sentiment -2: 1.0
F1 Score for sentiment -1: 1.0
F1 Score for sentiment 0: 0.0
F1 Score for sentiment 1: 0.0
