In [None]:
# 1. Install and Import Required Libraries
!pip install -U ibm-watsonx-ai

import pandas as pd
import random
import re
from collections import Counter
from ibm_watsonx_ai import Credentials
from ibm_watsonx_ai.foundation_models import ModelInference
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
import warnings

# Silence UserWarning noise for the rest of the notebook run.
warnings.filterwarnings("ignore", category=UserWarning)

# 2. Generate Synthetic Car Rental Reviews
# Sentiment labels; the order matters because generate_review() pairs it with
# its sampling weights.
sentiments = ["Positive", "Negative", "Neutral"]

# Issue categories that can be appended to a non-positive synthetic review.
issues = [
    "late delivery",
    "car condition",
    "staff behavior",
    "pricing",
    "cleanliness",
    "fuel policy",
    "insurance",
    "pickup process",
    "dropoff process",
    "hidden charges",
    "customer service",
    "vehicle availability",
]

# Canned sentences, one pool per sentiment.
positive_templates = [
    "The car was in excellent condition and the staff was very helpful.",
    "Smooth pickup and dropoff process, no issues at all!",
    "Great experience, the vehicle was clean and ready on time.",
    "Affordable pricing and friendly customer service.",
    "Everything went as expected, will rent again.",
]

negative_templates = [
    "The car was delivered late and the staff was rude.",
    "Hidden charges were added to my bill, very disappointed.",
    "The vehicle was not clean and had a bad smell.",
    "Pickup process was slow and the car was not available on time.",
    "Customer service did not help with my insurance issue.",
]

neutral_templates = [
    "The rental process was average, nothing special.",
    "Car was okay, but nothing stood out.",
    "Experience was neither good nor bad.",
    "Service was acceptable, but could be improved.",
    "No major issues, but not exceptional either.",
]

def generate_review():
    """Build one synthetic review string.

    A sentiment is sampled from ``sentiments`` with weights 0.5 / 0.3 / 0.2,
    a template sentence is picked from the matching pool, and Negative or
    Neutral reviews receive an " Issue: <issue>." suffix with probability 0.7.
    """
    (mood,) = random.choices(sentiments, weights=[0.5, 0.3, 0.2])

    pools = {
        'Positive': positive_templates,
        'Negative': negative_templates,
    }
    # Any label other than Positive/Negative draws from the neutral pool.
    text = random.choice(pools.get(mood, neutral_templates))

    if mood not in ['Negative', 'Neutral']:
        return text

    # The probability draw is only made for non-positive reviews.
    if random.random() < 0.7:
        text = text + f" Issue: {random.choice(issues)}."
    return text

# Generate the synthetic dataset and write it to disk for the analysis steps.
num_reviews = 100
reviews = [generate_review() for _ in range(num_reviews)]
df = pd.DataFrame(reviews, columns=['review_text'])
df.to_csv('car_rental_reviews.csv', index=False)

# 3. Set Up IBM watsonx.ai Credentials and Model
# The API key must never be committed with the notebook: it is read from the
# WATSONX_APIKEY environment variable, falling back to an interactive prompt.
# NOTE(review): a key was previously hard-coded here; it should be treated as
# leaked and rotated.
import os
from getpass import getpass

creds = Credentials(
    url=os.environ.get("WATSONX_URL", "https://us-south.ml.cloud.ibm.com"),
    api_key=os.environ.get("WATSONX_APIKEY") or getpass("watsonx.ai API key: "),
)

model_id = "ibm/granite-13b-instruct-v2"

# Greedy decoding; replies are capped at 100 new tokens.
params = {
    GenParams.DECODING_METHOD: "greedy",
    GenParams.MAX_NEW_TOKENS: 100,
    GenParams.TEMPERATURE: 0
}

model = ModelInference(
    credentials=creds,
    model_id=model_id,
    # Overridable through WATSONX_PROJECT_ID; defaults to the original project.
    project_id=os.environ.get(
        "WATSONX_PROJECT_ID", "764e2c75-70ae-4a9e-bea5-8e2b15c2a964"
    ),
    params=params
)

# 4. Load Synthetic Data
# Rows with a missing review_text are dropped right after reading.
df = pd.read_csv("car_rental_reviews.csv").dropna(subset=['review_text'])

# 5. Updated Prompt Function to clarify Neutral with issues
def build_prompt(review, allowed_issues=None):
    """Build the instruction prompt sent to the model for one review.

    The prompt asks for a two-line reply ("Sentiment: ..." / "Issues: ...")
    and spells out the issue labels the model may choose from; the format
    line refers to "this list", so the list has to be part of the prompt.

    Args:
        review: The customer review text to analyze.
        allowed_issues: Iterable of issue labels to offer the model. When
            omitted, the module-level ``issues`` list is used.

    Returns:
        The complete prompt string, ending with "Review: <review>".
    """
    if allowed_issues is None:
        allowed_issues = issues
    issue_list = ", ".join(allowed_issues)
    return (
        "You are an expert customer feedback analyst. "
        "Analyze the following car rental customer review. "
        "Respond ONLY in the following format (no explanations):\n"
        "Sentiment: <Positive|Negative|Neutral>\n"
        "Issues: <comma-separated issues from this list or 'None'>\n"
        f"Allowed issues: {issue_list}\n"
        "\n"
        "Examples:\n"
        "Sentiment: Negative\n"
        "Issues: late delivery, staff behavior\n"
        "Sentiment: Neutral\n"
        "Issues: hidden charges\n"
        "Sentiment: Neutral\n"
        "Issues: staff behavior\n"
        "Sentiment: Neutral\n"
        "Issues: None\n"
        "Sentiment: Positive\n"
        "Issues: None\n"
        "\n"
        "Guidelines:\n"
        "- Neutral: Use for reviews that are mixed, average, or contain phrases like 'acceptable', 'could be improved', 'average', 'nothing special', or 'no major issues', EVEN IF an issue is mentioned.\n"
        "- Negative: Use only for reviews that contain strong negative language, complaints, or dissatisfaction.\n"
        "- Identify and list all issues mentioned, even in neutral or positive reviews.\n"
        "\n"
        f"Review: {review}"
    )

# 6. Analyze Reviews with watsonx.ai
def analyze_review(review):
    """Send one review to the model and return its raw text reply.

    If the model call raises, the error is printed and a placeholder reply
    ("Sentiment: Unknown" / "Issues: None") is returned instead.
    """
    model_input = build_prompt(review)
    try:
        reply = model.generate_text(prompt=model_input)
    except Exception as err:
        print(f"Error analyzing review: {str(err)}")
        return "Sentiment: Unknown\nIssues: None"
    else:
        return reply

# Run every review through the model and keep the raw reply next to it.
df = df.assign(analysis=df['review_text'].apply(analyze_review))

# DEBUG: Print a few raw outputs to check format
print(
    "Sample model outputs:",
    df['analysis'].head(5).to_string(index=False),
    sep="\n",
)

# 7. Enhanced Extraction Function with post-processing for neutral reviews with issues
def extract_sentiment_issue(text, review_text=None):
    """Parse a model reply into a sentiment label and a list of issues.

    Args:
        text: Raw model reply, expected to contain "Sentiment: ..." and
            "Issues: ..." lines. Non-string values (e.g. None or NaN) are
            treated as an empty reply.
        review_text: Optional original review. It is used to recover an
            "Issue: <name>." tag when the reply lists no issues, and to
            override a Negative verdict when the review uses neutral wording.

    Returns:
        A ``(sentiment, issues)`` tuple. ``sentiment`` is one of "Positive",
        "Negative" or "Neutral" (the fallback when nothing usable is found);
        ``issues`` is a list of lower-cased issue names, possibly empty.
    """
    # Normalize inputs up front so an empty or missing reply no longer has to
    # be rescued by a blanket except, which used to skip the fallbacks below.
    if not isinstance(text, str):
        text = ""
    if not isinstance(review_text, str):
        review_text = None

    sentiment = None
    sentiment_match = re.search(
        r"Sentiment:\s*(Positive|Negative|Neutral|Unknown)", text, re.IGNORECASE
    )
    if sentiment_match:
        sentiment = sentiment_match.group(1).capitalize()
    else:
        # Fallback: the model answered with just the sentiment word.
        words = text.split()
        if words:
            first_word = words[0].capitalize()
            if first_word in ('Positive', 'Negative', 'Neutral'):
                sentiment = first_word
    if sentiment is None or sentiment == "Unknown":
        sentiment = "Neutral"  # fallback to Neutral

    issues = []
    issues_match = re.search(r"Issues:\s*(.*?)(?:\n|$)", text, re.IGNORECASE)
    if issues_match:
        # Strip before comparing so " None" after a comma is filtered as well.
        tokens = (part.strip().lower() for part in issues_match.group(1).split(','))
        issues = [token for token in tokens if token and token != 'none']

    # Fallback: recover the issue tag embedded in the review itself.
    if not issues and review_text:
        issue_in_text = re.search(r"Issue:\s*([a-zA-Z ]+)\.", review_text)
        if issue_in_text:
            issues = [issue_in_text.group(1).strip().lower()]

    # Post-processing: neutral wording in the review overrides a Negative verdict.
    if review_text and sentiment == "Negative":
        neutral_keywords = (
            "average", "nothing special", "acceptable",
            "could be improved", "no major issues", "not exceptional",
        )
        lowered_review = review_text.lower()
        if any(keyword in lowered_review for keyword in neutral_keywords):
            sentiment = "Neutral"

    return sentiment, issues

# 8. Apply Extraction Function with review text
def _extract_row(row):
    """Parse one row's model reply, passing the review text for the fallbacks."""
    return pd.Series(extract_sentiment_issue(row['analysis'], row['review_text']))


df[['sentiment', 'issues']] = df.apply(_extract_row, axis=1)

# 9. Updated Summary Function
def generate_summary(df, *, top_n=3, max_examples=3):
    """Render a Markdown report of sentiment counts and most frequent issues.

    Args:
        df: DataFrame with 'review_text', 'sentiment' and 'issues' columns,
            where each 'issues' cell is an iterable of issue names.
        top_n: How many issues to list in the table (or, when no issues were
            extracted at all, how many of the most repeated negative reviews
            to list instead).
        max_examples: How many negative reviews to quote in the
            "Representative Negative Feedback" section.

    Returns:
        The report as a single Markdown string.
    """
    sentiment_counts = df['sentiment'].value_counts()
    issue_counts = Counter(
        issue for row_issues in df['issues'] for issue in row_issues
    )
    negative_rows = df[df['sentiment'] == 'Negative']

    # Collect fragments and join once instead of repeated string +=.
    parts = [
        "## Car Rental Feedback Analysis Report\n",
        f"**Total Reviews Analyzed**: {len(df)}\n\n",
        "### Sentiment Distribution\n",
        f"- Positive: {sentiment_counts.get('Positive', 0)} reviews\n",
        f"- Negative: {sentiment_counts.get('Negative', 0)} reviews\n",
        f"- Neutral: {sentiment_counts.get('Neutral', 0)} reviews\n\n",
        "### Top Mentioned Issues\n",
    ]

    if issue_counts:
        parts.append("| Issue | Occurrences |\n|-------|-------------|\n")
        parts.extend(
            f"| {issue.capitalize()} | {count} |\n"
            for issue, count in issue_counts.most_common(top_n)
        )
    else:
        # No issue was extracted anywhere: fall back to the negative reviews
        # that occur most often.
        parts.append(
            f"No issues extracted by the model. Here are the top {top_n} most frequent negative reviews:\n"
        )
        top_negatives = negative_rows['review_text'].value_counts().head(top_n)
        parts.extend(
            f"- \"{review}\" (appeared {count} times)\n"
            for review, count in top_negatives.items()
        )

    if not negative_rows.empty:
        parts.append("\n### Representative Negative Feedback\n")
        parts.extend(
            f"- \"{review}\"\n"
            for review in negative_rows['review_text'].head(max_examples)
        )

    return "".join(parts)

# Build the Markdown report from the annotated DataFrame and print it.
report = generate_summary(df)
print(report)

# 10. Export Results
# Writes every column of df (review text, raw model reply, parsed sentiment
# and issues) without the index column.
# NOTE(review): 'issues' cells are Python lists, so they are presumably
# written as their string representation — confirm that is acceptable for
# whoever reads the CSV.
df.to_csv("car_rental_review_analysis_results.csv", index=False)


In [None]:
# 11. Provide Download Link in Notebook
from IPython.display import HTML
import base64

def create_download_link(filename, title="Download CSV file"):
    """Return an HTML anchor that downloads *filename* as an inline data URI.

    The file is read in binary mode, base64-encoded and embedded in the
    link's ``href``, so the link works without serving the file separately.

    Args:
        filename: Path of the file to embed.
        title: Link text. It is inserted as-is, so it may contain markup.

    Returns:
        An ``IPython.display.HTML`` object wrapping the ``<a>`` element.
    """
    from html import escape
    from os.path import basename

    with open(filename, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    # The download attribute is a suggested file name, not a path; escape it
    # so quotes or angle brackets in the name cannot break out of the attribute.
    download_name = escape(basename(filename), quote=True)
    return HTML(f'<a download="{download_name}" href="data:text/csv;base64,{b64}">{title}</a>')

# Build the link for the results CSV written in step 10. This is the last
# expression of the cell, so its value is presumably what the notebook renders.
create_download_link('car_rental_review_analysis_results.csv')
