In [1]:
# STEP 1: Install SDK
!pip install -q ibm-watsonx-ai ibm-boto3

In [2]:
# STEP 2: Imports
import os, types, re
import pandas as pd
import ibm_boto3
from botocore.client import Config
from ibm_watsonx_ai import Credentials
from ibm_watsonx_ai.foundation_models import ModelInference

In [3]:
# STEP 3: IBM COS Setup
# The API key is read from the IBM_CLOUD_API_KEY environment variable instead of
# being embedded in the notebook. A key that was ever saved in a notebook should be
# treated as leaked and rotated. A missing variable raises KeyError naming it.
cos_client = ibm_boto3.client(
    service_name='s3',
    ibm_api_key_id=os.environ["IBM_CLOUD_API_KEY"],
    ibm_auth_endpoint="https://iam.cloud.ibm.com/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3.us-south.cloud-object-storage.appdomain.cloud'
)
bucket = 'abucket-bg77cl645y40cp5'
object_key = 'hotel_reviews_large.csv'

# Streaming body of the CSV object stored in the bucket.
body = cos_client.get_object(Bucket=bucket, Key=object_key)['Body']

# Compatibility shim: give the body an `__iter__` attribute when it lacks one.
# The probe and the assigned attribute must be the same name; previously they
# differed (`_iter` vs `iter_`), so the check was always true and the attribute it
# set was never looked at by anything.
# NOTE(review): presumably only needed for pandas versions that test for
# `__iter__` to recognise file-like objects — confirm, and drop if not required.
if not hasattr(body, "__iter__"):
    body.__iter__ = types.MethodType(lambda self: 0, body)

# Parse the downloaded CSV into the working DataFrame used by the later cells.
df = pd.read_csv(body)

In [4]:
# STEP 4: watsonx.ai Setup
# Credentials come from the IBM_CLOUD_API_KEY environment variable so that the
# secret is never stored in the notebook. A key that was ever saved in a notebook
# should be rotated. A missing variable raises KeyError naming it.
creds = Credentials(
    url="https://us-south.ml.cloud.ibm.com",
    api_key=os.environ["IBM_CLOUD_API_KEY"]
)

# Inference handle for the Granite instruct model used by the review loop.
# The project id can be overridden through WATSONX_PROJECT_ID; the previous value
# stays as the default so existing runs behave the same.
# NOTE(review): with decoding_method "greedy" the temperature setting presumably
# has no effect (it applies to sampling) — confirm against the watsonx.ai docs.
model = ModelInference(
    model_id="ibm/granite-13b-instruct-v2",
    credentials=creds,
    project_id=os.environ.get("WATSONX_PROJECT_ID", "37992c12-d89d-4fa1-82c9-4aa6e9d9a89f"),
    params={"temperature": 0.2, "decoding_method": "greedy", "max_new_tokens": 150}
)



In [5]:
# STEP 5: Prompt Template
def generate_prompt(review):
    """Build the sentiment/topic analysis prompt for a single hotel review.

    Args:
        review: The review to analyze. It is interpolated with default string
            formatting, so non-string values are rendered via their text form.

    Returns:
        The full prompt string: fixed instructions and an example of the expected
        "Sentiment:/Topics:" reply, followed by the review wrapped in triple single
        quotes and a trailing "Your Response:" cue.
    """
    # Fixed part of the prompt, up to (and including) the blank line before the review.
    instructions = """
You are an AI assistant that analyzes hotel customer reviews.

Your tasks:
1. Classify the review's sentiment as one of: Positive, Neutral, or Negative.
2. Identify exactly 2–3 specific topics mentioned in the review (e.g., food, service, cleanliness, location, check-in, room, staff).

⚠️ Very important: Follow **this exact response format**, no deviations:

Sentiment: <Positive|Neutral|Negative>
Topics: <comma-separated list of 2–3 relevant hotel-related keywords>

Example Output:
Sentiment: Positive
Topics: staff, cleanliness, room

Now analyze the following review:

"""
    # The review is delimited by triple single quotes so it stands apart from the instructions.
    quoted_review = f"'''{review}'''"
    closing = "\n\nYour Response:\n"
    return instructions + quoted_review + closing

In [6]:
# STEP 6: Inference and Extraction
SENTIMENT_LABELS = ("positive", "neutral", "negative")
# Substrings that mark a line as a probable topic list when the model dropped the label.
TOPIC_HINTS = ("staff", "room", "clean", "food", "check", "location", "spa", "wifi", "pool", "bed")


def _normalize_output(output):
    """Rebuild a "Sentiment:/Topics:" layout when the reply lacks the "Sentiment:" label.

    Args:
        output: Stripped text generated by the model.

    Returns:
        `output` unchanged when it already contains "Sentiment:" or is a one-line
        "<sentiment>, topic, ..." reply (that form is handled by the comma-separated
        fallback in `_parse_analysis`); otherwise a rebuilt
        "Sentiment: <label>\nTopics: <line>" string, where either line may be missing.
    """
    if "Sentiment:" in output:
        return output

    lines = output.splitlines()
    # Leave the bare comma-separated form alone: rewriting it here would hide it
    # from the fallback that was written to parse it.
    if len(lines) == 1 and lines[0].split(",")[0].strip().lower() in SENTIMENT_LABELS:
        return output

    sentiment_line = ""
    topic_line = ""
    for line in lines:
        found = re.search(r"(?i)\b(positive|neutral|negative)\b", line)
        if found and not sentiment_line:
            # Keep only the label itself; prefixing the whole line (as before) made
            # the later "sentiment: <label>" pattern miss lines like "It is positive".
            sentiment_line = f"Sentiment: {found.group(1).capitalize()}"
        elif any(kw in line.lower() for kw in TOPIC_HINTS):
            # Drop a label the model already wrote so it is not duplicated.
            unlabeled = re.sub(r"(?i)^\s*topics\s*:\s*", "", line).strip()
            topic_line = f"Topics: {unlabeled}"
    return f"{sentiment_line}\n{topic_line}".strip()


def _parse_analysis(output):
    """Extract `(sentiment, topics)` from a (normalized) model reply.

    Args:
        output: Text returned by `_normalize_output`.

    Returns:
        A tuple `(sentiment, topic_list)`. `sentiment` is "Positive", "Neutral",
        "Negative" or "Unknown"; `topic_list` is a string and falls back to
        "General" when no topics could be found.
    """
    sentiment = "Unknown"
    topic_list = "Unknown"

    # 1. Properly labelled reply.
    sentiment_match = re.search(r"(?i)sentiment\s*:\s*(positive|neutral|negative)", output)
    topics_match = re.search(r"(?i)topics\s*:\s*([^\n\r]+)", output)
    if sentiment_match:
        sentiment = sentiment_match.group(1).capitalize()
    if topics_match:
        topic_list = topics_match.group(1).strip()

    # 2. Simple comma-separated reply, e.g. "Positive, staff, room".
    if sentiment == "Unknown" and topic_list == "Unknown":
        parts = [p.strip() for p in output.split(",")]
        first = parts[0].lower()
        if first in SENTIMENT_LABELS:
            sentiment = first.capitalize()
            topic_list = ", ".join(parts[1:]) if len(parts) > 1 else "General"
        else:
            # Best effort: keep the raw text as topics rather than discarding it.
            topic_list = ", ".join(parts)

    # 3. No usable topics, whatever the sentiment turned out to be.
    if topic_list == "Unknown" or topic_list.strip() == "":
        topic_list = "General"

    return sentiment, topic_list


sentiments = []
topics = []

for i, review in enumerate(df["review"][:20]):
    prompt = generate_prompt(review)

    try:
        response = model.generate(prompt=prompt)
        output = _normalize_output(response["results"][0]["generated_text"].strip())

        print(f"\n--- Output for Review {i+1} ---\n{output}\n--------------------------")

        sentiment, topic_list = _parse_analysis(output)

    except Exception as e:
        # Deliberate best effort: one failed request must not abort the whole batch,
        # and every review still needs a row so the result lists stay aligned.
        print(f"⚠️ Error in Review {i+1}: {e}")
        sentiment = "Unknown"
        topic_list = "General"

    sentiments.append(sentiment)
    topics.append(topic_list)


--- Output for Review 1 ---
Sentiment: Positive
Topics: staff, cleanliness, room
--------------------------

--- Output for Review 2 ---
Sentiment: Negative
Topics: breakfast, check-in
--------------------------

--- Output for Review 3 ---
Sentiment: Positive
Topics: room, service, balcony
--------------------------

--- Output for Review 4 ---
Sentiment: Negative
Topics: room, hot water, smell
--------------------------

--- Output for Review 5 ---
Sentiment: Positive
Topics: food, location
--------------------------

--- Output for Review 6 ---
Sentiment: Negative
Topics: noise, air conditioning
--------------------------

--- Output for Review 7 ---
Sentiment: Positive
Topics: staff, check-in
--------------------------

--- Output for Review 8 ---
Sentiment: Negative
Topics: bed, bathroom
--------------------------

--- Output for Review 9 ---
Sentiment: Positive
Topics: pool, spa
--------------------------

--- Output for Review 10 ---
Sentiment: Neutral
Topics: location, service

In [7]:
# STEP 7: Save Output
output_file = "hotel_review_sentiment_output.csv"

# Attach the predictions to a fresh copy of the first 20 rows (the same rows the
# inference loop iterated over), then write the result to a local CSV without the index.
df_out = df.head(20).assign(
    Predicted_Sentiment=sentiments,
    Predicted_Topics=topics,
)
df_out.to_csv(output_file, index=False)

In [8]:
# STEP 8: Upload to COS
# The object key reuses the local file name, so the CSV lands in the bucket under
# the same name. The confirmation is only printed if the upload did not raise.
cos_client.upload_file(Bucket=bucket, Key=output_file, Filename=output_file)
print("\n✅ Final CSV saved and uploaded to COS successfully.")



✅ Final CSV saved and uploaded to COS successfully.
