In [None]:
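# [REDACTED] An API key was pasted here in plain text. Revoke that key and never store credentials in the notebook; supply them via an environment variable or a secrets manager.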

In [None]:
pip install google-generativeai

In [2]:
import pandas as pd
import json
import google.generativeai as genai

# Read the API key from the environment so it is never stored in the notebook.
import os

api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable before running this notebook."
    )
genai.configure(api_key=api_key)

# Model handle shared by every send_prompt() variant in this notebook.
model = genai.GenerativeModel("gemini-2.5-pro")

# ---------- Load Dataset ----------
# NOTE: absolute, machine-specific path; point it at your local copy of the workbook.
file_path = '/Users/nonny/Downloads/Remove Disagreement Version(2).xlsx'  # Change to your file path
df = pd.read_excel(file_path)

# Rename the 'Column1' column to 'ID'.
df = df.rename(columns={'Column1': 'ID'})


def _extract_reviews(frame, column):
    """Return the de-duplicated ('ID', 'Review') rows of `frame` whose `column` is non-blank."""
    reviews = frame[['ID', column]].rename(columns={column: 'Review'})
    # Cast only for the blank check: the `.str` accessor raises on a non-object
    # column (for example a review column that is entirely empty).
    has_text = reviews['Review'].notna() & (reviews['Review'].astype(str).str.strip() != "")
    return reviews[has_text].drop_duplicates()


df_positive = _extract_reviews(df, 'PositiveReview')
df_negative = _extract_reviews(df, 'NegativeReview')

# Stack positive rows on top of negative rows, then order by ID.
df_combined = pd.concat([df_positive, df_negative], ignore_index=True)
# A stable sort keeps the positive review ahead of the negative review that
# shares its ID, so the processing order is the same on every run.
df_combined = df_combined.sort_values(by=['ID'], kind='mergesort').reset_index(drop=True)


In [3]:
def build_full_prompt(messages):
    """Flatten a list of chat messages into one transcript string.

    Each message dict is rendered as ``"<Role>: <content>"``, where the role
    is passed through ``str.capitalize``; the rendered turns are joined with
    a blank line between them.
    """
    rendered_turns = []
    for message in messages:
        speaker = message['role'].capitalize()
        rendered_turns.append(f"{speaker}: {message['content']}")
    return "\n\n".join(rendered_turns)

def parse_response(response_json, review_id, full_prompt):
    """Convert the model's JSON reply into flat result rows.

    The reply is expected to look like
    ``{"Topics": [{"<topic>": [{"text": ..., "label": ...}, ...], ...}]}``.
    Anything before the first ``{`` or after the last ``}`` (Markdown code
    fences, a language tag, explanatory prose) is discarded before parsing.
    A bare object is accepted where a one-element list is expected, both for
    ``Topics`` and for the entries of a topic; a null entry list is skipped.

    Args:
        response_json: Raw text returned by the model.
        review_id: Identifier copied into the ``ID`` field of every row.
        full_prompt: Prompt text copied into the ``FullPrompt`` field of every row.

    Returns:
        A list of dicts with the keys ``ID``, ``FullPrompt``, ``Topics``,
        ``Text`` and ``NegPos``. Missing or null ``text`` / ``label`` values
        become empty strings. If the reply cannot be parsed, the error is
        printed (not raised) and the rows collected so far are returned.
    """
    results = []
    try:
        cleaned = response_json.strip()

        # Keep only the outermost JSON object; this also removes code fences.
        start, end = cleaned.find("{"), cleaned.rfind("}")
        if start != -1 and end > start:
            cleaned = cleaned[start:end + 1]

        parsed = json.loads(cleaned)
        topics_list = parsed["Topics"]
        if isinstance(topics_list, dict):
            topics_list = [topics_list]

        for topic_group in topics_list:
            for topic, entries in topic_group.items():
                if isinstance(entries, dict):
                    entries = [entries]
                for entry in entries or []:
                    results.append({
                        "ID": review_id,
                        "FullPrompt": full_prompt,
                        "Topics": topic,
                        "Text": entry.get("text") or "",
                        "NegPos": entry.get("label") or ""
                    })
    except Exception as e:
        # Best effort: one malformed reply must not stop the whole batch.
        print(f"Failed to parse response for ID {review_id}: {e}")
    return results

In [None]:
# Zero shot (Use this)
def send_prompt(review_text):
    """Classify one review with the zero-shot prompt.

    The instruction text and the review are flattened into a single
    transcript by ``build_full_prompt`` and sent to the notebook-level
    ``model``.

    Args:
        review_text: The review to classify.

    Returns:
        A ``(response_text, full_prompt)`` tuple: the text of the model's
        reply and the exact prompt string that was sent.
    """
    # Instructions, topic list and output template, concatenated verbatim.
    system_prompt = "".join([
        'Please output the following [text] according to the [constraints] in the [output format].\n ',
        '[constraints]* The output should only be in the [output format], and you must classify which part of the text corresponds to which Topic in the [Topics]. ',
        'Additionally, determine whether each classified element is Positive or Negative. If there is no corresponding element, put Null for both `text` and `label`. ',
        'The most important constraint is not to include any extra characters such as newline characters, `json`, or backticks, or any other unnecessary text outside of the [output format]. ',
        'If there are two or more elements of the same Topic, number each so that they do not conflict when converted to json format data. ',
        'However, if they have the same NegPos label, keep them in one Text as much as possible.* \n ',
        '[Topics] Room, Staff, Location, Food, Price, Facility, Check-in, Check-out, Taxi-issue, Booking-issue, Off \n\n ',
        '[output format] ',
        '{"Topics":[{"Room":[{"text": "test","label": "Positive"}],',
        '"Staff":[{"text": null,"label": null}],',
        '"Location":[{"text": "test","label": "Positive"}],',
        '"Food":[{"text": "test","label": "Negative"}],',
        '"Price":[{"text": "test","label": "Positive"}],',
        '"Facility":[{"text": "test","label": "Negative"}],',
        '"Check-in":[{"text": "test","label": "Positive"}],',
        '"Check-out":[{"text": null,"label": null}],',
        '"Taxi-issue":[{"text": null,"label": null}],',
        '"Booking-issue":[{"text": null,"label": null}],',
        '"Off":[{"text": null,"label": null}]}]}',
    ])

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": review_text},
    ]
    full_prompt = build_full_prompt(conversation)

    sampling = {"temperature": 0, "top_p": 0.05}
    reply = model.generate_content([full_prompt], generation_config=sampling)

    return reply.text, full_prompt

# --------------- Classify every review (zero-shot) ----------------
final_results = []

for row_label, record in df_combined.iterrows():
    review_text, review_id = record['Review'], record['ID']
    print(f"Processing review ID {review_id}...")

    try:
        response_content, full_prompt = send_prompt(review_text)
        print(f"Raw response for ID {review_id}:\n{response_content}\n")

        # Append the rows parsed from this reply.
        final_results += parse_response(response_content, review_id, full_prompt)

    except Exception as exc:
        # Report the failure and move on to the next review.
        print(f"Error processing ID {review_id}: {exc}")

# Write every collected row to one CSV file.
output_csv_path = 'ABA_dataset_2_gemini_2.5_pro_0_shot_1.csv'
output_df = pd.DataFrame(final_results)
output_df.to_csv(output_csv_path, index=False)

print(" Done! Output saved.")


In [None]:
# One shot (use this)
def send_prompt(review_text):
    """Classify one review with the one-shot prompt.

    The instruction text, one worked example (review plus expected answer)
    and the review itself are flattened into a single transcript by
    ``build_full_prompt`` and sent to the notebook-level ``model``.

    Args:
        review_text: The review to classify.

    Returns:
        A ``(response_text, full_prompt)`` tuple: the text of the model's
        reply and the exact prompt string that was sent.
    """
    # Instructions, topic list and output template, concatenated verbatim.
    system_prompt = "".join([
        'Please output the following [text] according to the [constraints] in the [output format].\n ',
        '[constraints]* The output should only be in the [output format], and you must classify which part of the text corresponds to which Topic in the [Topics]. ',
        'Additionally, determine whether each classified element is Positive or Negative. If there is no corresponding element, put Null for both `text` and `label`. ',
        'The most important constraint is not to include any extra characters such as newline characters, `json`, or backticks, or any other unnecessary text outside of the [output format]. ',
        'If there are two or more elements of the same Topic, number each so that they do not conflict when converted to json format data. ',
        'However, if they have the same NegPos label, keep them in one Text as much as possible.* \n ',
        '[Topics] Room, Staff, Location, Food, Price, Facility, Check-in, Check-out, Taxi-issue, Booking-issue, Off \n\n ',
        '[output format] ',
        '{"Topics":[{"Room":[{"text": "test","label": "Positive"}],',
        '"Staff":[{"text": null,"label": null}],',
        '"Location":[{"text": "test","label": "Positive"}],',
        '"Food":[{"text": "test","label": "Negative"}],',
        '"Price":[{"text": "test","label": "Positive"}],',
        '"Facility":[{"text": "test","label": "Negative"}],',
        '"Check-in":[{"text": "test","label": "Positive"}],',
        '"Check-out":[{"text": null,"label": null}],',
        '"Taxi-issue":[{"text": null,"label": null}],',
        '"Booking-issue":[{"text": null,"label": null}],',
        '"Off":[{"text": null,"label": null}]}]}',
    ])

    # Worked example: the review and the answer expected for it.
    example_review = "The room is enough big. But the room was a little bit dirty."
    example_answer = "".join([
        '{"Topics":[{"Room1":[{"text": "The room is enough big.","label": "Positive"}],',
        '"Room2":[{"text": "the room was a little bit dirty.","label": "Negative"}],',
        '"Staff":[{"text": null,"label": null}],',
        '"Location":[{"text": null,"label": null}],',
        '"Food":[{"text": null,"label": null}],',
        '"Price":[{"text": null,"label": null}],',
        '"Facility":[{"text": null,"label": null}],',
        '"Check-in":[{"text": null,"label": null}],',
        '"Check-out":[{"text": null,"label": null}],',
        '"Taxi-issue":[{"text": null,"label": null}],',
        '"Booking-issue":[{"text": null,"label": null}],',
        '"Off":[{"text": null,"label": null}]}]}',
    ])

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": example_review},
        {"role": "assistant", "content": example_answer},
        {"role": "user", "content": review_text},
    ]
    full_prompt = build_full_prompt(conversation)

    sampling = {"temperature": 0, "top_p": 0.05}
    reply = model.generate_content([full_prompt], generation_config=sampling)

    return reply.text, full_prompt

# ----------- Repeat the one-shot classification over ALL rows -------------
for run_num in range(1, 4):  # run_num is 1, 2, 3
    final_results = []

    for row_label, record in df_combined.iterrows():
        review_text, review_id = record['Review'], record['ID']

        print(f"[Run {run_num}] Processing review ID {review_id}...")

        try:
            response_content, full_prompt = send_prompt(review_text)
            print(f"[Run {run_num}] Raw response for ID {review_id}:\n{response_content}\n")

            # Append the rows parsed from this reply.
            final_results += parse_response(response_content, review_id, full_prompt)

        except Exception as exc:
            # Report the failure and move on to the next review.
            print(f"[Run {run_num}] Error processing ID {review_id}: {exc}")

    # One CSV per run, numbered by run_num.
    output_csv_path = f'ABA_dataset_2_gemini_2.5_pro_1_shot_{run_num}.csv'
    output_df = pd.DataFrame(final_results)
    output_df.to_csv(output_csv_path, index=False)
    print(f" Done! Results saved to {output_csv_path}")



In [None]:
# two shot (use this)
def send_prompt(review_text):
    """Classify one review with the two-shot prompt.

    The instruction text, two worked examples (review plus expected answer)
    and the review itself are flattened into a single transcript by
    ``build_full_prompt`` and sent to the notebook-level ``model``.

    Args:
        review_text: The review to classify.

    Returns:
        A ``(response_text, full_prompt)`` tuple: the text of the model's
        reply and the exact prompt string that was sent.
    """
    # Instructions, topic list and output template, concatenated verbatim.
    system_prompt = "".join([
        'Please output the following [text] according to the [constraints] in the [output format].\n ',
        '[constraints]* The output should only be in the [output format], and you must classify which part of the text corresponds to which Topic in the [Topics]. ',
        'Additionally, determine whether each classified element is Positive or Negative. If there is no corresponding element, put Null for both `text` and `label`. ',
        'The most important constraint is not to include any extra characters such as newline characters, `json`, or backticks, or any other unnecessary text outside of the [output format]. ',
        'If there are two or more elements of the same Topic, number each so that they do not conflict when converted to json format data. ',
        'However, if they have the same NegPos label, keep them in one Text as much as possible.* \n ',
        '[Topics] Room, Staff, Location, Food, Price, Facility, Check-in, Check-out, Taxi-issue, Booking-issue, Off \n\n ',
        '[output format] ',
        '{"Topics":[{"Room":[{"text": "test","label": "Positive"}],',
        '"Staff":[{"text": null,"label": null}],',
        '"Location":[{"text": "test","label": "Positive"}],',
        '"Food":[{"text": "test","label": "Negative"}],',
        '"Price":[{"text": "test","label": "Positive"}],',
        '"Facility":[{"text": "test","label": "Negative"}],',
        '"Check-in":[{"text": "test","label": "Positive"}],',
        '"Check-out":[{"text": null,"label": null}],',
        '"Taxi-issue":[{"text": null,"label": null}],',
        '"Booking-issue":[{"text": null,"label": null}],',
        '"Off":[{"text": null,"label": null}]}]}',
    ])

    # Worked examples as (review, expected answer) pairs, in prompt order.
    few_shot_examples = [
        # Example 1
        (
            "The room is enough big. But the room was a little bit dirty.",
            "".join([
                '{"Topics":[{"Room1":[{"text": "The room is enough big.","label": "Positive"}],',
                '"Room2":[{"text": "the room was a little bit dirty.","label": "Negative"}],',
                '"Staff":[{"text": null,"label": null}],',
                '"Location":[{"text": null,"label": null}],',
                '"Food":[{"text": null,"label": null}],',
                '"Price":[{"text": null,"label": null}],',
                '"Facility":[{"text": null,"label": null}],',
                '"Check-in":[{"text": null,"label": null}],',
                '"Check-out":[{"text": null,"label": null}],',
                '"Taxi-issue":[{"text": null,"label": null}],',
                '"Booking-issue":[{"text": null,"label": null}],',
                '"Off":[{"text": null,"label": null}]}]}',
            ]),
        ),
        # Example 2
        (
            "The room was very clean, well decorated and modern, although not big. It was cheap.",
            "".join([
                '{"Topics":[{"Room1":[{"text": "The room was very clean, well decorated and modern","label": "Positive"}],',
                '"Room2":[{"text": "although not big","label": "Negative"}],',
                '"Price":[{"text": "cheap","label": "Positive"}],',
                '"Staff":[{"text": null,"label": null}],',
                '"Location":[{"text": null,"label": null}],',
                '"Food":[{"text": null,"label": null}],',
                '"Facility":[{"text": null,"label": null}],',
                '"Check-in":[{"text": null,"label": null}],',
                '"Check-out":[{"text": null,"label": null}],',
                '"Taxi-issue":[{"text": null,"label": null}],',
                '"Booking-issue":[{"text": null,"label": null}],',
                '"Off":[{"text": null,"label": null}]}]}',
            ]),
        ),
    ]

    # System turn, then each example as a user/assistant pair, then the real input.
    conversation = [{"role": "system", "content": system_prompt}]
    for example_review, example_answer in few_shot_examples:
        conversation.append({"role": "user", "content": example_review})
        conversation.append({"role": "assistant", "content": example_answer})
    conversation.append({"role": "user", "content": review_text})

    full_prompt = build_full_prompt(conversation)

    sampling = {"temperature": 0, "top_p": 0.05}
    reply = model.generate_content([full_prompt], generation_config=sampling)

    return reply.text, full_prompt

# ----------- Repeat the two-shot classification over ALL rows -------------
for run_num in range(1, 4):  # run_num is 1, 2, 3
    final_results = []

    for row_label, record in df_combined.iterrows():
        review_text, review_id = record['Review'], record['ID']

        print(f"[Run {run_num}] Processing review ID {review_id}...")

        try:
            response_content, full_prompt = send_prompt(review_text)
            print(f"[Run {run_num}] Raw response for ID {review_id}:\n{response_content}\n")

            # Append the rows parsed from this reply.
            final_results += parse_response(response_content, review_id, full_prompt)

        except Exception as exc:
            # Report the failure and move on to the next review.
            print(f"[Run {run_num}] Error processing ID {review_id}: {exc}")

    # One CSV per run, numbered by run_num.
    output_csv_path = f'ABA_dataset_2_gemini_2.5_pro_2_shot_{run_num}.csv'
    output_df = pd.DataFrame(final_results)
    output_df.to_csv(output_csv_path, index=False)
    print(f" Done! Results saved to {output_csv_path}")



In [5]:
# three shot (use this)
def send_prompt(review_text):
    """Classify one review with the three-shot prompt.

    The instruction text, three worked examples (review plus expected
    answer) and the review itself are flattened into a single transcript by
    ``build_full_prompt`` and sent to the notebook-level ``model``.

    Args:
        review_text: The review to classify.

    Returns:
        A ``(response_text, full_prompt)`` tuple: the text of the model's
        reply and the exact prompt string that was sent.
    """
    # Instructions, topic list and output template, concatenated verbatim.
    system_prompt = "".join([
        'Please output the following [text] according to the [constraints] in the [output format].\n ',
        '[constraints]* The output should only be in the [output format], and you must classify which part of the text corresponds to which Topic in the [Topics]. ',
        'Additionally, determine whether each classified element is Positive or Negative. If there is no corresponding element, put Null for both `text` and `label`. ',
        'The most important constraint is not to include any extra characters such as newline characters, `json`, or backticks, or any other unnecessary text outside of the [output format]. ',
        'If there are two or more elements of the same Topic, number each so that they do not conflict when converted to json format data. ',
        'However, if they have the same NegPos label, keep them in one Text as much as possible.* \n ',
        '[Topics] Room, Staff, Location, Food, Price, Facility, Check-in, Check-out, Taxi-issue, Booking-issue, Off \n\n ',
        '[output format] ',
        '{"Topics":[{"Room":[{"text": "test","label": "Positive"}],',
        '"Staff":[{"text": null,"label": null}],',
        '"Location":[{"text": "test","label": "Positive"}],',
        '"Food":[{"text": "test","label": "Negative"}],',
        '"Price":[{"text": "test","label": "Positive"}],',
        '"Facility":[{"text": "test","label": "Negative"}],',
        '"Check-in":[{"text": "test","label": "Positive"}],',
        '"Check-out":[{"text": null,"label": null}],',
        '"Taxi-issue":[{"text": null,"label": null}],',
        '"Booking-issue":[{"text": null,"label": null}],',
        '"Off":[{"text": null,"label": null}]}]}',
    ])

    # Worked examples as (review, expected answer) pairs, in prompt order.
    few_shot_examples = [
        # Example 1
        (
            "The room is enough big. But the room was a little bit dirty.",
            "".join([
                '{"Topics":[{"Room1":[{"text": "The room is enough big.","label": "Positive"}],',
                '"Room2":[{"text": "the room was a little bit dirty.","label": "Negative"}],',
                '"Staff":[{"text": null,"label": null}],',
                '"Location":[{"text": null,"label": null}],',
                '"Food":[{"text": null,"label": null}],',
                '"Price":[{"text": null,"label": null}],',
                '"Facility":[{"text": null,"label": null}],',
                '"Check-in":[{"text": null,"label": null}],',
                '"Check-out":[{"text": null,"label": null}],',
                '"Taxi-issue":[{"text": null,"label": null}],',
                '"Booking-issue":[{"text": null,"label": null}],',
                '"Off":[{"text": null,"label": null}]}]}',
            ]),
        ),
        # Example 2
        (
            "The room was very clean, well decorated and modern, although not big. It was cheap.",
            "".join([
                '{"Topics":[{"Room1":[{"text": "The room was very clean, well decorated and modern","label": "Positive"}],',
                '"Room2":[{"text": "although not big","label": "Negative"}],',
                '"Price":[{"text": "cheap","label": "Positive"}],',
                '"Staff":[{"text": null,"label": null}],',
                '"Location":[{"text": null,"label": null}],',
                '"Food":[{"text": null,"label": null}],',
                '"Facility":[{"text": null,"label": null}],',
                '"Check-in":[{"text": null,"label": null}],',
                '"Check-out":[{"text": null,"label": null}],',
                '"Taxi-issue":[{"text": null,"label": null}],',
                '"Booking-issue":[{"text": null,"label": null}],',
                '"Off":[{"text": null,"label": null}]}]}',
            ]),
        ),
        # Example 3
        (
            "Location. The hotel was new and close to the airport, which made traveling easy. However, there was a lot of street noise outside the window. Staff. The receptionist was polite and friendly. However, check-in took longer than expected. The hotel lobby was welcoming and spacious. The room had a comfortable bed, but the air conditioning was loud at night. The neighbors were noisy through the walls, and the WiFi in the room was weak and unreliable. The breakfast buffet was delicious; however, the coffee was terrible. The price was reasonable for the quality. The building was charming with historical architecture.",
            "".join([
                '{"Topics":[{"Room1":[{"text": "The room had a comfortable bed.","label": "Positive"}],',
                '"Room2":[{"text": "The air conditioning was loud at night.","label": "Negative"}],',
                '"Room3":[{"text": "The neighbors were noisy through the walls.","label": "Negative"}],',
                '"Room4":[{"text": "the WiFi in the room was weak and unreliable.","label": "Negative"}],',
                '"Staff":[{"text": "The receptionist was polite and friendly.","label": "Positive"}],',
                '"Location1":[{"text": "close to the airport, which made traveling easy.","label": "Positive"}],',
                '"Location2":[{"text": "there was a lot of street noise outside the window.","label": "Negative"}],',
                '"Food1":[{"text": "The breakfast buffet was delicious.","label": "Positive"}],',
                '"Food2":[{"text": "the coffee was terrible.","label": "Negative"}],',
                '"Price":[{"text": "The price was reasonable for the quality.","label": "Positive"}],',
                '"Facility1":[{"text": "The hotel was new.","label": "Positive"}],',
                '"Facility2":[{"text": "The hotel lobby was welcoming and spacious.","label": "Positive"}],',
                '"Facility3":[{"text": "The building was charming with historical architecture.","label": "Positive"}],',
                '"Check-in":[{"text": "check-in took longer than expected.","label": "Negative"}],',
                '"Check-out":[{"text": null,"label": null}],',
                '"Taxi-issue":[{"text": null,"label": null}],',
                '"Booking-issue":[{"text": null,"label": null}],',
                '"Off":[{"text": "Location. Staff.","label": null}]}]}',
            ]),
        ),
    ]

    # System turn, then each example as a user/assistant pair, then the real input.
    conversation = [{"role": "system", "content": system_prompt}]
    for example_review, example_answer in few_shot_examples:
        conversation.append({"role": "user", "content": example_review})
        conversation.append({"role": "assistant", "content": example_answer})
    conversation.append({"role": "user", "content": review_text})

    full_prompt = build_full_prompt(conversation)

    sampling = {"temperature": 0, "top_p": 0.05}
    reply = model.generate_content([full_prompt], generation_config=sampling)

    return reply.text, full_prompt

# ----------- Run for ALL rows -------------
# Three repetitions (run_num = 1, 2, 3), each written to its own CSV.
# The bound was range(1, 2), which executed a single run even though the
# comment said "1 to 3"; shrink it only for a quick smoke test.
for run_num in range(1, 4):  # 1 to 3
    final_results = []

    for idx, row in df_combined.iterrows():
        review_text = row['Review']
        review_id = row['ID']

        print(f"[Run {run_num}] Processing review ID {review_id}...")

        try:
            response_content, full_prompt = send_prompt(review_text)
            print(f"[Run {run_num}] Raw response for ID {review_id}:\n{response_content}\n")

            # Collect the rows parsed from this reply.
            review_results = parse_response(response_content, review_id, full_prompt)
            final_results.extend(review_results)

        except Exception as e:
            # Report the failure and continue with the next review.
            print(f"[Run {run_num}] Error processing ID {review_id}: {e}")

    # Save results
    output_df = pd.DataFrame(final_results)
    output_csv_path = f'ABA_dataset_2_gemini_2.5_pro_3_shot_{run_num}.csv'
    output_df.to_csv(output_csv_path, index=False)
    print(f" Done! Results saved to {output_csv_path}")



[Run 1] Processing review ID 1...
[Run 1] Raw response for ID 1:
{"Topics":[{"Room":[{"text": "comfortable apartments","label": "Positive"}],"Staff":[{"text": "Staff is extremely helpful and easy to communicate with","label": "Positive"}],"Location":[{"text": "close to the airport, to very clean beach.","label": "Positive"}],"Food":[{"text": "Tasty food on the first floor","label": "Positive"}],"Price":[{"text": null,"label": null}],"Facility":[{"text": "New, apartments, comfortable restaurant for both cozy evenings and calm work to escape the heat in the midday","label": "Positive"}],"Check-in":[{"text": null,"label": null}],"Check-out":[{"text": null,"label": null}],"Taxi-issue":[{"text": null,"label": null}],"Booking-issue":[{"text": null,"label": null}],"Off":[{"text": null,"label": null}]}]}

[Run 1] Processing review ID 1...
[Run 1] Raw response for ID 1:
{"Topics":[{"Room":[{"text": null,"label": null}],"Staff":[{"text": null,"label": null}],"Location":[{"text": null,"label": nu

In [None]:
# four shot (use this)
def send_prompt(review_text):
    """Classify one review with the four-shot prompt.

    The instruction text, four worked examples (review plus expected answer)
    and the review itself are flattened into a single transcript by
    ``build_full_prompt`` and sent to the notebook-level ``model``.

    Args:
        review_text: The review to classify.

    Returns:
        A ``(response_text, full_prompt)`` tuple: the text of the model's
        reply and the exact prompt string that was sent.
    """
    # Instructions, topic list and output template, concatenated verbatim.
    system_prompt = "".join([
        'Please output the following [text] according to the [constraints] in the [output format].\n ',
        '[constraints]* The output should only be in the [output format], and you must classify which part of the text corresponds to which Topic in the [Topics]. ',
        'Additionally, determine whether each classified element is Positive or Negative. If there is no corresponding element, put Null for both `text` and `label`. ',
        'The most important constraint is not to include any extra characters such as newline characters, `json`, or backticks, or any other unnecessary text outside of the [output format]. ',
        'If there are two or more elements of the same Topic, number each so that they do not conflict when converted to json format data. ',
        'However, if they have the same NegPos label, keep them in one Text as much as possible.* \n ',
        '[Topics] Room, Staff, Location, Food, Price, Facility, Check-in, Check-out, Taxi-issue, Booking-issue, Off \n\n ',
        '[output format] ',
        '{"Topics":[{"Room":[{"text": "test","label": "Positive"}],',
        '"Staff":[{"text": null,"label": null}],',
        '"Location":[{"text": "test","label": "Positive"}],',
        '"Food":[{"text": "test","label": "Negative"}],',
        '"Price":[{"text": "test","label": "Positive"}],',
        '"Facility":[{"text": "test","label": "Negative"}],',
        '"Check-in":[{"text": "test","label": "Positive"}],',
        '"Check-out":[{"text": null,"label": null}],',
        '"Taxi-issue":[{"text": null,"label": null}],',
        '"Booking-issue":[{"text": null,"label": null}],',
        '"Off":[{"text": null,"label": null}]}]}',
    ])

    # Worked examples as (review, expected answer) pairs, in prompt order.
    few_shot_examples = [
        # Example 1
        (
            "The room is enough big. But the room was a little bit dirty.",
            "".join([
                '{"Topics":[{"Room1":[{"text": "The room is enough big.","label": "Positive"}],',
                '"Room2":[{"text": "the room was a little bit dirty.","label": "Negative"}],',
                '"Staff":[{"text": null,"label": null}],',
                '"Location":[{"text": null,"label": null}],',
                '"Food":[{"text": null,"label": null}],',
                '"Price":[{"text": null,"label": null}],',
                '"Facility":[{"text": null,"label": null}],',
                '"Check-in":[{"text": null,"label": null}],',
                '"Check-out":[{"text": null,"label": null}],',
                '"Taxi-issue":[{"text": null,"label": null}],',
                '"Booking-issue":[{"text": null,"label": null}],',
                '"Off":[{"text": null,"label": null}]}]}',
            ]),
        ),
        # Example 2
        (
            "The room was very clean, well decorated and modern, although not big. It was cheap.",
            "".join([
                '{"Topics":[{"Room1":[{"text": "The room was very clean, well decorated and modern","label": "Positive"}],',
                '"Room2":[{"text": "although not big","label": "Negative"}],',
                '"Price":[{"text": "cheap","label": "Positive"}],',
                '"Staff":[{"text": null,"label": null}],',
                '"Location":[{"text": null,"label": null}],',
                '"Food":[{"text": null,"label": null}],',
                '"Facility":[{"text": null,"label": null}],',
                '"Check-in":[{"text": null,"label": null}],',
                '"Check-out":[{"text": null,"label": null}],',
                '"Taxi-issue":[{"text": null,"label": null}],',
                '"Booking-issue":[{"text": null,"label": null}],',
                '"Off":[{"text": null,"label": null}]}]}',
            ]),
        ),
        # Example 3
        (
            "Location. The hotel was new and close to the airport, which made traveling easy. However, there was a lot of street noise outside the window. Staff. The receptionist was polite and friendly. However, check-in took longer than expected. The hotel lobby was welcoming and spacious. The room had a comfortable bed, but the air conditioning was loud at night. The neighbors were noisy through the walls, and the WiFi in the room was weak and unreliable. The breakfast buffet was delicious; however, the coffee was terrible. The price was reasonable for the quality. The building was charming with historical architecture.",
            "".join([
                '{"Topics":[{"Room1":[{"text": "The room had a comfortable bed.","label": "Positive"}],',
                '"Room2":[{"text": "The air conditioning was loud at night.","label": "Negative"}],',
                '"Room3":[{"text": "The neighbors were noisy through the walls.","label": "Negative"}],',
                '"Room4":[{"text": "the WiFi in the room was weak and unreliable.","label": "Negative"}],',
                '"Staff":[{"text": "The receptionist was polite and friendly.","label": "Positive"}],',
                '"Location1":[{"text": "close to the airport, which made traveling easy.","label": "Positive"}],',
                '"Location2":[{"text": "there was a lot of street noise outside the window.","label": "Negative"}],',
                '"Food1":[{"text": "The breakfast buffet was delicious.","label": "Positive"}],',
                '"Food2":[{"text": "the coffee was terrible.","label": "Negative"}],',
                '"Price":[{"text": "The price was reasonable for the quality.","label": "Positive"}],',
                '"Facility1":[{"text": "The hotel was new.","label": "Positive"}],',
                '"Facility2":[{"text": "The hotel lobby was welcoming and spacious.","label": "Positive"}],',
                '"Facility3":[{"text": "The building was charming with historical architecture.","label": "Positive"}],',
                '"Check-in":[{"text": "check-in took longer than expected.","label": "Negative"}],',
                '"Check-out":[{"text": null,"label": null}],',
                '"Taxi-issue":[{"text": null,"label": null}],',
                '"Booking-issue":[{"text": null,"label": null}],',
                '"Off":[{"text": "Location. Staff.","label": null}]}]}',
            ]),
        ),
        # Example 4
        (
            "location, service, overall was good, Sure worth it to come back again",
            "".join([
                '{"Topics":[{"Room":[{"text": null,"label": null}],',
                '"Staff":[{"text": null,"label": null}],',
                '"Location":[{"text": null,"label": null}],',
                '"Food":[{"text": null,"label": null}],',
                '"Price":[{"text": null,"label": null}],',
                '"Facility":[{"text": null,"label": null}],',
                '"Check-in":[{"text": null,"label": null}],',
                '"Check-out":[{"text": null,"label": null}],',
                '"Taxi-issue":[{"text": null,"label": null}],',
                '"Booking-issue":[{"text": null,"label": null}],',
                '"Off":[{"text": "location, service, overall was good, Sure worth it to come back again","label": "Positive"}]}]}',
            ]),
        ),
    ]

    # System turn, then each example as a user/assistant pair, then the real input.
    conversation = [{"role": "system", "content": system_prompt}]
    for example_review, example_answer in few_shot_examples:
        conversation.append({"role": "user", "content": example_review})
        conversation.append({"role": "assistant", "content": example_answer})
    conversation.append({"role": "user", "content": review_text})

    full_prompt = build_full_prompt(conversation)

    sampling = {"temperature": 0, "top_p": 0.05}
    reply = model.generate_content([full_prompt], generation_config=sampling)

    return reply.text, full_prompt

# ----------- Repeat the four-shot classification over ALL rows -------------
for run_num in range(1, 4):  # run_num is 1, 2, 3
    final_results = []

    for row_label, record in df_combined.iterrows():
        review_text, review_id = record['Review'], record['ID']

        print(f"[Run {run_num}] Processing review ID {review_id}...")

        try:
            response_content, full_prompt = send_prompt(review_text)
            print(f"[Run {run_num}] Raw response for ID {review_id}:\n{response_content}\n")

            # Append the rows parsed from this reply.
            final_results += parse_response(response_content, review_id, full_prompt)

        except Exception as exc:
            # Report the failure and move on to the next review.
            print(f"[Run {run_num}] Error processing ID {review_id}: {exc}")

    # One CSV per run, numbered by run_num.
    output_csv_path = f'ABA_dataset_2_gemini_2.5_pro_4_shot_{run_num}.csv'
    output_df = pd.DataFrame(final_results)
    output_df.to_csv(output_csv_path, index=False)
    print(f" Done! Results saved to {output_csv_path}")



In [None]:
## five shot (use this)
def send_prompt(review_text):
    """Send one review to the model using the five-shot prompt.

    The instruction text, five worked example exchanges and ``review_text``
    are assembled into a role-tagged message list, flattened to a single
    string with ``build_full_prompt``, and passed to the module-level
    ``model``.

    Args:
        review_text: The review to classify; it becomes the final user turn.

    Returns:
        A ``(response_text, full_prompt)`` tuple holding the text of the
        model's reply and the exact prompt string that was sent.
    """
    # Task description, topic list and the JSON shape the reply must follow.
    instructions = (
        'Please output the following [text] according to the [constraints] in the [output format].\n '
        '[constraints]* The output should only be in the [output format], and you must classify which part of the text corresponds to which Topic in the [Topics]. '
        'Additionally, determine whether each classified element is Positive or Negative. If there is no corresponding element, put Null for both `text` and `label`. '
        'The most important constraint is not to include any extra characters such as newline characters, `json`, or backticks, or any other unnecessary text outside of the [output format]. '
        'If there are two or more elements of the same Topic, number each so that they do not conflict when converted to json format data. '
        'However, if they have the same NegPos label, keep them in one Text as much as possible.* \n '
        '[Topics] Room, Staff, Location, Food, Price, Facility, Check-in, Check-out, Taxi-issue, Booking-issue, Off \n\n '
        '[output format] '
        '{"Topics":[{"Room":[{"text": "test","label": "Positive"}],'
        '"Staff":[{"text": null,"label": null}],'
        '"Location":[{"text": "test","label": "Positive"}],'
        '"Food":[{"text": "test","label": "Negative"}],'
        '"Price":[{"text": "test","label": "Positive"}],'
        '"Facility":[{"text": "test","label": "Negative"}],'
        '"Check-in":[{"text": "test","label": "Positive"}],'
        '"Check-out":[{"text": null,"label": null}],'
        '"Taxi-issue":[{"text": null,"label": null}],'
        '"Booking-issue":[{"text": null,"label": null}],'
        '"Off":[{"text": null,"label": null}]}]}'
    )

    # Worked examples as (user review, assistant reply) pairs, in prompt order.
    few_shot_pairs = [
        # Example 1
        (
            "The room is enough big. But the room was a little bit dirty.",
            '{"Topics":[{"Room1":[{"text": "The room is enough big.","label": "Positive"}],'
            '"Room2":[{"text": "the room was a little bit dirty.","label": "Negative"}],'
            '"Staff":[{"text": null,"label": null}],'
            '"Location":[{"text": null,"label": null}],'
            '"Food":[{"text": null,"label": null}],'
            '"Price":[{"text": null,"label": null}],'
            '"Facility":[{"text": null,"label": null}],'
            '"Check-in":[{"text": null,"label": null}],'
            '"Check-out":[{"text": null,"label": null}],'
            '"Taxi-issue":[{"text": null,"label": null}],'
            '"Booking-issue":[{"text": null,"label": null}],'
            '"Off":[{"text": null,"label": null}]}]}',
        ),
        # Example 2
        (
            "The room was very clean, well decorated and modern, although not big. It was cheap.",
            '{"Topics":[{"Room1":[{"text": "The room was very clean, well decorated and modern","label": "Positive"}],'
            '"Room2":[{"text": "although not big","label": "Negative"}],'
            '"Price":[{"text": "cheap","label": "Positive"}],'
            '"Staff":[{"text": null,"label": null}],'
            '"Location":[{"text": null,"label": null}],'
            '"Food":[{"text": null,"label": null}],'
            '"Facility":[{"text": null,"label": null}],'
            '"Check-in":[{"text": null,"label": null}],'
            '"Check-out":[{"text": null,"label": null}],'
            '"Taxi-issue":[{"text": null,"label": null}],'
            '"Booking-issue":[{"text": null,"label": null}],'
            '"Off":[{"text": null,"label": null}]}]}',
        ),
        # Example 3
        (
            "Location. The hotel was new and close to the airport, which made traveling easy. However, there was a lot of street noise outside the window. Staff. The receptionist was polite and friendly. However, check-in took longer than expected. The hotel lobby was welcoming and spacious. The room had a comfortable bed, but the air conditioning was loud at night. The neighbors were noisy through the walls, and the WiFi in the room was weak and unreliable. The breakfast buffet was delicious; however, the coffee was terrible. The price was reasonable for the quality. The building was charming with historical architecture.",
            '{"Topics":[{"Room1":[{"text": "The room had a comfortable bed.","label": "Positive"}],'
            '"Room2":[{"text": "The air conditioning was loud at night.","label": "Negative"}],'
            '"Room3":[{"text": "The neighbors were noisy through the walls.","label": "Negative"}],'
            '"Room4":[{"text": "the WiFi in the room was weak and unreliable.","label": "Negative"}],'
            '"Staff":[{"text": "The receptionist was polite and friendly.","label": "Positive"}],'
            '"Location1":[{"text": "close to the airport, which made traveling easy.","label": "Positive"}],'
            '"Location2":[{"text": "there was a lot of street noise outside the window.","label": "Negative"}],'
            '"Food1":[{"text": "The breakfast buffet was delicious.","label": "Positive"}],'
            '"Food2":[{"text": "the coffee was terrible.","label": "Negative"}],'
            '"Price":[{"text": "The price was reasonable for the quality.","label": "Positive"}],'
            '"Facility1":[{"text": "The hotel was new.","label": "Positive"}],'
            '"Facility2":[{"text": "The hotel lobby was welcoming and spacious.","label": "Positive"}],'
            '"Facility3":[{"text": "The building was charming with historical architecture.","label": "Positive"}],'
            '"Check-in":[{"text": "check-in took longer than expected.","label": "Negative"}],'
            '"Check-out":[{"text": null,"label": null}],'
            '"Taxi-issue":[{"text": null,"label": null}],'
            '"Booking-issue":[{"text": null,"label": null}],'
            '"Off":[{"text": "Location. Staff.","label": null}]}]}',
        ),
        # Example 4
        (
            "location, service, overall was good, Sure worth it to come back again",
            '{"Topics":[{"Room":[{"text": null,"label": null}],'
            '"Staff":[{"text": null,"label": null}],'
            '"Location":[{"text": null,"label": null}],'
            '"Food":[{"text": null,"label": null}],'
            '"Price":[{"text": null,"label": null}],'
            '"Facility":[{"text": null,"label": null}],'
            '"Check-in":[{"text": null,"label": null}],'
            '"Check-out":[{"text": null,"label": null}],'
            '"Taxi-issue":[{"text": null,"label": null}],'
            '"Booking-issue":[{"text": null,"label": null}],'
            '"Off":[{"text": "location, service, overall was good, Sure worth it to come back again","label": "Positive"}]}]}',
        ),
        # Example 5
        (
            "The apartment was new. The breakfast was amazing and the price was quite reasonable. Overall, we definitely planning to return again soon!",
            '{"Topics":[{"Room":[{"text": null,"label": null}],'
            '"Staff":[{"text": null,"label": null}],'
            '"Location":[{"text": null,"label": null}],'
            '"Food":[{"text": "The breakfast was amazing and the price was quite reasonable.","label": "Positive"}],'
            '"Price":[{"text": null,"label": null}],'
            '"Facility":[{"text": "The apartment was new.","label": "Positive"}],'
            '"Check-in":[{"text": null,"label": null}],'
            '"Check-out":[{"text": null,"label": null}],'
            '"Taxi-issue":[{"text": null,"label": null}],'
            '"Booking-issue":[{"text": null,"label": null}],'
            '"Off":[{"text": "Overall, we definitely planning to return again soon!","label": "Positive"}]}]}',
        ),
    ]

    # System turn first, then each example as a user/assistant exchange.
    conversation = [{"role": "system", "content": instructions}]
    for example_review, example_reply in few_shot_pairs:
        conversation.append({"role": "user", "content": example_review})
        conversation.append({"role": "assistant", "content": example_reply})

    # Real input
    conversation.append({"role": "user", "content": review_text})

    full_prompt = build_full_prompt(conversation)

    # Decoding options passed through to the model call unchanged.
    decoding_options = {
        "temperature": 0,
        "top_p": 0.05
    }
    reply = model.generate_content([full_prompt], generation_config=decoding_options)

    return reply.text, full_prompt

# ----------- Run for ALL rows (three independent passes, 5-shot prompt) -------------
for run_num in (1, 2, 3):
    # Result rows collected during this pass only; reset at the start of each run.
    final_results = []

    # Walk the ID and Review columns of df_combined side by side.
    for review_id, review_text in zip(df_combined['ID'], df_combined['Review']):
        print(f"[Run {run_num}] Processing review ID {review_id}...")

        try:
            response_content, full_prompt = send_prompt(review_text)
            print(f"[Run {run_num}] Raw response for ID {review_id}:\n{response_content}\n")

            # Convert the raw reply into result rows and add them to this run.
            final_results.extend(parse_response(response_content, review_id, full_prompt))
        except Exception as e:
            # Best effort: report the failure and carry on with the next review.
            print(f"[Run {run_num}] Error processing ID {review_id}: {e}")

    # Write this run's rows to a CSV whose name carries the run number.
    output_csv_path = f'ABA_dataset_2_gemini_2.5_pro_5_shot_{run_num}.csv'
    output_df = pd.DataFrame(final_results)
    output_df.to_csv(output_csv_path, index=False)
    print(f" Done! Results saved to {output_csv_path}")

