In [None]:
!pip install openai
!pip install tiktoken

Collecting openai
  Downloading openai-1.47.0-py3-none-any.whl.metadata (24 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading openai-1.47.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.6/375.6 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.27.2-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K   [90m━━

In [None]:
# @title Importing relevant libraries, files, classes and functions
# Colab-only helpers: `drive` mounts Google Drive, `userdata` is used later to
# fetch the OpenAI API key.
from google.colab import userdata, drive
# Mount Google Drive first: the project folder referenced by `base` is addressed
# through the mount point.
drive.mount('/content/drive')
import os
import sys
# Root of the project folder on the mounted Drive; the dataset path is built from it.
base = "/content/drive/MyDrive/ABSA-LLMs-DistillingSbS"
# Make the project folder importable. This has to run BEFORE the project-local
# imports below (`Review`, `annotation_utils`) — presumably those modules live
# directly inside `base`; verify if the imports fail.
sys.path.append(base)
import openai
import random
from pydantic import BaseModel, Field, validator
# Project-local modules (resolved through the sys.path entry added above).
from Review import Review
from annotation_utils import generate_aspects_and_scores_json_format
import json
import pandas as pd
import tiktoken
from typing import Dict, Any, Optional, ClassVar, List, IO, Tuple, Union

Mounted at /content/drive


In [None]:
# @title Reading the csv-file of reviews into a pandas dataframe. Also loading the OpenAI API key and setting the desired model
# Location of the review dataset inside the project folder on Drive.
path_reviews = f"{base}/data/filtered_and_shuffled_reviews_35k.csv"
reviews = pd.read_csv(filepath_or_buffer=path_reviews)

# OpenAI configuration: the key is read from the Colab secret store instead of
# being hard-coded in the notebook.
OPENAI_API_KEY = userdata.get("OpenAI_API_KEY")
OPENAI_MODEL = 'gpt-3.5-turbo'
client = openai.Client(api_key=OPENAI_API_KEY)

The following 3 cells generate annotated reviews (by sending requests to the OpenAI API). The dataframe loaded above already contains annotations, so running these cells is optional.

In [None]:
# @title Default prompt
# Prompt template for aspect-based sentiment analysis with rationales.
# The `{review.<attr>}` placeholders and the doubled braces (`{{` / `}}`) around
# the JSON examples indicate a str.format-style template that is filled with a
# `review` object — presumably inside generate_aspects_and_scores_json_format,
# which receives this string as `prompt_raw`; confirm against annotation_utils.
# NOTE(review): the three examples are labelled inconsistently ("Aspects,
# Sentiments and Rationales" vs. "Aspects and Sentiments") and example 3 uses
# "###" where the others use "####". The text is deliberately left untouched:
# every character here is sent to the model, so editing it would change the
# generated annotations.
PROMPT_ABSA_WITH_RATIONALES = """Consider the following review of a mobile application:

App Name: {review.app_name}
Review Title: \"{review.title}\"
Review Body: \"{review.body}\"
App Category : {review.app_category}

Perform aspect-based sentiment analysis on the review, and consider only the aspects explicitly or implicitly mentioned in the review.
Generate a JSON-string with aspects as keys and JSON-arrays as values with the sentiment polarities (positive, negative, neutral) as the first element and a short
rationale explaining what prompted the inclusion of each aspect as the second element.

#### Example 1:
- Review Title : \"Disappointing\"
- Review Body : \"This app has potential, but the lack of essential features like offline mode, dark mode, and collaboration options is disappointing. The features need improvement.\"
- Aspects, Sentiments and Rationales : {{
  "offline mode": ["negative", "The absence of offline mode is cited as a disappointment."],
  "dark mode": ["negative", "The lack of dark mode is missing and cited as a disappointment."],
  "collaboration options": ["negative", "The absence of collaboration options is cited as disappointing, suggesting dissatisfaction with the app's collaborative features."],
  "features": ["negative", "Overall dissatisfaction with the app's features, indicating that they need improvement to meet user expectations."]
}}

#### Example 2:
- Review Title: \"Its OK, but competitors are better\"
- Review Body: \"While this app is decent, competitors like AppX and AppY offer a more seamless and efficient experience. They have better user interfaces and faster response times.\"
- Aspects and Sentiments: {{
  "user interface": ["negative", "The comparison suggests that the app's user interface is inferior to competitors, indicating dissatisfaction with its design or usability."],
  "response time": ["negative", "The mention of competitors having faster response times implies dissatisfaction with the app's performance in this aspect."]
}}

### Example 3:
- Review Title: \"This app is awesome!\"
- Review Body: \"This app is awesome! I love it! I can't wait to use it!\"
- Aspects and Sentiments: {{
}}

Be creative and include aspects even if they are only mentioned implicitly. Keep aspects and sentiments in lower case. If there are no aspects, return an empty JSON-Object."""

34926

In [None]:
# Accumulators for the annotation run; re-running this cell resets them.
completed_reviews: list = []  # every review that has been passed through the annotation helper
failed_ids: list = []  # user review ids whose API request failed
nr_requests: int = 10  # how many reviews to annotate; use len(reviews) for the entire dataset

In [None]:
# Annotate the first `nr_requests` reviews with aspects, sentiments and rationales.
# The bound is clamped to the dataframe length so an oversized `nr_requests`
# cannot index past the last row.
for i in range(min(nr_requests, len(reviews))):
    print(f"row : {i}")
    review = Review.from_dataframe_row(reviews.iloc[i])
    generate_aspects_and_scores_json_format(
        review,
        # Key under which the result is stored on the review object; the cell that
        # writes the labels back into the dataframe reads this exact key.
        column_name='gpt-3.5-turbo-1106-temp-0.0-rationales',
        # Use the model configured next to the API key instead of a second literal.
        # NOTE(review): the key above still says "1106" — keep the two in sync by
        # hand if OPENAI_MODEL is ever changed.
        model_name=OPENAI_MODEL,
        temperature=0.0,
        prompt_raw=PROMPT_ABSA_WITH_RATIONALES,
        predefined_aspects=False,
        rationales=True,
        failed_ids=failed_ids,  # failed requests are recorded here
    )
    # Every review is collected, including ones whose request may have failed;
    # check `failed_ids` before relying on the annotations.
    completed_reviews.append(review)

In [None]:
# @title inserts the generated labels into the dataframe as json-strings in the columns ABSALabels and ABSARationales
# Key under which the annotation loop stored its result on each review.
annotation_key = 'gpt-3.5-turbo-1106-temp-0.0-rationales'

for review in completed_reviews:
    # Reviews whose API request failed carry no usable annotation; leave their
    # rows untouched instead of crashing or overwriting existing labels.
    # (Assumes `failed_ids` holds the same id values as `user_review_id` — TODO confirm.)
    if review.user_review_id in failed_ids:
        continue

    # Mapping aspect -> [sentiment, rationale], as requested by the prompt.
    temp_asr = review.generated_aspects_and_scores[annotation_key]['aspects_and_scores']

    # Build the row selector once; it scans the whole dataframe.
    row_mask = reviews['userReviewId'] == review.user_review_id

    reviews.loc[row_mask, 'ABSALabels'] = json.dumps({key: value[0] for key, value in temp_asr.items()})
    reviews.loc[row_mask, 'ABSARationales'] = json.dumps({key: value[1] for key, value in temp_asr.items()})

In [None]:
# Persist the annotated dataframe. Note that this writes to the same path the
# dataset was loaded from, i.e. it overwrites the input file in place.
reviews.to_csv(path_or_buf=path_reviews, index=False)