In [1]:
import random
import json
import openai
import nltk
nltk.download('punkt')
nltk.download('stopwords')
from summac.model_summac import SummaCZS
from nltk.corpus import stopwords

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Administrator\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Administrator\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
acos_pred_only = "yelp-acos-_ab50qdWOk0DdB6XOrBitw-200.jsonl"
acos_pred_category = "yelp-acos-_ab50qdWOk0DdB6XOrBitw-200-category-groups.json"
acos_pred_aspect = "yelp-acos-_ab50qdWOk0DdB6XOrBitw-200-aspect-groups.json"

review_sample = "yelp-acos-100-sampled-rest-reviews.json"

openai.api_key = ""

### Read in quads from JSON

In [3]:
# Read in 200 reviews each with ACOS predictions - Each line is a JSON dictionary
reviews_acos = []
with open(acos_pred_only, "r") as f:
    for line in f:
        review = json.loads(line)
        reviews_acos.append(review)

In [4]:
# Read in reviews grouped by category, 5 category groups were chosen
# len(reviews_acos_category) = 5
# each category group has three items: group_name, group_size, group
reviews_acos_category = {}
with open(acos_pred_category, "r") as f:
    reviews_acos_category = json.load(f)

In [5]:
# 5 Categories
print("Sorted by categories")
categories = []
for i in range(0, len(reviews_acos_category)):
    categories.append(reviews_acos_category[i]['group_name'])
    
for i in range(0, len(reviews_acos_category)):
    print(reviews_acos_category[i]['group_name'] + ":", reviews_acos_category[i]['group_size'], "reviews")

Sorted by categories
QUALITY: 532 reviews
GENERAL: 244 reviews
STYLE_OPTIONS: 113 reviews
SERVICE: 52 reviews
PRICES: 22 reviews


In [6]:
categories

['QUALITY', 'GENERAL', 'STYLE_OPTIONS', 'SERVICE', 'PRICES']

In [7]:
# Read in reviews grouped by aspect, 7 aspects were chosen
# len(reviews_acos_aspect) = 7
# each aspect has three items: group_name, group_size, group
reviews_acos_aspect = {}
with open(acos_pred_aspect, "r") as f:
    reviews_acos_aspect = json.load(f)

In [8]:
# 7 Aspects
print("Sorted by aspects")
aspects = []
for i in range(0, len(reviews_acos_aspect)):
    aspects.append(reviews_acos_aspect[i]['group_name'])

for i in range(0, len(reviews_acos_aspect)):
    print(reviews_acos_aspect[i]['group_name'] + ":", reviews_acos_aspect[i]['group_size'], "reviews")

Sorted by aspects
oysters: 54 reviews
service: 45 reviews
chargrilled oysters: 43 reviews
food: 41 reviews
Acme: 29 reviews
place: 24 reviews
gumbo: 22 reviews


In [9]:
aspects

['oysters', 'service', 'chargrilled oysters', 'food', 'Acme', 'place', 'gumbo']

### Use GPT to turn these quads into summary about that specific aspect for 1 restaurant
#### Pass just quads (Grouped by Categories) to gpt/davinci to generate summary

In [None]:
categories_output = []

In [None]:
# Compile all acos quadruples for each category/aspect
# returns a list of acos quadruples for said aspect
def compile_acos(group_index, review_acos_dictionary):
    acos_quads = []
    group_name = review_acos_dictionary[group_index]['group_name']
    for i in range(review_acos_dictionary[group_index]['group_size']):        
        reviews = review_acos_dictionary[group_index]['group'][i]['acos_preds']
        for j in range(len(reviews)):
            if(reviews[j]['category'] == group_name):
                acos_quads.append(reviews[j])
    return acos_quads

# Function to generate a prompt
def generate_prompt(group_name, acos_quads, max_quads):
    
    # Format the acos quadruples into a string
    def acos2str(acos_quads):
        formatted_acos = ""
        for i, item in enumerate(acos_quads):
            aspect = ' '.join(item['aspect']) if isinstance(item['aspect'], (list, tuple)) else item['aspect']
            opinion = ' '.join(item['opinion']) if isinstance(item['opinion'], (list, tuple)) else item['opinion']
            #category = item['category']
            sentiment = item['sentiment']
            #formatted_acos += f"{i+1}. Aspect: {aspect}, Category: {category}, Opinion: {opinion}, Sentiment: {sentiment}\n"
            formatted_acos += f"{i+1}. Aspect: {aspect}, Opinion: {opinion}, Sentiment: {sentiment}\n"
        return formatted_acos.strip()
    
    acos_quads = acos_quads[:max_quads]
    prompt = f'Summarize the following restaurant reviews with at most 5 sentences. Instead of reviews, you are provided with aspect-category-opinion-sentiment quadruples. The summarization should only be about "{group_name}". '
    formatted_acos_string = acos2str(acos_quads)
    
    prompt = prompt + formatted_acos_string
    
    return prompt

def generate_summary_category(group_index):
    review2acos = compile_acos(group_index, reviews_acos_category)

    openai_model = "text-davinci-003"
    group_name = categories[group_index]

    prompt = generate_prompt(group_name, review2acos, 150)

    resp = openai.Completion.create(
        model = openai_model,
        prompt = prompt,
        temperature = 0,
        max_tokens = 1000
    )

    summary_str = resp['choices'][0]['text'].replace('\n', '')
    temp_tuple = (group_name, summary_str)
    
    return temp_tuple

In [None]:
#review2acos = compile_acos(0, reviews_acos_category)
#group_name = categories[0]
#prompt = generate_prompt(group_name, review2acos, 150)
#print(prompt)

In [None]:
for i in range(len(reviews_acos_category)):
    temp_tuple = generate_summary_category(i)
    categories_output.append(temp_tuple)
#print(categories_output)

In [None]:
for category, text in categories_output:
    print(f"{category}:\n{text}\n")

#### Pass just quads (Grouped by Aspect) to gpt/davinci to generate summary

In [None]:
aspect_output = []

In [None]:
# Compile all acos quadruples for each category/aspect
# returns a list of acos quadruples for said aspect
def compile_acos(group_index, review_acos_dictionary):
    acos_quads = []
    group_name = review_acos_dictionary[group_index]['group_name']
    for i in range(review_acos_dictionary[group_index]['group_size']):        
        reviews = review_acos_dictionary[group_index]['group'][i]['acos_preds']
        for j in range(len(reviews)):
            if reviews[j]['aspect'] is not None and group_name in reviews[j]['aspect']:
                acos_quads.append(reviews[j])
    return acos_quads

# Function to generate a prompt
def generate_prompt(group_name, acos_quads, max_quads):
    
    # Format the acos quadruples into a string
    def acos2str(acos_quads):
        formatted_acos = ""
        for i, item in enumerate(acos_quads):
            #aspect = ' '.join(item['aspect']) if isinstance(item['aspect'], (list, tuple)) else item['aspect']
            opinion = ' '.join(item['opinion']) if isinstance(item['opinion'], (list, tuple)) else item['opinion']
            category = item['category']
            sentiment = item['sentiment']
            #formatted_acos += f"{i+1}. Aspect: {aspect}, Category: {category}, Opinion: {opinion}, Sentiment: {sentiment}\n"
            formatted_acos += f"{i+1}. Category: {category}, Opinion: {opinion}, Sentiment: {sentiment}\n"
        return formatted_acos.strip()
    
    acos_quads = acos_quads[:max_quads]
    prompt = f'Summarize the following restaurant reviews with at most 5 sentences. Instead of reviews, you are provided with aspect-category-opinion-sentiment quadruples. The summarization should only be about "{group_name}". '
    formatted_acos_string = acos2str(acos_quads)
    
    prompt = prompt + formatted_acos_string
    
    return prompt

def generate_summary_aspects(group_index):
    review2acos = compile_acos(group_index, reviews_acos_aspect)

    openai_model = "text-davinci-003"
    group_name = aspects[group_index]

    prompt = generate_prompt(group_name, review2acos, 150)

    resp = openai.Completion.create(
        model = openai_model,
        prompt = prompt,
        temperature = 0,
        max_tokens = 1000
    )

    summary_str = resp['choices'][0]['text'].replace('\n', '')
    temp_tuple = (group_name, summary_str)
    
    return temp_tuple

In [None]:
for i in range(len(reviews_acos_aspect)):
    temp_tuple = generate_summary_aspects(i)
    aspect_output.append(temp_tuple)

In [None]:
for aspect, text in aspect_output:
    print(f"{aspect}:\n{text}\n")

### Evaluation

In [20]:
reviews = [x['review'] for x in reviews_acos]

In [21]:
model_zs = SummaCZS(granularity="sentence", model_name="vitc", device="cuda") # If you have a GPU: switch to: device="cuda"

In [22]:
# convert review samples to list of strings
with open(review_sample, "r") as f:
    reviews_rest = json.load(f)
reviews_rest_concat = []
for restaurant in reviews_rest: 
    revs = ""
    for rev in restaurant['reviews']:
        revs += rev
    reviews_rest_concat.append(revs)

In [23]:
def split_and_rephrase(sum):
    sum1 = "The restaurant reviews cover a range of opinions on food quality, with many customers raving about the food. Several reviews praise the presentation of the food, and others mention specific dishes that were particularly good. There are also some negative reviews, with complaints about inconsistent quality, downsized portions, high prices, and poor service. However, overall, the majority of reviews are positive, with many customers recommending the restaurant and its food. The reviews cover a range of cuisines, including Mexican, Indian, and sushi, with some specific dishes mentioned as standouts."
    sum1_rephrased = "The presentation of the food is praised .\n Food quality is inconsistent .\n Portions are downsized .\n Prices are high .\n Service is poor .\n The restaurant and its food are recommended .\n Mexican food is good .\n Indian food is good .\n Sushi is good ."
    sum2 = "The restaurant reviews cover a variety of food types and service experiences. Some of the positive reviews include comments about the best tuna ever had, great value sushi with high quality, superb caesar salad, and the best crab cakes in town. On the other hand, some of the negative reviews mention poor customer service, poor quality pizza, and overpriced food. Additionally, some reviewers mention specific dishes they enjoyed such as the seabass on lobster risotto, honey walnut prawns, asparagus, and lobster 3 ways. Overall, there are mixed reviews, with some praising the food quality while others criticize the service and value."
    sum2_rephrased = "The tuna is the best that customers ever had .\n Sushi is of great value and high quality .\n Caesar salad is superb .\n Crab cakes are the best in town .\n Customer service is poor .\n Pizza is of poor quality .\n Food is overpriced .\n Customers enjoyed the seabass on lobster risotto .\n Customers enjoyed honey walnut prawns .\n Customers enjoyed asparagus .\n Customers enjoyed lobster 3 ways ."
    prompt = "Split and rephrase the following sentences into simple propositions: "
    input = prompt + sum1 + "\n Output: " + sum1_rephrased + "\n" + prompt + sum2 + "\n Output: " + sum2_rephrased + "\n" + prompt + sum + "\n Output: "
    # print(prompt)
    resp = openai.Completion.create(
        model="text-davinci-003",
        prompt=input,
        temperature=0,
        max_tokens=1000
    )
    pred_str = resp['choices'][0]['text']
    return pred_str

In [24]:
def compute_faithfulness(reviews, summary, t):
    avg_support = 0
    for sum in summary:
        support = 0
        # reviews_batch = random.sample(reviews, 20)
        for review in reviews:
            score_zs = model_zs.score([review], [sum])['scores'][0]
            # if score_zs > 0.2:
            if score_zs > t:
                support += 1
        avg_support += support

        print("summary:", sum)
        print("support:", support)
        print("=====")

    avg_support /= len(summary)
    print("faithfulness:", avg_support)
    return avg_support

In [25]:
def compute_factuality(reviews, summary):
    avg_topscore = 0
    for sum in summary:
        top_score = 0
        # reviews_batch = random.sample(reviews, 20)
        for review in reviews:
            score_zs = model_zs.score([review], [sum])['scores'][0]
            top_score = max(top_score, score_zs)
        avg_topscore += top_score

        print("summary:", sum)
        print("top score:", top_score)
        print("=====")

    avg_topscore /= len(summary)
    print("factuality:", avg_topscore)
    return avg_topscore

In [26]:
def compute_genericity(reviews, summary):
    # remove stop words and puctuations
    stop_words = set(stopwords.words('english'))
    summary_words = []
    for sum in summary:
        sum = sum.split(" ")
        sum = [word.lower() for word in sum if word not in stop_words and len(word) > 0 and word != "."]
        summary_words += sum

    avg_idf = 0
    for word in summary_words:
        count = 0
        for review in reviews:
            count += (review.find(word) != -1)
        # plus-one smoothing in case count = 0
        idf = len(reviews) / (count+1)
        avg_idf += idf

        print("word in summary:", word)
        print("count:", count)
        print("=====")

    avg_idf /= len(summary_words)
    print("genericity:", avg_idf)
    return avg_idf

In [27]:
summary_file = 'yelp_summary_category_review+acos'
with open(summary_file, 'r') as f:
    summary = f.read()
summary_split = split_and_rephrase(summary).split('\n')
summary_split = [x for x in summary_split if len(x) > 0]

In [28]:
summary_split

[' Oysters are of good quality .',
 ' Bloody Marys are of good quality .',
 ' Fresh oysters are of good quality .',
 ' Chargrilled oysters are of good quality .',
 ' Bread and butter is of good quality .',
 ' Crawfish puppies are of good quality .',
 ' Fried seafood is mediocre to horrible .',
 ' Gumbo has too much salt .',
 ' Staff is praised .',
 ' Service is praised .',
 ' Atmosphere is praised .',
 ' Wait is worth it .',
 ' Roscoe and Brandon are friendly .',
 ' Restaurant is a must-visit .',
 ' Wait can be long .',
 ' Service can be spotty .',
 ' Variety of style options is offered .',
 ' 15 Dozen Club is offered .',
 ' Soft shell crab is offered .',
 ' 1/2 and 1/2 is offered .',
 ' Beer on tap is offered .',
 ' Oyster bar is offered .',
 ' Splitting checks is offered .',
 ' Splitting credit card payments is offered .',
 ' Oysters are offered .',
 ' Atmosphere is offered .',
 ' Dress is offered .',
 ' Reservation is offered .',
 ' Food is offered .',
 ' Wait times are offered .',


In [29]:
compute_faithfulness(reviews, summary_split, 0.2)

summary:  Oysters are of good quality .
support: 62
=====
summary:  Bloody Marys are of good quality .
support: 24
=====
summary:  Fresh oysters are of good quality .
support: 24
=====
summary:  Chargrilled oysters are of good quality .
support: 38
=====
summary:  Bread and butter is of good quality .
support: 16
=====
summary:  Crawfish puppies are of good quality .
support: 4
=====
summary:  Fried seafood is mediocre to horrible .
support: 0
=====
summary:  Gumbo has too much salt .
support: 4
=====
summary:  Staff is praised .
support: 36
=====
summary:  Service is praised .
support: 61
=====
summary:  Atmosphere is praised .
support: 23
=====
summary:  Wait is worth it .
support: 64
=====
summary:  Roscoe and Brandon are friendly .
support: 5
=====
summary:  Restaurant is a must-visit .
support: 37
=====
summary:  Wait can be long .
support: 68
=====
summary:  Service can be spotty .
support: 28
=====
summary:  Variety of style options is offered .
support: 32
=====
summary:  15 Do

22.305882352941175

In [30]:
compute_factuality(reviews, summary_split)

summary:  Oysters are of good quality .
top score: 0.921905517578125
=====
summary:  Bloody Marys are of good quality .
top score: 0.530029296875
=====
summary:  Fresh oysters are of good quality .
top score: 0.8274078369140625
=====
summary:  Chargrilled oysters are of good quality .
top score: 0.872955322265625
=====
summary:  Bread and butter is of good quality .
top score: 0.5555419921875
=====
summary:  Crawfish puppies are of good quality .
top score: 0.56884765625
=====
summary:  Fried seafood is mediocre to horrible .
top score: 0
=====
summary:  Gumbo has too much salt .
top score: 0.616943359375
=====
summary:  Staff is praised .
top score: 0.78240966796875
=====
summary:  Service is praised .
top score: 0.942138671875
=====
summary:  Atmosphere is praised .
top score: 0.9144363403320312
=====
summary:  Wait is worth it .
top score: 0.947174072265625
=====
summary:  Roscoe and Brandon are friendly .
top score: 0.41455078125
=====
summary:  Restaurant is a must-visit .
top sco

0.6689736758961397

In [31]:
compute_genericity(reviews_rest_concat, summary_split)

word in summary: oysters
count: 8
=====
word in summary: good
count: 99
=====
word in summary: quality
count: 45
=====
word in summary: bloody
count: 2
=====
word in summary: marys
count: 0
=====
word in summary: good
count: 99
=====
word in summary: quality
count: 45
=====
word in summary: fresh
count: 61
=====
word in summary: oysters
count: 8
=====
word in summary: good
count: 99
=====
word in summary: quality
count: 45
=====
word in summary: chargrilled
count: 0
=====
word in summary: oysters
count: 8
=====
word in summary: good
count: 99
=====
word in summary: quality
count: 45
=====
word in summary: bread
count: 36
=====
word in summary: butter
count: 16
=====
word in summary: good
count: 99
=====
word in summary: quality
count: 45
=====
word in summary: crawfish
count: 2
=====
word in summary: puppies
count: 2
=====
word in summary: good
count: 99
=====
word in summary: quality
count: 45
=====
word in summary: fried
count: 36
=====
word in summary: seafood
count: 16
=====
word i

14.896027970748834

In [32]:
summary_file = 'yelp_summary_aspect_review+acos'
with open(summary_file, 'r') as f:
    summary = f.read()
summary_split = split_and_rephrase(summary).split('\n')
summary_split = [x for x in summary_split if len(x) > 0]

In [33]:
summary_split

['The oysters are delicious .',
 'The oysters are crispy .',
 'The oysters have a smoky flavor .',
 'The taste, texture, and flavor of the oysters are enjoyed .',
 'The food is awesome .',
 'The food is great .',
 'The food is amazing .',
 'The food is well-seasoned .',
 'The food is not the best .',
 'The food is meh .',
 'Acme is highly recommended .',
 'Acme has great quality and style options .',
 'Acme has great seafood .',
 'Acme has New Orleans cuisine .',
 'Acme is worth the wait .',
 'Acme is a favorite among customers .',
 'Acme is the best .',
 'Acme has long waits .',
 'Acme is busy .',
 'Acme is touristy .',
 'Acme is a tourist trap .',
 'The gumbo is very tasty .',
 'The gumbo is great .',
 'The gumbo is delicious .',
 "The gumbo is bowl-lickin' good .",
 'The gumbo is pretty good .',
 'The gumbo stood out .',
 'The gumbo is very good .',
 'The gumbo is good .',
 'The gumbo is too salty .',
 'The gumbo is not really edible .',
 'The staff is friendly .']

In [34]:
compute_faithfulness(reviews, summary_split, 0.2)

summary: The oysters are delicious .
support: 52
=====
summary: The oysters are crispy .
support: 5
=====
summary: The oysters have a smoky flavor .
support: 38
=====
summary: The taste, texture, and flavor of the oysters are enjoyed .
support: 13
=====
summary: The food is awesome .
support: 96
=====
summary: The food is great .
support: 107
=====
summary: The food is amazing .
support: 88
=====
summary: The food is well-seasoned .
support: 20
=====
summary: The food is not the best .
support: 19
=====
summary: The food is meh .
support: 4
=====
summary: Acme is highly recommended .
support: 42
=====
summary: Acme has great quality and style options .
support: 2
=====
summary: Acme has great seafood .
support: 72
=====
summary: Acme has New Orleans cuisine .
support: 6
=====
summary: Acme is worth the wait .
support: 61
=====
summary: Acme is a favorite among customers .
support: 7
=====
summary: Acme is the best .
support: 15
=====
summary: Acme has long waits .
support: 56
=====
sum

29.53125

In [35]:
compute_factuality(reviews, summary_split)

summary: The oysters are delicious .
top score: 0.9493370056152344
=====
summary: The oysters are crispy .
top score: 0.7845458984375
=====
summary: The oysters have a smoky flavor .
top score: 0.794464111328125
=====
summary: The taste, texture, and flavor of the oysters are enjoyed .
top score: 0.426605224609375
=====
summary: The food is awesome .
top score: 0.9821052551269531
=====
summary: The food is great .
top score: 0.9783859252929688
=====
summary: The food is amazing .
top score: 0.9808464050292969
=====
summary: The food is well-seasoned .
top score: 0.97918701171875
=====
summary: The food is not the best .
top score: 0.9802112579345703
=====
summary: The food is meh .
top score: 0.888031005859375
=====
summary: Acme is highly recommended .
top score: 0.93353271484375
=====
summary: Acme has great quality and style options .
top score: 0.570068359375
=====
summary: Acme has great seafood .
top score: 0.9760513305664062
=====
summary: Acme has New Orleans cuisine .
top scor

0.8295802474021912

In [36]:
compute_genericity(reviews_rest_concat, summary_split)

word in summary: the
count: 100
=====
word in summary: oysters
count: 8
=====
word in summary: delicious
count: 74
=====
word in summary: the
count: 100
=====
word in summary: oysters
count: 8
=====
word in summary: crispy
count: 12
=====
word in summary: the
count: 100
=====
word in summary: oysters
count: 8
=====
word in summary: smoky
count: 4
=====
word in summary: flavor
count: 64
=====
word in summary: the
count: 100
=====
word in summary: taste,
count: 2
=====
word in summary: texture,
count: 1
=====
word in summary: flavor
count: 64
=====
word in summary: oysters
count: 8
=====
word in summary: enjoyed
count: 39
=====
word in summary: the
count: 100
=====
word in summary: food
count: 94
=====
word in summary: awesome
count: 45
=====
word in summary: the
count: 100
=====
word in summary: food
count: 94
=====
word in summary: great
count: 91
=====
word in summary: the
count: 100
=====
word in summary: food
count: 94
=====
word in summary: amazing
count: 58
=====
word in summary: 

20.99237481490993