In [1]:
from textwrap import dedent
from dotenv import load_dotenv

load_dotenv()

True

# Data

In [2]:
from datasets import load_dataset

dataset = load_dataset("yqzheng/semeval2014_restaurants")

In [3]:
import pandas as pd
train = pd.DataFrame(dataset['train'])
test = pd.DataFrame(dataset['test'])
train.shape, test.shape

((3608, 5), (1120, 5))

In [4]:
def merge_jsons(json_list):
    result = {}
    for j in json_list:
        result.update(j)
    return result

def create_json(df):
    df['json'] = df.apply(lambda row: {row['aspect']: row['label']} , axis=1)
    return df.groupby('text')['json'].agg(merge_jsons).reset_index()

In [5]:
train_json = create_json(train)
test_json = create_json(test)
train_json.head(10)

Unnamed: 0,text,json
0,"$160 for 2 filets, 2 sides, an appetizer and d...","{'filets': 0, 'sides': 0, 'appetizer': 0, 'dri..."
1,$20 for all you can eat sushi cannot be beaten.,{'sushi': 0}
2,$20 gets you unlimited sushi of a very high qu...,"{'sushi': 1, 'sushi places': 1, 'quality': 1}"
3,"$6 and there is much tasty food, all of it fre...",{'food': 1}
4,"($200 for 2 glasses of champagne, not too expe...","{'glasses of champagne': -1, 'bottle of wine':..."
5,(Always ask the bartender for the SEASONAL bee...,"{'SEASONAL beer': 1, 'bartender': 0}"
6,(and I have eaten my share) Which impresses me...,{'serve': 1}
7,"(food was delivered by a busboy, not waiter) W...","{'food': 0, 'busboy': -1, 'waiter': -1, 'chees..."
8,- the bread at the beginning is super tasty an...,"{'bread': 1, 'pizza': 1, 'margarite pizza with..."
9,20 minutes for our reservation but it gave us ...,"{'reservation': -1, 'cocktails': 1, 'surroundi..."


# Model

In [6]:
import os
HF_API_TOKEN = os.getenv('HF_API_TOKEN')

In [7]:
import requests
API_URL = "https://api-inference.huggingface.co/models/kevinscaria/joint_tk-instruct-base-def-pos-neg-neut-restaurants"
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
def query(payload):
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

# Evaluation

In [8]:
def calc_f1(tp: int, fp: int, fn: int) -> float:
    if tp + fp == 0: 
        precision = 0
    else:
        precision = tp / (tp + fp)
    
    if tp + fn == 0:
        recall = 0
    else:
        recall = tp / (tp + fn)
    
    if precision + recall == 0:
        f1_score = 0
    else:
        f1_score = 2.0 * (precision * recall) / (precision + recall)
    return f1_score

def validate_f1(example: dict, pred: dict) -> float:
    tp = sum(1 for k, v in example.items() if pred.get(k) == v)
    fp = sum(1 for k, v in pred.items() if example.get(k) != v)
    fn = sum(1 for k, v in example.items() if k not in pred.keys())
    return calc_f1(tp, fp, fn)

# Original Prompt

In [9]:
orig_prompt_template = """Definition: The output will be the aspects (both implicit and explicit) and the aspects sentiment polarity. In cases where there are no aspects the output should be noaspectterm:none.
Positive example 1-
input: With the great variety on the menu , I eat here often and never get bored.
output: menu:positive
Positive example 2- 
input: Great food, good size menu, great service and an unpretensious setting.
output: food:positive, menu:positive, service:positive, setting:positive
Negative example 1-
input: They did not have mayonnaise, forgot our toast, left out ingredients (ie cheese in an omelet), below hot temperatures and the bacon was so over cooked it crumbled on the plate when you touched it.
output: toast:negative, mayonnaise:negative, bacon:negative, ingredients:negative, plate:negative
Negative example 2-
input: The seats are uncomfortable if you are sitting against the wall on wooden benches.
output: seats:negative
Neutral example 1-
input: I asked for seltzer with lime, no ice.
output: seltzer with lime:neutral
Neutral example 2-
input: They wouldnt even let me finish my glass of wine before offering another.
output: glass of wine:neutral
Now complete the following example-
input: {}
output:"""

In [10]:
res = query(orig_prompt_template.format(test.loc[0, 'text']))
res

[{'generated_text': 'bread:positive'}]

In [11]:
def convert_sentiment(text):
    if text == 'positive':
        return 1
    elif text == 'negative':
        return -1
    elif text == 'neutral':
        return 0
    else:
        return None

def convert_dict(res):
    return {l.split(":")[0].strip(): convert_sentiment(l.split(":")[1].strip()) for l in res[0]['generated_text'].split(",") if ':' in l}

In [12]:
f1s = []
for _, row in test_json.iloc[:100].iterrows():
    res = query(orig_prompt_template.format(row['text']))
    pred = convert_dict(res)
    f1 = validate_f1(row['json'], pred)
    f1s.append(f1)
sum(f1s) / len(f1s)

[{'generated_text': 'menu:positive, pub fare:positive, burgers:positive, steaks:'}]
[{'generated_text': 'sashimi:negative'}]
[{'generated_text': 'prices:positive'}]
[{'generated_text': 'gratuity:neutral, bill:neutral'}]
[{'generated_text': 'atmosphere:positive, drinks:positive, appetizers:positive'}]
[{'generated_text': 'bar service:negative'}]
[{'generated_text': 'treats:positive, prices:positive'}]
[{'generated_text': 'space:positive, cuisine:positive, space:positive'}]
[{'generated_text': 'outdoor eating area:positive, space:positive'}]
[{'generated_text': 'utensils:negative, pie:neutral'}]
[{'generated_text': 'music:positive, lounge:neutral'}]
[{'generated_text': 'waiter:negative, kitchen:neutral, order:neutral'}]
[{'generated_text': 'poori:positive, naan:positive, paratha:positive'}]
[{'generated_text': 'apetizers:positive, Sangria:positive'}]
[{'generated_text': 'toppings:positive, vegetables:positive, crust:positive'}]
[{'generated_text': 'Big Mac:positive, Chicken McNuggets:pos

0.7375714285714285

# DSPY