In [1]:
import os
from pathlib import Path
import json
from collections import defaultdict

In [2]:
# Root directory of the datasets, read once from the CURRICULA_DATA environment variable.
_curricula_root = os.getenv("CURRICULA_DATA")
if _curricula_root is None:
    # os.getenv returns None for an unset variable and Path(None) would raise an
    # opaque TypeError, so fail with a message that names the missing variable.
    raise RuntimeError(
        "The CURRICULA_DATA environment variable must be set to the directory "
        "that contains the E2E and ViGGO dataset folders."
    )

e2e_path = Path(_curricula_root) / "E2E" / "E2E.phrases.no-ol.jsonl"
viggo_path = Path(_curricula_root) / "ViGGO" / "ViGGO.phrases.jsonl"

In [3]:
def load_dataset(path):
    """Load the 'inform' examples from a JSON-lines file.

    Args:
        path: Object with an ``open`` method (e.g. a ``pathlib.Path``) pointing
            at a file that holds one JSON object per line.

    Returns:
        A list, in file order, of the decoded examples whose
        ``ex['source']['mr']['da']`` equals ``'inform'``.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If an example has no ``source -> mr -> da`` entry.
    """
    ds = []
    # Read as UTF-8 explicitly instead of relying on the platform's locale
    # encoding, which would garble non-ASCII text on some systems.
    with path.open('r', encoding='utf-8') as fp:
        for line in fp:
            # Skip blank lines (e.g. an empty line at the end of the file);
            # json.loads would raise on them.
            if not line.strip():
                continue
            ex = json.loads(line)
            if ex['source']['mr']['da'] == 'inform':
                ds.append(ex)
    return ds

def get_phrases(ds, delex='delex', datasets=None):
    """Index target phrases by their attribute/value sequence with one value masked.

    Args:
        ds: Name of the dataset to read, used as a key into ``datasets``. The
            name ``'ViGGO'`` additionally drops the first two tokens of every
            source sequence.
        delex: Selects the sequences read from each example:
            ``ex['source']['sequence']['rule_<delex>']`` for the attribute/value
            tokens and ``ex['target']['sequence']['<delex>']`` for the phrase.
        datasets: Optional mapping from dataset name to an iterable of
            examples. When omitted, the module-level ``DS`` mapping is used, so
            existing ``get_phrases(name)`` calls behave as before.

    Returns:
        A ``defaultdict`` of ``defaultdict(set)``. For a source sequence
        ``('a1=v1', 'a2=v2')`` with phrase tokens ``tgt`` the result contains::

            phrases[('a1', 'a2=v2')][('a1=v1', 'a2=v2')] -> {tuple(tgt), ...}
            phrases[('a1=v1', 'a2')][('a1=v1', 'a2=v2')] -> {tuple(tgt), ...}

        i.e. one outer key per position, in which that position keeps only the
        attribute name; the inner key is the full attribute/value sequence.
    """
    if datasets is None:
        # Looked up at call time, so DS may be defined after this function.
        datasets = DS

    phrases = defaultdict(lambda: defaultdict(set))

    for ex in datasets[ds]:

        # Get MR as attribute/value sequence.
        src = ex['source']['sequence'][f'rule_{delex}']

        # ViGGO has header tokens [da=???, rating=???, ....], so we remove them.
        if ds == 'ViGGO':
            src = src[2:]

        # Get the actual phrase text.
        tgt = ex['target']['sequence'][f'{delex}']

        # These do not depend on the masked position, so build them once.
        full_key = tuple(src)
        phrase = tuple(tgt)

        # Given an attr/val sequence src = ['a1=v1', ..., 'an=vn'], create one
        # entry per position i in which the i-th item is reduced to its
        # attribute name ('ai') while all other items keep their values.
        for i, attr_val in enumerate(src):
            masked = list(src)  # copy the attr/val sequence
            masked[i] = attr_val.split("=")[0]  # keep only the attribute name
            phrases[tuple(masked)][full_key].add(phrase)
    return phrases

def print_phrase_pairs(phrase_data, attribute1=None, attribute2=None, at_most=None):
    """Print the phrases stored under two-item keys of ``phrase_data``.

    Args:
        phrase_data: Nested mapping ``outer key -> inner key -> collection of
            token sequences``. Only outer keys of length 2 are printed.
        attribute1: If given, the first item of an outer key must start with
            this string.
        attribute2: If given, the second item of an outer key must start with
            this string.
        at_most: If given, the number of phrases shown per inner key is capped
            by slicing the sorted phrases with ``[:at_most]``.

    Output is written with ``print``: each selected outer key, then each of its
    inner keys in sorted order, then that inner key's phrases in sorted order
    joined by single spaces. A line containing ``:`` follows whenever the cap is
    smaller than the number of phrases available.
    """
    def _selected(key):
        # Only attribute pairs are of interest here.
        if len(key) != 2:
            return False
        if attribute1 is not None and not key[0].startswith(attribute1):
            return False
        return attribute2 is None or key[1].startswith(attribute2)

    for pair_key in sorted(k for k in phrase_data.keys() if _selected(k)):
        print(pair_key)
        by_full_key = phrase_data[pair_key]
        for full_key in sorted(by_full_key.keys()):
            print("   ", full_key)

            ordered = sorted(phrase_data[pair_key][full_key])
            cutoff = len(ordered) if at_most is None else at_most

            for tokens in ordered[:cutoff]:
                print("      ", " ".join(tokens))
            if cutoff < len(ordered):
                # ":" indicates there are more; increase at_most to see them.
                print("      ", ":")

In [4]:
# Loaded examples of each corpus, keyed by dataset name.
DS = {
    name: load_dataset(jsonl_path)
    for name, jsonl_path in (("E2E", e2e_path), ("ViGGO", viggo_path))
}

In [5]:
# Build the phrase index for each corpus.
e2e_phrases = get_phrases(ds='E2E')
viggo_phrases = get_phrases(ds='ViGGO')

In [6]:
# E2E: pairs whose first item starts with 'area' and second with 'eat_type',
# showing at most 25 phrases per attribute/value combination.
print_phrase_pairs(
    phrase_data=e2e_phrases,
    attribute1='area',
    attribute2='eat_type',
    at_most=25,
)

('area', 'eat_type=coffee shop')
    ('area=city centre', 'eat_type=coffee shop')
       a city centre coffee shop
       a city centre coffee shop offering
       city centre coffee shop
       city centre located coffee shop
       city centre near a coffee shop
       friendly city centre coffee shop
       in city centre near a coffee shop
       is located in the center of the city and features a casual coffee shop setting
       it 's location is in the city centre area and it 's a coffee shop
       it is located in the center of the city and features a casual coffee shop setting .
       located in city centre near a coffee shop
    ('area=riverside', 'eat_type=coffee shop')
       a beautiful riverside coffee shop
       a riverside area coffee shop
       a riverside café
       a riverside coffee
       a riverside coffee shop
       a riverside coffee shop offering
       along the riverside and within a coffee shop environment
       an upscale , riverside , coffee shop
  

In [7]:
# E2E: every attribute pair, showing at most 25 phrases per attribute/value combination.
print_phrase_pairs(phrase_data=e2e_phrases, at_most=25)

('area', 'area=city centre')
    ('area=riverside', 'area=city centre')
       a riverside - view located just past the city centre
       a riverside in the city centre
       along the river in the city center
       along the river in the city centre
       are located on the riverfront , near the city centre
       are placed close to the river , around city centre
       by a riverside in the city centre
       by the river and close to the city centre
       by the river in the city centre
       by the riverside in the city centre
       can find it by the riverside in the city centre
       find it by the riverside in the city centre
       is by the river and close to the city centre
       is located beside a river , and is conveniently located near city centre
       is located on the river in city centre
       is located on the river in city centre and serves pasta
       is located within the riverside area of the city centre
       it is located on the river in city cent

       available in the city centre at the NAME
       be found in city centre at NAME
       be found in the city center in the form of the NAME
       be found in the city centre at the NAME
       can be found in city centre at NAME
       can be found in the city center in the form of the NAME
       can be found in the city centre at the NAME
       centre city of NAME
       city center called the NAME
       city center is the location for the NAME .
       city centre 's NAME
       city centre , NAME
       city centre , NAME ,
       city centre NAME
       city centre at NAME
       city centre called NAME
       city centre called the NAME
       city centre located the NAME
       city centre name the NAME
       city centre named NAME
       :
('area=city centre', 'near')
    ('area=city centre', 'near=PLACEHOLDER')
       're located in the city centre by the NEAR
       's in the city centre , near the NEAR
       's in the city centre near NEAR
       's located in cit

       friendly riverside coffee shop
       in riverside where you can have coffee
       in the riverside area near the coffee shop
       is a riverside coffee shop
       is along the riverside and within a coffee shop environment
       it is a riverside coffee shop
       it is along the riverside and within a coffee shop environment .
       located in riverside where you can have coffee
       on the riverside that provides coffee shop type food
       picturesque riverside coffee shop
       riverside 's coffee shop
       riverside area coffee shop
       riverside café
       :
    ('area=riverside', 'eat_type=pub')
       a friendly riverside pub
       a river side pub
       a riverside - situated pub
       a riverside area pub
       a riverside pub
       a riverside pub offering
       an amazing riverside pub
       by the river is a nice pub
       by the riverside there is a pub
       down by the riverside there is a pub
       friendly riverside pub
       in a f

       rated 1 star of a possible 5 that serves italian meals
       rated one out of five for its italian themed meals
       that is rated 1 star of a possible 5 that serves italian meals
       with a 1 out of 5 customer rating they serve italian food
    ('customer_rating=3 out of 5', 'food=Italian')
       a guest rating of 3 out of 5 stars for their italian cuisine
       a three star italian joint
       has a customer rating 3 out of 5 and serves italian food
       is rated 3 out of 5 and serves italian food
       it is rated 3 out of 5 and serves italian food
       offering 3 star italian cuisine and spirits
       offers a guest rating of 3 out of 5 stars for their italian cuisine
       three star italian
    ('customer_rating=5 out of 5', 'food=Italian')
       5 star italian
       5 star italian cuisine
       5 star italian food
       5 stars for their italian cuisine
       5 stars serving up italian food
       a 5 out of 5 rated service for italian food
       a f

       average customer rating near NEAR
       average customer rating near the NEAR
       average customer ratings it is located near NEAR
       average customer ratings located near NEAR
       average customer ratings near NEAR
       average customer ratings near the NEAR
       average ratings located near to NEAR
       average ratings near NEAR
       average reviews located near the NEAR
       boasts an average customer rating and is located near NEAR
       customer rating is average and it 's located near NEAR .
       customers rate it average and it is close to the NEAR .
       :
    ('customer_rating=high', 'near=PLACEHOLDER')
       a high customer rating , located near the NEAR
       a high customer rating located near NEAR serving food with a price range 20 - 25l
       a high customer rating located near the NEAR
       a high customer rating near NEAR
       a high customer rating near the NEAR
       a high customer rating that bis near NEAR
       a high ratin

       average rating and indian food
       fairly good indian food
       good indian food
       has an average customer rating and provides indian food
       has an average customer rating and serves indian food
       has an average customer rating for its indian food
       has an average rating and serves indian food
       has an average rating for indian food
       has average rating and indian food
       is an average rated indian establishment
       it has an average rating for indian food .
       it serves average indian food .
       it serves fairly good indian food too
       moderately - rated indian dishes
       offering average indian food
       offering average indian food .
       provides average rated indian cuisine
       provides the customer with average rated indian food
       :
    ('customer_rating=average', 'food=Italian')
       's average in ratings , low and price and has an italian theme
       an average customer rating for serving italian food

       that has an eat type of a coffee shop that has a cheap price range
       the coffee shop 's cheap prices
       the coffee shop 's cheap prices make up for it
       type of a coffee shop that has a cheap price range
    ('eat_type=pub', 'price_range=cheap')
       a pub atmosphere for tourists on a budget
       a pub featuring low prices and serving burgers , fries , and drinks
       a pub north of the city centre that offers pub fare in a low price range
       a pub offering low price
       a pub providing excellent food in the low price range
       a pub providing refreshments in the low price range
       a pub providing sushi in the low price range
       a pub providing take away deliveries in the low price range
       a pub selling good sushi at low prices , but not familiar
       a pub serving inexpensive food
       a pub setting with cheap prices
       a pub that has a great price range
       a pub that is affordable
       a pub that offers food and drink at

       is n't family - friendly past customers rate it 5 out of 5
       is n't family friendly but has a customer rating of 5 out of 5
       is not a child friendly zone with a customer rating of 5 out of 5
       is not a family - friendly venue but has a 5 out of 5 customer rating
       is not a family - friendly venue but has a customer rating of 5 out of 5
       is not child friendly and has a rating of 5 out of 5
       is not child friendly and has been rated 5 out of 5 by customers
       :
    ('family_friendly=yes', 'customer_rating=5 out of 5')
       's also children - friendly and 5 out of 5 customers recommend it
       's child - friendly and has an excellent customer rating of 5 out of 5
       's family friendly and rated 5 out of 5
       's not a place to bring the family is rated 5 out of 5
       a child friendly environment with 5 out of 5 customer reviews
       a child friendly place with a customer rating of 5 out of 5
       a child friendly zone with a cus

    ('food=Indian', 'area=riverside')
       're looking for indian food in the riverside area
       5 , that serves indian food in the riverside area
       a place to grab some indian food if you happen to be by the riverside
       a top pick among indian cuisine restaurants in the riverside area
       a variety of indian food in the riverside area
       among indian cuisine restaurants in the riverside area
       an indian sited on the riverside
       features indian style food on the riverside
       for indian food in the riverside area
       for quality indian food priced higher than other restaurants located in riverside
       grab some indian food if you happen to be by the riverside
       has indian cuisine by the riverside
       has indian food in riverside
       if you 're looking for indian food in the riverside area
       indian cuisine in the riverside area
       indian cuisine near the riverside close
       indian cuisine restaurants in the riverside area
 

       a japanese place called the NAME
       a japanese style eatery NAME
       a japanese venue called NAME
       a place that serves japanese food is the NAME .
       an japanese the NAME
       can enjoy japanese food at the NAME
       can get japanese at NAME
       can get japanese food at the NAME
       can have japanese food from the NAME
       customers have rated japanese food serving NAME as average .
       enjoy japanese food at the NAME
       for a japanese menu , try the NAME .
       for a japanese style eatery NAME
       for japanese food , i would recommend the NAME .
       for japanese food , there is NAME .
       for japanese food , try NAME .
       for japanese food there is the NAME .
       for japanese food try NAME
       :
('food', 'near=PLACEHOLDER')
    ('food=Chinese', 'near=PLACEHOLDER')
       's got chinese food and is near NEAR
       a chinese food establishment located near NEAR
       chinese food is located near NEAR
       chinese food 

       friendly fast food venue with a customer rating of 3
       friendly fast food venue with a customer rating of 3 out
       is a fast food place and has a customer rating of 3 out of 5
       is a fast food place that received 3 out of 5 stars
       is a fast food place with a customer rating of 3 out of 5
       is a fast food venue rated 3 out of 5 stars
       is a fast food venue receiving a 3 out of 5 rating
       offers fast food and is rates 3 out of 5
       priced fast food and has a rating of 3 out of 5
       sells fast food and has a 3 out of 5 customer rating
       serve fast food and have a customer rating of 3 out of 5
       serves fast food and has a three star rating
       serves fast food with a customer rating of 3 out of 5
       serves fast food with an average customer rating of 3 out 5
       serving a fast food menu and scoring 3 out of 5 stars
       that serves fast food with an average customer rating of 3 out 5
       :
    ('food=Fast food', 'cu

       japanese and not family - friendly
       japanese eatery which is not family friendly
       japanese food but not child friendly
       japanese food but not family - friendly
       japanese food but not kid friendly
       japanese food for adults
       :
    ('food=Japanese', 'family_friendly=yes')
       a good place to go for a japanese meal especially if you have kids because of the family friendly ambiance
       a japanese family
       a japanese family - friendly place
       a japanese food experience for the family
       a japanese kids friendly place
       a japanese style , family
       a japanese which is kid friendly
       an excellent choice for authentic japanese cuisine in a family friendly environment
       are looking for japanese food place which is also kid friendly
       authentic japanese cuisine in a family friendly environment
       can find japanese food in a child - friendly environment
       find japanese food in a child - friendly enviro

       NEAR that is rated 1 out of 5
       NEAR with 1 out of 5
       NEAR with 1 out of 5 customer rating
       NEAR with a 1 out of 5 rating
       NEAR with a customer rating of 1
       NEAR with a customer rating of 1 out
       NEAR with a customer rating of 1 out 5
       NEAR with a customer rating of 1 out of 5
       are located near NEAR and are rated 1 out of 5
       are located near NEAR and have a customer rating of 1 out of 5
       are located near the NEAR , and have a customer rating of 1 out of 5
       by NEAR , with a one star rating from customers ,
       can be found near to the NEAR and has a customer rating of 1 out of 5
       fitzbilies is located near the NEAR and is rated 1 out of 5 by customers .
       found near NEAR it is customer rated 1 out of 5
       :
    ('near=PLACEHOLDER', 'customer_rating=3 out of 5')
       're located near the NEAR and have a customer rating of 3 out of 5
       's near the NEAR and is rated 3 out of 5
       NEAR , rate

    ('price_range=moderate', 'food=Fast food')
       a good price range for fast food
       a moderate - priced in fast food
       a moderate price fast food
       a moderate price range and fast food
       a moderately - priced fast - food chain
       a moderately - priced fast food establishment
       a moderately priced fast food
       a place for moderately priced fast food
       a range of moderately - priced fast foods
       a reasonably priced fast food
       above the average price range , that serves fast food
       adults that serves an array of moderately priced fast food
       also offers reasonably priced fast food around e20 - e25
       an array of moderately priced fast food
       an average price range serving fast food
       an average priced fast food
       average priced fast food establishment
       average prices and fast food
       averagely priced fast food
       centre with a moderate price range and fast food
       considered moderate price

       good value english food
       has a moderate price range and serves english food
       has a moderate price range for english food
       has a moderate price range that serves english food
       has an average price range for their english food dishes
       has moderate price for english food
       :
    ('price_range=moderate', 'food=Fast food')
       a good price range for fast food
       a moderate - priced in fast food
       a moderate price fast food
       a moderate price range and fast food
       a moderately - priced fast - food chain
       a moderately - priced fast food establishment
       a moderately priced fast food
       a place for moderately priced fast food
       a range of moderately - priced fast foods
       a reasonably priced fast food
       above the average price range , that serves fast food
       adults that serves an array of moderately priced fast food
       also offers reasonably priced fast food around e20 - e25
       an array of 

In [8]:
# ViGGO: pairs whose first item starts with 'genres' and second with 'platforms',
# showing at most 25 phrases per attribute/value combination.
print_phrase_pairs(
    phrase_data=viggo_phrases,
    attribute1='genres',
    attribute2='platforms',
    at_most=25,
)

('genres', 'platforms=Nintendo')
    ('genres=pinball', 'platforms=Nintendo')
       an average pinball game for nintendo
       is an average pinball game for nintendo
    ('genres=platformer', 'platforms=Nintendo')
       's an excellent platformer game from nintendo
       an excellent platformer game from nintendo
       it 's an excellent platformer game from nintendo
    ('genres=puzzle', 'platforms=Nintendo')
       puzzle game for nintendo
       puzzle game for the nintendo
       this puzzle game is only available for nintendo .
('genres', 'platforms=PC')
    ('genres=indie', 'platforms=PC')
       's an indie game for pc
       an indie game for pc
       it 's an indie game for pc
    ('genres=real-time strategy', 'platforms=PC')
       's a real - time strategy game for pc
       a real - time strategy game for pc
       a real - time strategy game for pc only
       a real - time strategy game playable on pc only
       a real - time strategy pc game
       a real - time 

In [9]:
# ViGGO: every attribute pair, showing at most 25 phrases per attribute/value combination.
print_phrase_pairs(phrase_data=viggo_phrases, at_most=25)

('available_on_steam', 'esrb=E (for Everyone)')
    ('available_on_steam=no', 'esrb=E (for Everyone)')
       it 's not available on steam but its esrb rating is e ( for everyone ) .
('available_on_steam', 'esrb=M (for Mature)')
    ('available_on_steam=yes', 'esrb=M (for Mature)')
       is also available on steam and is rated m ( for mature )
       it 's also available on steam , and its content is rated m ( for mature ) .
       it is also available on steam and is rated m ( for mature ) .
('available_on_steam', 'has_linux_release=no')
    ('available_on_steam=no', 'has_linux_release=no')
       's no steam or linux availability though
       's not available on steam or linux , though
       does n't have any steam or linux support
       does not have a steam or linux release
       has no release on steam or linux
       has no steam or linux support
       has no steam release or linux release
       is n't available on steam and does n't have a linux release
       is not avai

    ('genres=adventure', 'exp_release_date=PLACEHOLDER')
       this adventure game is expected to come out in EXP_RELEASE_DATE .
    ('genres=driving/racing', 'exp_release_date=PLACEHOLDER')
       a driving / racing game that is going to be released on EXP_RELEASE_DATE
       a driving / racing game that will be released on EXP_RELEASE_DATE
       is a driving / racing game that is going to be released on EXP_RELEASE_DATE
       is a driving / racing game that will be released on EXP_RELEASE_DATE
    ('genres=sport', 'exp_release_date=PLACEHOLDER')
       a sports game expected to be released on EXP_RELEASE_DATE
       a sports game that will be released on EXP_RELEASE_DATE
       an nba basketball sports games poised to be released across all platforms on EXP_RELEASE_DATE
       is a sports game expected to be released on EXP_RELEASE_DATE
       is a sports game that will be released on EXP_RELEASE_DATE
       is an nba basketball sports games poised to be released across all platfo

    ('genres=simulation', 'player_perspective=bird view')
       sim with the option of bird view perspective too
       sim with the option of playing from a bird view perspective
       simulation game that has a bird view perspective
    ('genres=strategy', 'player_perspective=bird view')
       a strategic bird view perspective
       strategy , bird view game ,
       strategy game that is played from a bird view
       uses a strategic bird view perspective
    ('genres=tactical', 'player_perspective=bird view')
       's a tactical game where you control your characters from a bird view
       a tactical game where you control your characters from a bird view
       is tactical and offers a bird view
       it 's a tactical game where you control your characters from a bird view .
       the game is tactical and offers a bird view .
    ('genres=turn-based strategy', 'player_perspective=bird view')
       a good turn - based strategy game played from a bird view perspective
    

       a simulation , strategy game
       a simulation - strategy game
       a simulation - strategy game with average ratings
       a simulation and strategy game
       a simulation strategy
       a simulation strategy game
       an average simulation and strategy game
       an average simulation strategy game
       an okay simulation strategy game
       for a good simulation - strategy
       for a good simulation and strategy game
       for the sim strategy genre
       is a good simulation strategy game
       is a simulation , strategy game
       is a simulation - strategy game with average ratings
       is a simulation and strategy game
       is a simulation strategy game
       is an average simulation and strategy game
       it is a simulation - strategy game with average ratings .
       it received average reviews for the sim strategy genre .
       like simulation strategy games
       :
    ('genres=simulation', 'genres=turn-based strategy')
       a good simu

('has_multiplayer', 'name=PLACEHOLDER')
    ('has_multiplayer=yes', 'name=PLACEHOLDER')
       came out with an average rating that was multiplayer , named NAME
       the multiplayer game , NAME ,
('has_multiplayer', 'platforms=Nintendo')
    ('has_multiplayer=no', 'platforms=Nintendo')
       this single - player nintendo game
('has_multiplayer', 'platforms=PC')
    ('has_multiplayer=yes', 'platforms=PC')
       a multiplayer game that was only released on pc
       is a multiplayer game that was only released on pc
       multiplayer for the pc
       multiplayer out on pc
       with multiplayer for the pc
       with multiplayer out on pc
('has_multiplayer', 'platforms=PlayStation')
    ('has_multiplayer=yes', 'platforms=PlayStation')
       also has multiplayer , but is only available on playstation
('has_multiplayer', 'player_perspective=bird view')
    ('has_multiplayer=no', 'player_perspective=bird view')
       a single - player bird view
       while i was disappointed that 

       whether they game on the pc or playstation
    ('platforms=PC', 'platforms=Xbox')
       for pc and xbox
       of pc and xbox
       on platforms of pc and xbox
       pc and xbox
       platforms of pc and xbox
('platforms=PC', 'release_year')
    ('platforms=PC', 'release_year=PLACEHOLDER')
       a game for the pc that was released in RELEASE_YEAR
       a pc game released in RELEASE_YEAR
       a pc game that was released in RELEASE_YEAR
       a pc game that was released in RELEASE_YEAR to average reception
       for pc in RELEASE_YEAR
       for the pc in RELEASE_YEAR
       for the pc that was released in RELEASE_YEAR
       is a game for the pc that was released in RELEASE_YEAR
       is a pc game released in RELEASE_YEAR
       is a pc game that was released in RELEASE_YEAR
       is a pc game that was released in RELEASE_YEAR to average reception
       it was released for pc in RELEASE_YEAR .
       pc in RELEASE_YEAR
       released for pc in RELEASE_YEAR
       th

       's a game from RELEASE_YEAR with a t ( for teen ) rating
       RELEASE_YEAR is rated t
       a game from RELEASE_YEAR with a t ( for teen ) rating
       came out in RELEASE_YEAR and is rated t ( for teen )
       in RELEASE_YEAR is rated t
       it 's a game from RELEASE_YEAR with a t ( for teen ) rating .
       it came out in RELEASE_YEAR and is rated t ( for teen ) .
       it was released in RELEASE_YEAR and is rated t ( for teen ) .
       it was released in RELEASE_YEAR and rated t ( for teen ) .
       it was released in RELEASE_YEAR is rated t .
       it was released in RELEASE_YEAR with a t rating .
       released in RELEASE_YEAR , and rated t ( for teen )
       released in RELEASE_YEAR , with a rating of t ( for teen )
       released in RELEASE_YEAR and rated t ( for teen )
       released in RELEASE_YEAR is rated t
       released in RELEASE_YEAR with a rating of t ( for teen )
       released in RELEASE_YEAR with a t ( for teen ) esrb rating
       released i