# Notebook for automatically generating some dialogues in the soloist format

In [44]:
import pandas as pd
import random
import copy

Load the dataset:

In [45]:
# Location of the pruned college dataset (CSV file) that backs all dialogues.
DATASET_CSV = "/mount/studenten-temp1/users/godberja/SpokenDialogSystems/college_adviser/pruned_dataset_final_v3.csv"
db = pd.read_csv(DATASET_CSV)

In [46]:
# Maps the raw dataset column names to the slot names used in the dialogues.
name_changer = {
    # general information about the college
    "NAME": "name",
    "ALIAS": "alias",
    "CITY": "city",
    "STATE": "state",
    "REGION": "region",
    "URL": "url",
    "CONTROL": "control",
    # figures about students, costs and outcomes
    "NUM_UGDS": "students",
    "ADM_RATE": "admission_rate",
    "SAT_AVG": "sat",
    "COSTT4_A": "cost",
    "PRICE_RANGE": "price_range",
    "EARNINGS_MDN": "earnings",
    "DEBT_MDN": "debt",
    "COMPL_RATE": "completion",
    # one column per study area
    "CIP04BACHL": "architecture",
    "CIP09BACHL": "journalism",
    "CIP11BACHL": "computer science",
    "CIP13BACHL": "education",
    "CIP14BACHL": "engineering",
    "CIP16BACHL": "linguistics",
    "CIP22BACHL": "law",
    "CIP23BACHL": "literature",
    "CIP26BACHL": "biology",
    "CIP27BACHL": "mathematics",
    "CIP38BACHL": "philosophy",
    "CIP40BACHL": "physics",
    "CIP42BACHL": "psychology",
    "CIP45BACHL": "social sciences",
    "CIP50BACHL": "arts",
    "CIP51BACHL": "health",
    "CIP52BACHL": "business",
    "CIP54BACHL": "history",
    "SUM": "areas",
}

In [47]:
# Rename the raw dataset columns to the slot names used in the dialogues.
# Columns without an entry in `name_changer` keep their original name.
# The result is assigned back instead of using inplace=True, so the frame is
# never modified behind the back of code that still holds the old object.
print(db.columns)
db = db.rename(columns=name_changer)
print(db.columns)

Index(['UNITID', 'NAME', 'ALIAS', 'CITY', 'STATE', 'REGION', 'URL', 'CONTROL',
       'NUM_UGDS', 'ADM_RATE', 'SAT_AVG', 'COSTT4_A', 'PRICE_RANGE',
       'EARNINGS_MDN', 'DEBT_MDN', 'COMPL_RATE', 'CIP04BACHL', 'CIP09BACHL',
       'CIP11BACHL', 'CIP13BACHL', 'CIP14BACHL', 'CIP16BACHL', 'CIP22BACHL',
       'CIP23BACHL', 'CIP26BACHL', 'CIP27BACHL', 'CIP38BACHL', 'CIP40BACHL',
       'CIP42BACHL', 'CIP45BACHL', 'CIP50BACHL', 'CIP51BACHL', 'CIP52BACHL',
       'CIP54BACHL', 'SUM'],
      dtype='object')
Index(['UNITID', 'name', 'alias', 'city', 'state', 'region', 'url', 'control',
       'students', 'admission_rate', 'sat', 'cost', 'price_range', 'earnings',
       'debt', 'completion', 'architecture', 'journalism', 'computer science',
       'education', 'engineering', 'linguistics', 'law', 'literature',
       'biology', 'mathematics', 'philosophy', 'physics', 'psychology',
       'social sciences', 'arts', 'health', 'business', 'history', 'areas'],
      dtype='object')


In [48]:
# Map every slot (= database column) to the list of distinct values it takes.
slot_values = {slot: list(set(db[slot].tolist())) for slot in db.columns}
del slot_values["UNITID"]  # the unit ID is not treated as a slot
print(slot_values)

{'name': ['University of Pennsylvania', 'Duke University', 'Santa Clara University', 'Virginia Polytechnic Institute and State University', 'New Jersey Institute of Technology', 'University of Virginia-Main Campus', 'Drexel University', 'George Washington University', 'Grand Canyon University', 'Southern Methodist University', 'Dartmouth College', 'Columbia University in the City of New York', 'Wake Forest University', 'Carnegie Mellon University', 'Northeastern University', 'Vanderbilt University', 'Case Western Reserve University', 'Stanford University', 'Massachusetts Institute of Technology', 'Fairfield University', 'Villanova University', 'Georgia Institute of Technology-Main Campus', 'Fordham University', 'Georgetown University', 'Cornell University', 'University of San Diego', 'Lehigh University', 'Pace University', 'University of Notre Dame', 'University of California-Berkeley', 'New York University', 'University of Rochester', 'Rensselaer Polytechnic Institute', 'Tufts Univers

## Scripts to create simple dialogues automatically

In [6]:
# TODO: include further example texts
# Example utterances for the opening and closing turns of a dialogue.
# greet_dialogue / bye_dialogue pick one entry at random from each list.
system_greetings = [
    "Hello! How can I help you?",
    "Hi, I'm the college adviser, how can I help you?",
    "Tell me what you are looking for.",
    "Tell me more about the college you're looking for.",
    "Hi how can I help you?",
    "Hello! I'm the college adviser, how can I help you?",
    "Hi, how may I be of assistance?",
    "Greetings, how may I help you?",
]
user_greetings = [
    "Hello!",
    "Hi.",
    "Hi",
    "Hello",
    "Hi!",
    "What's up?",
    "Yo",
]
system_byes = [
    "Goodbye!",
    "I hope I could help you.",
    "See you soon.",
    "Goodbye",
    "Bye.",
    "You're welcome. Goodbye!",  # was misspelled "Your're", which ended up in generated replies
    "You're welcome, have a nice day!",
]
user_byes = [
    "Thanks for your help.",
    "Alright, bye!",
    "Thanks, goodbye!",
    "Bye",
    "I don't need more information. Bye!",
    "Thanks, that's all.",
    "Okay, thanks. Goodbye!",
    "Thank you for the information.",
    "I see, thanks, bye",
    "That sounds good, thank you!",
]

In [7]:
def greet_dialogue():
    """Build an opening turn: a random user greeting answered by a random system greeting.

    Returns:
        A 4-tuple ``(user utterance, belief state, database result string, system reply)``.
        The belief state is always ``"none"`` and the result string ``"Database 0 results"``.
    """
    user_turn = "user : " + random.choice(user_greetings)
    system_turn = "system : " + random.choice(system_greetings)
    return (user_turn, "none", "Database 0 results", system_turn)
def bye_dialogue(dialogue_history,belief_states,num_matches):
    """Build a closing turn: a random user goodbye answered by a random system goodbye.

    Params:
        dialogue_history: list of the utterances so far; it is not modified, a new list
            with the user goodbye appended is returned instead
        belief_states: belief state of the dialogue, passed through unchanged
        num_matches: database result string of the dialogue, passed through unchanged
    Returns:
        A 4-tuple ``(extended history, belief_states, num_matches, system reply)``.
    """
    extended_history = dialogue_history + ["user : " + random.choice(user_byes)]
    system_turn = "system : " + random.choice(system_byes)
    return (extended_history, belief_states, num_matches, system_turn)

In [8]:
# example run: one greeting turn, followed directly by a goodbye turn
user_utterance, belief_state, num_matches, sys_reply = greet_dialogue()
print(user_utterance, belief_state, num_matches, sys_reply)
history_so_far = [user_utterance, sys_reply]
print(bye_dialogue(history_so_far, belief_state, num_matches))

user : Hello! none Database 0 results system : Hello! How can I help you?
(['user : Hello!', 'system : Hello! How can I help you?', 'user : Alright, bye!'], 'none', 'Database 0 results', 'system : Goodbye!')


In [52]:
# TODO: include further example texts
# User utterance templates per slot. The placeholder "[<slot>]" is replaced by the
# concrete value in generate_dialog_small. Slots without real templates hold a single
# empty string, so that random.choice always has something to pick from.
simple_slots = dict()
simple_slots["name"] = [
    "I want to know more about [name].",
    "Please tell me more about [name].",
    "[name] sounds interesting to me.",
    "I want to study at [name].",
    "How about [name]?",
    "I want to study at [name].",
    "Tell me more about the [name].",
    "Can you give me more information on [name]?",
    "I heard [name] is great. Can you tell me more about it?",
    "I'm interested in the [name].",
]
simple_slots["control"] = [
    "The college should be [control].",
    "I'm looking for a [control] college.",
    "I'm looking for a college that is [control].",
    "I want it to be [control].",
]
simple_slots["alias"] = [
    "How about [alias]?",
    "I want to study at [alias].",
    "I heard [alias] is great. Can you tell me more about it?",
    "What is the [alias]?",
]
simple_slots["city"] = [
    "I'm looking for a college in [city].",
    "I want to study in [city].",
    "Do you know any unis in [city]?",
    "In [city].",
    "I want to find a college in [city].",
    "Is there a college in [city]?",
    "I want to go to a college in [city].",
]
simple_slots["state"] = [
    "I'm looking for a college in [state].",
    "I want to study in [state].",
    "Do you know any unis in [state]?",
    "In [state].",
    "I'm interested in colleges in [state].",
    "I want more information on colleges in [state].",
    # This template used the "[city]" placeholder, which is never filled for the
    # state slot and therefore leaked into the generated utterance.
    "I want to go to a college in [state].",
]
simple_slots["region"] = [
    "I'm looking for a college in [region].",
    "I'm looking for a college in the [region].",
    "I want to study in [region].",
    "Do you know any unis in [region]?",
    "In the [region].",
    "Show me colleges in the [region].",
    "I want to go to a college in the [region].",
]
simple_slots["url"] = [""]
simple_slots["students"] = [""]
simple_slots["admission_rate"] = [
    "The admission rate should be lower than [admission_rate].",
    "The admission rate should be higher than [admission_rate]",
]
simple_slots["sat"] = ["On average students should have an sat score of [sat]."]
simple_slots["cost"] = [""]
simple_slots["price_range"] = [
    "Show me [price_range] colleges.",
    "Show me a [price_range] college.",
    "[price_range] would be good.",
    "It should be [price_range]",
    "It's okay if the college is [price_range].",
    "I can afford a [price_range] university.",
    "Can you give me a [price_range] university?",
]
# Was an empty list, which makes random.choice raise IndexError; now it uses the
# same empty-string stand-in as the other slots that have no templates yet.
simple_slots["earnings"] = [""]
simple_slots["debt"] = [""]
simple_slots["completion"] = [""]
simple_slots["area"] = [
    "I'd like to study [area]",
    "Can you study [area] there?",
    "I want to study [area].",
    "Can you help me to find a college where I can study [area]?",
    "Is there a study program related to [area] offered?",
    "I want to do something with [area].",
    "Is there a college that offers courses in [area]?",
]
study_areas = ['architecture', 'journalism', 'computer science','education', 'engineering', 'linguistics', 'law', 'literature','biology', 'mathematics', 'philosophy', 'physics', 'psychology','social sciences', 'arts', 'health', 'business', 'history']
# Every study area gets its own copy of the generic "area" templates, with the
# placeholder renamed to the area's slot name, e.g. "[law]".
for study_area in study_areas:
    simple_slots[study_area] = [x.replace("[area]", "[" + study_area + "]") for x in simple_slots["area"]]
simple_slots["areas"] = [""]

In [50]:
# TODO: include further example texts

# System reply templates, grouped by the number of database results
# ("None", "One", "Two", "Many"). generate_dialog_small picks one at random.
# Every template must be followed by a comma: Python silently concatenates
# adjacent string literals, which previously merged three pairs of templates.
system_replies = dict()

# no results
system_replies["None"] = [
    "There is no college matching your preferences.",
    "I couldn't find a college for your request.",
    "I have no information on such a college.",
    "I'm sorry, I don't have any information on this.",
    "My apologies, it appears that I don't have any information about such colleges. Would you like to try some other criteria?",
    "I couldn't find any colleges matching your query. Do you want to change something?",
    "I couldn't find any colleges matching your query. Would you like to try again?",
    "I didn't find a result. Is there anything else I can help you with?",
]

# one result
system_replies["One"] = [
    "[name] is a [control] top university in [city], [state] with [students] undergraduate students. You can check out their website at [url].",
    "The [name] is [price_range] and is located in [region]. There are courses offered in [area]. Do you need more information?",
    "Check [url] if you want more information later.",
    "I found a college that could interest you: [name]. Do you want to know more about it?",
    "Studying at [name] is [price_range].",
    "[name] is a [control] top university in [city], [state] with [students] undergraduate students. Find out more about them at [url].",
    "[name] matches your preferences.",
    "Okay, how about [name]? It's a great university in [city], [state].",
    "[name] is a [control] top university with [students] undergraduate students. Find out more about them at [url].",
    "[name] has an admission rate of [admission_rate]",
    "At [name] graduates earn about [earnings].",
    "[name] is located in [city].",
    "On average it costs [cost] dollars per year.",
    "On average, students accepted into [name] have an sat score of [sat].",  # comma was missing
    "The [name] could interest you. Would you like to know more about it?",
    "[name] might be a match for you. Do you want to know more about it?",
    "For more information check its website at [url].",
    "Your expected earnings will be around [earnings] dollars annually.",
    "You can check out their website at [url].",
    "For example [name] is located in [city]. The completion rate is [completion], but it is [price_range].",
    "On average, students graduate there with [debt] dollars in debt.",
    "[name] is a college in [city] and thus belongs to [region].",
    "[name] is often called [alias].",
    "The [name] offers studying [area].",  # the separating comma had been typed inside the string
    "In case you want more information: visit [url].",
    "[students] attend the university.",
    "May I offer [name]?",
]

# two results
system_replies["Two"] = [
    "I found two universities that could be of interest to you, [name1] and [name2]. Would you like to know more about one of them?",
    "There's two colleges that match your preferences: [name1] and [name2]. Do you want to know more about one of them?",
    # NOTE(review): this template uses the single-result placeholders ([name], [city], ...)
    # although two colleges match - confirm which college it is meant to describe.
    "For example [name] is located in [city]. The completion rate is [completion], but it is [price_range].",
    "Both, [name1] and [name2] match your preferences.",
]

# many results
system_replies["Many"] = [
    "Do you have any other requirements for the college?",
    "There's many. Is there anything else you want from the college?",
    "Do you have other criteria for the college?",
    "Alright. There is a lot of colleges left. What specifically are you looking for?",
    "Do you want to limit the search for a college further?",
    "In which region do you want to study?",
    "What subject would you like to study?",
    "What would you like to study?",
    "Where are you interested in studying?",
    "Which place would you like to study in?",
    "Are you looking for a specific price range?",  # comma was missing
    "How expensive would you like the college to be?",
    "Where do you want your college to be?",
    "Do you have any other wishes to limit the amount of colleges?",
    "Would you like to narrow your search?",
    "There's multiple colleges that match your criteria. Would you like to narrow down your search?",
    "Do you have anything else in mind?",
]

In [11]:
def generate_dialog_small(current_db,slot,value):
    '''
    Returns a user utterance (for history), the belief state, the amount of database results,
    a possible system response and the filtered database.
    Note that formalities like greeting and goodbye are not included
    Only consider ONE slot/value
    Params:
        current_db: database (eventually pre-filtered) that is considered for checking the amount of results
        slot: the slot the dialogue should cover, e.g. name, region, ...
        value: the concrete value of the slot, e.g. expensive, private, ...
               May be a non-string (e.g. a number of students); it is converted to text
               for the utterance and the belief state.
    Returns:
        tuple (user_history, belief, results, system_reply, small_db), where small_db contains
        the rows of current_db whose `slot` column equals `value`
    Raises:
        KeyError if `slot` has no entry in simple_slots or is not a column of current_db
    '''
    # Text operations need a string; the database filter below uses the original value.
    value_text = str(value)
    # A slot without any template yields an empty utterance instead of an IndexError.
    templates = simple_slots[slot] or [""]
    # NOTE(review): for the study-area slots the sampled values appear to be 0/1 flags,
    # which produces utterances like "I want to study 0." - confirm the intended wording.
    user_history = random.choice(templates).replace("[" + slot + "]", value_text)
    belief = slot + " = " + value_text

    small_db = current_db.loc[current_db[slot]==value]
    num_results = len(small_db)
    if num_results == 0:
        reply_key = "None"
    elif num_results == 1:
        reply_key = "One"
    elif num_results == 2:
        reply_key = "Two"
    else: # num_results > 2
        reply_key = "Many"
    # Only a single match is reported in the singular form.
    if num_results == 1:
        results = "Database 1 result"
    else:
        results = "Database " + str(num_results) + " results"
    system_reply = random.choice(system_replies[reply_key])
    return user_history, belief, results, system_reply, small_db

In [12]:
# example run: one turn asking for private colleges, checked against the full database
user_history, belief, results, system_reply, small_db = generate_dialog_small(db, "control", "private")
print(user_history, belief, results, system_reply, sep="\n")

I'm looking for a college that is private.
control = private
Database 41 results
There's many. Is there anything else you want from the college?


In [13]:
def generate_dialogue(current_db,slots_values):
    '''
    Generate a dialogue (sometimes including greeting and goodbye) with the specified slots and values.
    Parameters:
        current_db: database that is considered for checking the amount of results; it is
            narrowed down turn by turn, the caller's object itself is not modified
        slots_values (dict): key = slot, value = value; one key should be covered in one turn of the dialogue
    Returns list of created dialogues: one dict per turn with the keys
        "history" (all utterances up to and including the current user utterance),
        "belief" (belief states collected so far, joined by " ; "),
        "kb" (database result string) and "reply" (system reply of this turn)
    '''
    dialogues = []
    dialogue = {"history":[],"belief":"","kb":"","reply":""}
    history = []

    # only once in 10 dialogues we want to include a start
    if random.randrange(10) == 5:
        user_greeting,belief_state,num_results,system_greeting = greet_dialogue()
        dialogue["history"] += [user_greeting]
        dialogue["belief"] = belief_state # in the beginning this is always none
        dialogue["kb"] = num_results
        dialogue["reply"] = system_greeting
        dialogues += [copy.deepcopy(dialogue)]
        history = [user_greeting,system_greeting]

    for slot,value in slots_values.items():
        u_h,b_s,n_r,s_r,small_db = generate_dialog_small(current_db,slot,value)
        dialogue["history"] = history + [str("user : "+u_h)]
        current_belief = dialogue["belief"]
        if current_belief == "": # no belief state yet
            dialogue["belief"] = b_s
        elif b_s != "none":
            if current_belief == "none": # we found something
                dialogue["belief"] = b_s
            else: # concatenate belief states
                dialogue["belief"] = current_belief + " ; " + b_s
        dialogue["kb"] = n_r
        dialogue["reply"] = str("system : " + s_r)
        # store a snapshot, because `dialogue` is updated again in the next turn
        dialogues += [copy.deepcopy(dialogue)]
        history = dialogue["history"] + [dialogue["reply"]]
        # the next turn only searches the colleges that are still matching
        current_db = small_db

    # only once in 10 dialogues we want to include an end
    # (this used to draw from range(1), so the value 5 could never come up
    # and no dialogue ever got a goodbye turn)
    if random.randrange(10) == 5:
        history_with_bye,b_s,n_r,system_bye = bye_dialogue(history,dialogue["belief"],dialogue["kb"])
        dialogue["history"] = history_with_bye
        dialogue["reply"] = system_bye
        dialogues += [copy.deepcopy(dialogue)]

    return dialogues

In [14]:
# example run: a dialogue with two turns (control first, then state)
example_config = {"control": "private", "state": "California"}
generate_dialogue(db, example_config)

[{'history': ["user : I'm looking for a college that is private."],
  'belief': 'control = private',
  'kb': 'Database 41 results',
  'reply': 'system : Do you want to limit the search for a college further?'},
 {'history': ["user : I'm looking for a college that is private.",
   'system : Do you want to limit the search for a college further?',
   'user : I want to study in California.'],
  'belief': 'control = private ; state = California',
  'kb': 'Database 5 results',
  'reply': "system : There's many. Is there anything else you want from the college?"}]

## Create sample dialogues covering multiple / all slots / values

In [49]:
# Build the dialogue configurations: a list of dictionaries (key = slot, value = value)
# with one to three slots each. Every value in `slot_values` is used in exactly one
# configuration, because it is removed from the TODO copy as soon as it is chosen.
# There is deliberately no try/except around the loop body: the former bare `except:`
# hid every error and could loop forever, since a failing round does not shrink the
# TODO list.
dialogue_configs = []
slot_values_TODO = copy.deepcopy(slot_values)
slot_list = list(slot_values_TODO.keys()) # slots that still have unused values
while len(slot_values_TODO) > 0:
    config = dict()
    turns_in_dialog = random.randint(1, 3) # up to three turns # TODO: maybe only do up to two dialogues?
    random.shuffle(slot_list)
    # slicing already copes with fewer remaining slots than requested turns
    slots_to_consider = slot_list[:turns_in_dialog]
    for slot in slots_to_consider:
        value = random.choice(slot_values_TODO[slot])
        config[slot] = value
        slot_values_TODO[slot].remove(value)
        if len(slot_values_TODO[slot]) == 0:
            # all values of this slot are used up
            del slot_values_TODO[slot]
            slot_list.remove(slot)
    dialogue_configs += [copy.deepcopy(config)]
print(len(dialogue_configs))
print(dialogue_configs[:10])

263
[{'students': 6500}, {'city': 'Houghton'}, {'price_range': 'very expensive'}, {'history': 0, 'engineering': 0}, {'business': 1, 'mathematics': 0, 'completion': '80.0%'}, {'students': 17000}, {'cost': 72900, 'social sciences': 1, 'control': 'private'}, {'admission_rate': '5.3%', 'mathematics': 1, 'linguistics': 0}, {'state': 'New Hampshire', 'education': 0, 'engineering': 1}, {'city': 'Bethlehem', 'region': 'New England'}]


In [None]:
# One entry per configuration; each entry is the list of turns returned by generate_dialogue.
all_dialogues = [generate_dialogue(db, dialog_config) for dialog_config in dialogue_configs]

## Manually add further dialogues

In [None]:
# Hand-written example dialogue, all in lower case. Every entry is one turn and repeats
# the complete history up to the current user utterance.
# NOTE(review): unlike the output of generate_dialog_small, the belief strings here start
# with "belief : ", use "; " as separator and a single result is written as
# "database 1 results" - confirm which format the training data expects.
others = [
    {
      "history": [
        "user : do you know any unis in wisconsin?"
      ],
      "belief": "belief : state = wisconsin" ,
      "kb": "database 0 results",
      "reply": "system : i couldn't find any colleges matching your query. would you like to try again?"
    },
    {
      "history": [
        "user : do you know any unis in wisconsin?",
        "system : i couldn't find any colleges matching your query. would you like to try again?",
        "user : how about michigan?"
      ],
      "belief": "belief : state = michigan" ,
      "kb": "database 2 results",
      "reply": "system : what would you like to study there?"
    },  
    {
      "history": [
        "user : do you know any unis in wisconsin?",
        "system : i couldn't find any colleges matching your query. would you like to try again?",
        "user : how about michigan?",
        "system : what would you like to study there?",
        "user : education"
      ],
      "belief": "belief : state = michigan; area = education" ,
      "kb": "database 1 results",
      "reply": "system : how about [name]? it's a great university in [city], [state]."
    },  
    {
      "history": [
        "user : do you know any unis in wisconsin?",
        "system : i couldn't find any colleges matching your query. would you like to try again?",
        "user : how about michigan?",
        "system : what would you like to study there?",
        "user : education",
        "system : how about university of michigan-ann arbor? it's a great university in ann arbor, michigan.",
        "user : can you also study law there?"
      ],
      "belief": "belief : state = michigan; area = law; name = university of michigan-ann arbor" ,
      "kb": "database 0 results",
      "reply": "system : no, you can't study that at [name]."
    },  
    {
      "history": [
        "user : do you know any unis in wisconsin?",
        "system : i couldn't find any colleges matching your query. would you like to try again?",
        "user : how about michigan?",
        "system : what would you like to study there?",
        "user : education",
        "system : how about university of michigan-ann arbor? it's a great university in ann arbor, michigan.",
        "user : can you also study law there?",
        "system : no, you can't study that at university of michigan-ann arbor.",
        "user : thanks!"
      ],
      "belief": "belief : state = michigan; area = law; name = university of michigan-ann arbor" ,
      "kb": "database 0 results",
      "reply": "system : is there anything else i can help you with?"
    },  
    {
      "history": [
        "user : do you know any unis in wisconsin?",
        "system : i couldn't find any colleges matching your query. would you like to try again?",
        "user : how about michigan?",
        "system : what would you like to study there?",
        "user : education",
        "system : how about university of michigan-ann arbor? it's a great university in ann arbor, michigan.",
        "user : can you also study law there?",
        "system : no, you can't study that at university of michigan-ann arbor.",
        "user : thanks!",
        "system : is there anything else i can help you with?",
        "user : no, goodbye!"
      ],
      # NOTE(review): this turn uses the slot "study" where the earlier turns use "area",
      # and it goes back to education / 1 result although the previous turn was
      # law / 0 results - confirm the intended final belief state.
      "belief": "belief : state = michigan; study = education; name = university of michigan-ann arbor" ,
      "kb": "database 1 results",
      "reply": "system : goodbye!"
    },
    # TODO check our already collected dialogues and include them here
]

## Further notes

* So far we assume that every user turn provides some slot information. Turns that provide none are not covered yet, for example:
    * "user : tell me more about it"
    * "user : don't care"
* Also we don't consider requestables, like:
    * "user : What is its admission rate?"
* The user mentioning only a value without naming the slot, like:
    * "user : Education" (as a reply to the system asking what the user would like to study)
* User mentioning more than one slot at a turn, e.g.
    * "user : Show me cheap universities where I can study maths" (price_range + area)
* We don't cover questions like: "system : Would you like to know more about it?"
* We don't cover users mentioning the profession (e.g. lawyer instead of law)

## Manually add dialogues for some edge cases

In [None]:
# Collects the manually written edge-case dialogues, keyed by category name.
edge_cases = {}

In [None]:
# information not existing / no result
# Two independent single-turn dialogues in which the requested name / city yields
# 0 database results and the system apologises.
edge_cases["no info"] = [ # TODO add more edge cases for this category
    {
      "history": [
        "user : Where is the University of Stuttgart located?"
      ],
      "belief": "belief : name = University of Stuttgart",
      "kb": "Database 0 results",
      "reply": "system : I'm sorry, I don't have any information on this college."
    },
    {
      "history": [
        "user : What about a college in Stuttgart?"
      ],
      "belief": "belief : city = Stuttgart",
      "kb": "Database 0 results",
      "reply": "system : Sorry, I have no information about colleges in Stuttgart. Can I help you any further?"
    }
]

In [None]:
# paraphrases (placeholder category, no dialogues written yet)
edge_cases["paraphrases"] = []  # TODO add more edge cases for this category

In [None]:
# user requests
# The user asks for one attribute (here the SAT score) of a college mentioned by name;
# the reply template leaves the values as [name] / [sat] placeholders.
edge_cases["user requests"] = [ # TODO add more edge cases for this category
    {
      "history": [
        "user : What is the SAT-score of the Santa Clara University? "
      ],
      "belief": "belief : name = Santa Clara University",
      "kb": "Database 1 result",
      "reply": "system : [name] has a SAT-score of [sat]."
    }
]

In [None]:
# typos
# All three entries start from the same misspelled user utterance ("Nwe York").
# The first entry is the follow-up turn after the clarification question of the second
# entry; the third entry is an alternative system reply to the misspelled input.
edge_cases["typos"] = [ # TODO add more edge cases for this category
    {
      "history": [
        "user : Is there a University in Nwe York?",
        "system : Did you mean New York?",
        "user : Yes."
      ],
      "belief": "belief : city = New York",
      "kb": "Database 1 result",
      "reply": "system : The [name] is in [city]."
    },
    {
      "history": [
        "user : Is there a University in Nwe York?"
      ],
      # the belief state keeps the value exactly as the user typed it
      "belief": "belief : city = Nwe York",
      "kb": "Database 0 results",
      "reply": "system : Did you mean New York?"
    },
    {
      "history": [
        "user : Is there a University in Nwe York?"
      ],
      "belief": "belief : city = Nwe York",
      "kb": "Database 0 results",
      "reply": "system : I didn't find a result. Is it possible you spelled something wrong?"
    }
]

In [None]:
# negations
# One dialogue, given as two consecutive turns: the user excludes a value ("!=")
# instead of asking for one.
# The second turn extends the belief state of the first one, so both turns must use
# the same slot for New York. The second turn used "state" before; it now uses "city"
# like the first turn.
# NOTE(review): New York is a city as well as a state - confirm that "city" is the
# intended slot.
edge_cases["negations"] = [ # TODO add more edge cases for this category
    {
      "history": [
        "user : Hi!",
        "system : How can I help you?",
        "user : For my studies I want to move away from New York. Which college might be a good choice?"
      ],
      "belief": "belief : city != New York",
      "kb": "Database 42 results",
      "reply": "system : Do you have other criteria for the college?"
    },
    {
      "history": [
        "user : Hi!",
        "system : How can I help you?",
        "user : For my studies I want to move away from New York. Which college might be a good choice?",
        "system : Do you have other criteria for the college?",
        "user : It should offer a study program of education"
      ],
      "belief": "belief : city != New York ; area = education",
      "kb": "Database 22 results",
      "reply": "system : Alright. There is a lot of colleges left. Any other criteria left?"
    }
]

In [None]:
# larger or smaller
# Dialogues in which the user constrains a numeric slot relative to a value (">=")
# instead of naming an exact value.
# Slot names and placeholders follow the renamed database columns (state,
# admission_rate, completion, ...), so that they can be matched against the database.
# NOTE(review): the thresholds mix units (4 vs. 0.8 for the admission rate) - confirm
# which scale the belief state should use.
edge_cases["relative_values"] = [ # TODO add more edge cases for this category
    {
      "history": [
        "user : Hi",
        "system : Hello! How can I help you?",
        "user : Can you give me information on the Princeton University?",
        "system : The Princeton University is located in New Jersey. Its admission rate is 4%.",
        "user : Okay. Do you have information on another college in New Jersey that has a higher admission rate?"
      ],
      # New Jersey is a state, so it belongs to the "state" slot (was "city")
      "belief": "belief : admission_rate >= 4 ; state = New Jersey",
      "kb": "Database 1 result",
      "reply": "system : I can advise you studying at [name]"
    },
    {
      "history": [
        "user : Can you help me to find a college where I can study engineering?",
        "system : Do you have any other requirements for the college?",
        "user : It should be likely to complete the studies"
      ],
      "belief": "belief : area = engineering ; completion >= 50",
      "kb": "Database 45 results",
      "reply": "system : There still is a lot of possible colleges."
    },
    {
      "history": [
        "user : Is it correct that the Yale University is expensive but still has a high completion rate?"
      ],
      # The slot is called "completion" (was "completion rate"), and the reply now
      # reports the completion rate the user asked about. It used to mention the
      # admission rate with the placeholder "[admission rate]", which is not a slot.
      "belief": "belief : name = Yale University ; price_range = expensive ; completion >= 70",
      "kb": "Database 1 result",
      "reply": "system : [name] is [price_range] and has a completion rate of [completion]."
    },
    {
      "history": [
        "user : Is there a college with an admission rate higher than 0.8?"
      ],
      "belief": "belief : admission_rate >= 0.8",
      "kb": "Database 5 results",
      "reply": "system : Do you want to limit the search for a college further?"
    }
]

In [None]:
# opinion change
# The user first asks about one college and then switches to another one.
# After the switch the belief state only contains the newly mentioned college;
# the earlier name is replaced, not appended.
edge_cases["opinion_change"] = [ # TODO add more edge cases for this category
    {
      "history": [
        "user : Tell me more about Georgetown University."
      ],
      "belief": "belief : name = Georgetown University",
      "kb": "Database 1 result",
      "reply": "system : [name] is a [control] college in [city]"
    },
    {
      "history": [
        "user : Tell me more about Georgetown University.",
        "system : Georgetown University is a private college in Washington.",
        "user : And what about the University of California-Berkeley?"
      ],
      "belief": "belief : name = University of California-Berkeley",
      "kb": "Database 1 result",
      "reply": "system : [name] is a [control] college in [city]"
    },
    {
      # same switch as in the previous entry, but the user refers to the college by its
      # alias, so the belief state uses the "alias" slot instead of "name"
      "history": [
        "user : Tell me more about Georgetown University.",
        "system : Georgetown University is a private college in Washington.",
        "user : And what about the UC Berkeley?"
      ],
      "belief": "belief : alias = UC Berkeley",
      "kb": "Database 1 result",
      "reply": "system : [name] is a [control] college in [city]"
    }
]

In [None]:
# two slots in one turn (placeholder category, no dialogues written yet)
edge_cases["two slots"] = []  # TODO add more edge cases for this category