# Welcome to Game of Dialogs 

## Description

Game of Dialogs is an attempt to create a Knowledge Base from which conversational datasets can be generated for building end-to-end dialog systems.

### What is the Knowledge Base ?

A Knowledge Base is a publicly available database that can be used to generate user profiles, which in turn are used to create unique and distinct conversations.

**Importing Libraries**

In [2]:
import pandas as pd
import random

## Game of Dialogs.xlsx

Below is the file called Game of Dialogs.xlsx. The file contains the suggested values for each of the slots that we intend to use to create the dialog.
From this Excel sheet we create a list of possible values for each slot, which is then used to create the various user profiles for the dialog.

In [3]:
# Load the 'VARIABLE_VALUES' sheet, which holds the candidate values per slot.
data_frame = pd.read_excel('Game_of_Dialogs.xlsx', sheet_name='VARIABLE_VALUES')

In this cell we create the user_values for the various user profiles

In [4]:
# Map every column of the sheet to the list of its non-null cell values,
# collected row by row so the order of the sheet is preserved.
user_values = {}
for index, row in data_frame.iterrows():
    for column in data_frame.columns:
        # Register the column even when this particular cell is empty.
        values_so_far = user_values.setdefault(column, [])
        cell = row[column]
        if pd.notnull(cell):
            values_so_far.append(cell)

In [5]:
# Build a lookup from each note to the companies listed for it in the
# 'COMPANY_ROLE' sheet.
data_frame_value_association = pd.read_excel('Game_of_Dialogs.xlsx', sheet_name='COMPANY_ROLE')
partner_role_dict = {}

for index, row in data_frame_value_association.iterrows():
    company = row["company_name"]
    # Rows without a company contribute nothing (and create no key).
    if pd.isnull(company):
        continue
    partner_role_dict.setdefault(row["note"], []).append(company)

print(partner_role_dict)

{'salary': ['STARK INDUSTRIES', 'PYM INDUSTRIES', 'ACME CORP'], 'electricity bill': ['BURNS INDUSTRIES', 'POWER CORP', 'ENERGY DISTRIBUTORS'], 'telephone bill': ['SUPER NETWORK', 'CONNECTION GENE', 'INTERCONNECT TECH'], 'pension': ['STARK INDUSTRIES', 'PYM INDUSTRIES', 'ENERGY DISTRIBUTORS']}


In [6]:
# Build a lookup from each flow direction ("accounted/credited" column) to the
# notes listed for it in the 'FLOW_DICTIONARY' sheet.
data_frame_note_flow = pd.read_excel('Game_of_Dialogs.xlsx', sheet_name='FLOW_DICTIONARY')
note_flow_dict = {}

for index, row in data_frame_note_flow.iterrows():
    note = row["note"]
    # Rows without a note contribute nothing (and create no key).
    if pd.isnull(note):
        continue
    note_flow_dict.setdefault(row["accounted/credited"], []).append(note)
print(note_flow_dict)

{'credited': ['salary', 'pension', 'money transfer'], 'accounted': ['electricity bill', 'telephone bill', 'money transfer', 'house rent']}


## How to create User values

Below you will see the creation of two items i.e **list_of_columns** and **knowledge_base**

1. **list_of_columns** : Contains the user slot values that we intend for each _user profile_
2. **knowledge_base** : This is the data_frame that is usually converted to an excel file that displays various user_profiles

In [7]:
# Slot names stored for every user profile; they double as the column
# headers of the knowledge base.
list_of_columns = [
    'name',
    'partner_names',
    'user_accounts',
    'balance',
    'limit',
    'transaction_ids',
    'card_names',
    'card_ids',
    'notes',
    'note:associated_partner',
    'note:flow',
    'note:amount',
    'note:date',
    'company_names',
]
# Start from an empty frame that only carries the column headers.
knowledge_base = pd.DataFrame(columns=list_of_columns)

Function to create the knowledge base and convert it to an excel sheet

In [7]:
def _pick_random_subset(values):
    """Return a random selection of between one and all entries of *values*."""
    return random.sample(values, random.randint(1, len(values)))


def _pick_amount_within_limit(amount_values, limit):
    """Return a random entry of *amount_values* that does not exceed *limit*."""
    # Filtering first gives the same distribution as re-drawing until a value
    # fits, without looping an unbounded number of times.
    return random.choice([amount for amount in amount_values if amount <= limit])


def create_user_profiles(number_of_profiles, user_values, file_name, list_of_columns, knowledge_base):
    """Generate random user profiles, add them to the knowledge base and save it.

    Every profile is a dict holding the slots 'name', 'partner_names',
    'user_accounts', 'balance', 'limit', 'transaction_ids', 'card_names',
    'card_ids', 'notes', 'note:associated_partner', 'note:flow',
    'note:amount', 'note:date' and 'company_names'. Multi-valued slots are
    stored as comma-separated strings.

    Besides its arguments, the function reads the module-level dictionaries
    ``partner_role_dict`` (note -> companies) and ``note_flow_dict``
    (flow -> notes).

    Args:
        number_of_profiles: How many profiles to generate.
        user_values: Dict of candidate values; the keys 'partner_names',
            'user_accounts', 'amount_values', 'card_names' and 'notes' are
            read.
        file_name: Path of the Excel file to write (sheet 'UserValues').
        list_of_columns: Column names, in order, used for the new rows.
        knowledge_base: Existing DataFrame the new profiles are added to.

    Returns:
        The DataFrame containing the previous rows of *knowledge_base*
        followed by the newly generated profiles.

    Raises:
        ValueError: If one of the candidate lists to sample from is empty.
    """
    # Local import keeps this cell self-contained; the notebook's import cell
    # only provides pandas and random.
    import datetime

    amount_values = user_values['amount_values']

    # Note dates are drawn from roughly 20 years in the past up to one year
    # in the future. The standard library is used because the `fake` object
    # the earlier code referred to is not defined in the visible notebook.
    today = datetime.date.today()
    earliest_date = today - datetime.timedelta(days=20 * 365)
    date_span_days = (today + datetime.timedelta(days=365) - earliest_date).days

    profile_set = []
    # Names are numbered consecutively, so every profile is unique and no
    # duplicate check is required.
    for profile_number in range(1, number_of_profiles + 1):
        custom_user = dict()

        #1 Set up name
        custom_user['name'] = "player_{}".format(profile_number)

        #2 Set up partner names
        custom_user_partner_names = _pick_random_subset(user_values['partner_names'])
        custom_user['partner_names'] = ','.join(custom_user_partner_names)

        #3 Set user_accounts
        custom_user_accounts = _pick_random_subset(user_values['user_accounts'])
        custom_user['user_accounts'] = ','.join(custom_user_accounts)

        #4 Set balance
        custom_user['balance'] = random.choice(amount_values)

        #5 Set limit
        custom_user['limit'] = random.choice(amount_values)

        #6 Set transaction_ids ("<partner>-<amount>-<account>")
        transaction_id_list = []
        for partner in _pick_random_subset(custom_user_partner_names):
            account_chosen = random.choice(custom_user_accounts)
            amount_chosen = _pick_amount_within_limit(amount_values, custom_user['limit'])
            transaction_id_list.append(
                "{}-{}-{}".format(partner, int(amount_chosen), account_chosen)
            )
        custom_user['transaction_ids'] = ','.join(transaction_id_list)

        #7 Set card names
        custom_user_card_names = _pick_random_subset(user_values['card_names'])
        custom_user['card_names'] = ','.join(custom_user_card_names)

        #8 Set card_ids ("<card name>-<linked account>")
        card_id_list = []
        for card_name in custom_user_card_names:
            linked_account = random.choice(custom_user_accounts)
            card_id_list.append('{}-{}'.format(card_name, linked_account))
        custom_user['card_ids'] = ','.join(card_id_list)

        #9 Set notes
        custom_user_notes = _pick_random_subset(user_values['notes'])
        custom_user['notes'] = ','.join(custom_user_notes)

        #10 Set note:associated_partner
        list_of_companies = []
        note_associated_partner_list = []
        for note in custom_user_notes:
            list_of_associated_partners = partner_role_dict.get(note)
            # Some notes have no company mapping at all; leave them out
            # instead of failing with a KeyError.
            if not list_of_associated_partners:
                continue
            associated_partner = random.choice(list_of_associated_partners)
            if associated_partner not in list_of_companies:
                list_of_companies.append(associated_partner)
            note_associated_partner_list.append("{}:{}".format(note, associated_partner))
        custom_user["note:associated_partner"] = ','.join(note_associated_partner_list)

        #11 Set note:flow (a note listed under several flows appears once per flow)
        note_flow_list = []
        for flow, notes_of_flow in note_flow_dict.items():
            for note in custom_user_notes:
                if note in notes_of_flow:
                    note_flow_list.append("{}:{}".format(note, flow))
        custom_user["note:flow"] = ','.join(note_flow_list)

        #12 Set note:amount
        note_associated_amount_list = []
        for note in custom_user_notes:
            amount_chosen = _pick_amount_within_limit(amount_values, custom_user["limit"])
            note_associated_amount_list.append("{}:{}".format(note, amount_chosen))
        custom_user["note:amount"] = ','.join(note_associated_amount_list)

        #13 Set note:date ("<note>:<day>-<month>-<year>")
        note_associated_date_list = []
        for note in custom_user_notes:
            note_date = earliest_date + datetime.timedelta(
                days=random.randint(0, date_span_days)
            )
            date = '{}-{}-{}'.format(note_date.day, note_date.month, note_date.year)
            note_associated_date_list.append('{}:{}'.format(note, date))
        custom_user["note:date"] = ','.join(note_associated_date_list)

        #14 Set company names
        custom_user["company_names"] = ','.join(list_of_companies)

        profile_set.append(custom_user)

    # DataFrame.append no longer exists in pandas 2.x, so build all new rows
    # at once and concatenate only when there are existing rows to keep.
    new_rows = pd.DataFrame(profile_set, columns=list_of_columns)
    if knowledge_base.empty:
        knowledge_base = new_rows
    else:
        knowledge_base = pd.concat([knowledge_base, new_rows], ignore_index=True)

    # The context manager saves and closes the workbook, replacing the
    # removed ExcelWriter.save() call.
    with pd.ExcelWriter(file_name) as excel_writer:
        knowledge_base.to_excel(excel_writer, sheet_name='UserValues')
    return knowledge_base
        

In [8]:
# Generate 100 profiles and keep the returned frame as the knowledge base.
knowledge_base = create_user_profiles(
    number_of_profiles=100,
    user_values=user_values,
    file_name='user_values.xlsx',
    list_of_columns=list_of_columns,
    knowledge_base=knowledge_base,
)

In [9]:
# Show the generated user profiles.
print(knowledge_base)

          name                                      partner_names  \
0     player_1  megan,harry,emma,james,lily,sarah,michael,thom...   
1     player_2                                        james,harry   
2     player_3                                         lily,megan   
3     player_4                             emma,sarah,olivia,lily   
4     player_5  olivia,michael,thomas,harry,emma,sarah,megan,j...   
5     player_6          megan,thomas,lily,james,michael,emma,jack   
6     player_7          lily,megan,harry,thomas,olivia,sarah,jack   
7     player_8               james,harry,megan,jack,olivia,thomas   
8     player_9                                         lily,james   
9    player_10                                     jack,emma,lily   
10   player_11                      lily,michael,harry,emma,sarah   
11   player_12                                              harry   
12   player_13  emma,thomas,olivia,harry,james,jack,michael,sarah   
13   player_14                    