In [1]:
#Denis Paperno, 2018-2020
#Code for generating interpreted languages with 'personal relations' interpretation

from collections import defaultdict
import random

'''List of reserved characters

at least at the level of logical form the basic elements of the language are encoded 
as characters. Some characters are reserved for relation names or grammatical words'''
reserved_chars=['p','f','e','c','t','o','s']

def newUniverse(n):
	"""generate a list of n names - characters outside of the reserved list"""
	z=[]
	i=ord('a')
	#add entity names to the list starting from 'a'
	while len(z)<n:
		ch=chr(i)
		if ch not in reserved_chars:
			z.append(ch)
		i+=1
	return z

class InterpretedLanguage:
    """InterpretedLanguage class of sets of interpreted strings

    Initialized with rel_num relations (up to 4) and 2*num_pairs entities.
    For convenience of forming symmetric relations, there is always an even number
    of entities."""
    def __init__(self, rel_num,num_pairs):
        if rel_num+3>len(reserved_chars): raise(ValueError)
        self.rel=reserved_chars[:rel_num]
        names=newUniverse(num_pairs*2)
        #friend relation is initialized randomly as a symmetric relation
        random.shuffle(names)
        self.friend={}
        for i in range(num_pairs):
            self.friend[names[2*i]]=names[2*i+1]
            self.friend[names[2*i+1]]=names[2*i]
        random.shuffle(names)
        #enemy relation is initialized randomly as a symmetric relation
        self.enemy={}
        for i in range(num_pairs):
            self.enemy[names[2*i]]=names[2*i+1]
            self.enemy[names[2*i+1]]=names[2*i]
        #parent relation is initialized randomly as a cycle involving all entities
        random.shuffle(names)
        self.parent={}
        for i in range(len(names)):
             self.parent[names[i]]=names[i-1]
        #child relation is initialized as the inverse of the parent relation
        self.child={}
        for i in range(len(names)):
             self.child[names[i-1]]=names[i]
        self.names=names

    def examples(self,i):
        '''returns all logical forms of complexity (length) i 
        
        A logical form is a string of relation chars followed by an entity name char'''
        if i<=1: ex=self.names
        else: ex=[x+y for x in self.rel for y in self.examples(i-1)]
        return ex

    def interpret(self,s):
        """interpretation function returns the entity a logical form s describes"""
        assert type(s) is str
        assert len(s)>0
        if len(s)==1 and s in self.names: return s
        elif len(s)>1:
            if s[0]=='f': return self.friend[self.interpret(s[1:])]
            elif s[0]=='e': return self.enemy[self.interpret(s[1:])]
            elif s[0]=='p': return self.parent[self.interpret(s[1:])]
            elif s[0]=='c': return self.child[self.interpret(s[1:])]
            else: print(s); raise KeyError

    def express(self,s,b):
        """spellout function translating logical form s into a string of word identifiers
        
        s: logical form
        b: branching parameter"""
        if len(s)==1 and s in self.names: return s
        elif len(s)>1:
            r=s[0]
            if r in self.rel:
                o=random.choice(b)
                if o=='r': return 't'+r+'o'+self.express(s[1:],b) 
                elif o=='l': return self.express(s[1:],b)+'s'+r
                else: raise(KeyError)
            else: raise(KeyError)
        else: raise(KeyError)

    def lines(self,data,b):
        """Interprets and expresses logical forms from a list
        
        data: list of logical forms
        b: branching parameter
        Returns a mapping from entities to lists of strings expressing them, based on a list
        of logical forms data"""
        l=defaultdict(list)
        for d in data:
            l[self.interpret(d)].append(self.express(d,b))
        return l

    def memorization_data(self):
        """returns all logical forms of complexity 1 and 2"""
        return self.examples(1)+self.examples(2)

    def allexamples(self, b,complexity=2,min_complexity=1):
        """returns a list of (string,referent) pairs for all logical forms 
        
        Parameters define breanching and complexity range:
        b: branching parameter
        min_complexity: minimal logical form complexity (defaults to a single name)
        complexity: max number of relation elements in a logical form"""
        z=[]
        sample = []
        for c in range(min_complexity,complexity+1): sample+=self.examples(c)
        for e in sample:
            line=self.express(e,b)
            category=self.interpret(e)
            z.append((line,category))
        return z

    def randomexamples(self, k, b,complexity=3,min_complexity=1):
        "returns k random examples of complexity up to a given value"
        z=[]
        sample = []
        for c in range(complexity): sample+=self.examples(c)
        for i in range(min_complexity,k):
            e=random.choice(sample)
            line=self.express(e,b)
            category=self.interpret(e)
            z.append((line,category))
        return z


In [2]:
newUniverse(5)

['a', 'b', 'd', 'g', 'h']

In [3]:
test = InterpretedLanguage(rel_num=4, num_pairs=6)

In [12]:
example = test.examples(4)
example[4]
# example

'pppk'

In [5]:
test.enemy

{'m': 'n',
 'n': 'm',
 'q': 'b',
 'b': 'q',
 'h': 'd',
 'd': 'h',
 'i': 'l',
 'l': 'i',
 'j': 'a',
 'a': 'j',
 'g': 'k',
 'k': 'g'}

In [6]:
test.memorization_data()

['i',
 'g',
 'n',
 'l',
 'k',
 'h',
 'j',
 'm',
 'b',
 'q',
 'd',
 'a',
 'pi',
 'pg',
 'pn',
 'pl',
 'pk',
 'ph',
 'pj',
 'pm',
 'pb',
 'pq',
 'pd',
 'pa',
 'fi',
 'fg',
 'fn',
 'fl',
 'fk',
 'fh',
 'fj',
 'fm',
 'fb',
 'fq',
 'fd',
 'fa',
 'ei',
 'eg',
 'en',
 'el',
 'ek',
 'eh',
 'ej',
 'em',
 'eb',
 'eq',
 'ed',
 'ea',
 'ci',
 'cg',
 'cn',
 'cl',
 'ck',
 'ch',
 'cj',
 'cm',
 'cb',
 'cq',
 'cd',
 'ca']

In [7]:
example[-1]

'ccca'

In [8]:
test.express(s = example[0],b='r')

'tpotpotpoi'

In [13]:
test.lines(example[:4],b='r')

defaultdict(list,
            {'q': ['tpotpotpoi'],
             'd': ['tpotpotpog'],
             'a': ['tpotpotpon'],
             'i': ['tpotpotpol']})

In [15]:
test.allexamples(b='l')

[('i', 'i'),
 ('g', 'g'),
 ('n', 'n'),
 ('l', 'l'),
 ('k', 'k'),
 ('h', 'h'),
 ('j', 'j'),
 ('m', 'm'),
 ('b', 'b'),
 ('q', 'q'),
 ('d', 'd'),
 ('a', 'a'),
 ('isp', 'a'),
 ('gsp', 'i'),
 ('nsp', 'g'),
 ('lsp', 'n'),
 ('ksp', 'l'),
 ('hsp', 'k'),
 ('jsp', 'h'),
 ('msp', 'j'),
 ('bsp', 'm'),
 ('qsp', 'b'),
 ('dsp', 'q'),
 ('asp', 'd'),
 ('isf', 'j'),
 ('gsf', 'q'),
 ('nsf', 'k'),
 ('lsf', 'a'),
 ('ksf', 'n'),
 ('hsf', 'b'),
 ('jsf', 'i'),
 ('msf', 'd'),
 ('bsf', 'h'),
 ('qsf', 'g'),
 ('dsf', 'm'),
 ('asf', 'l'),
 ('ise', 'l'),
 ('gse', 'k'),
 ('nse', 'm'),
 ('lse', 'i'),
 ('kse', 'g'),
 ('hse', 'd'),
 ('jse', 'a'),
 ('mse', 'n'),
 ('bse', 'q'),
 ('qse', 'b'),
 ('dse', 'h'),
 ('ase', 'j'),
 ('isc', 'g'),
 ('gsc', 'n'),
 ('nsc', 'l'),
 ('lsc', 'k'),
 ('ksc', 'h'),
 ('hsc', 'j'),
 ('jsc', 'm'),
 ('msc', 'b'),
 ('bsc', 'q'),
 ('qsc', 'd'),
 ('dsc', 'a'),
 ('asc', 'i')]

In [11]:
test.randomexamples(k=4, b='l',complexity=3,min_complexity=1)

[('ksf', 'n'), ('lsc', 'k'), ('msc', 'b')]