In [1]:
import time
import numpy as np
import pandas as pd
import itertools

import conceptnet_lite
from conceptnet_lite import Label, edges_between, edges_for

In [2]:
### RAT
rat = 'RAT.csv'

### FRAT
frat = 'fRAT.csv'

In [65]:
# read the cues
df = pd.read_csv(frat, sep=';')

# connect to the database
conceptnet_lite.connect(r"C:\Users\rejna\Work_only_here\Miscellaneous\rakshitha\conceptnet_database")

df.head()

Unnamed: 0,w1,w2,w3,wans,answer,Match
0,question,reply,solution,answer,,
1,sensitive,sob,weep,cry,,
2,antlers,doe,fawn,deer,,
3,bud,dandelion,petals,flower,,
4,colt,mare,unicorn,horse,,


# FRAT
## Local search implementation
#### Search for related node intersection (no compound words)

In [123]:
###### Set these two parameters

check_for = '3' # separate digits by comma, even if only 1
t = True # whether or not the solution should be contained

######

In [124]:
def get_nodes(node):
    try:
        current = [(e.start.text, e.relation.name) 
                    for e in edges_for(Label.get(text=node, language='en').concepts, same_language=True) 
                       if e.start.text not in [c]]
        [current.append((e.end.text, e.relation.name)) 
            for e in edges_for(Label.get(text=node, language='en').concepts, same_language=True)
                   if e.end.text not in [c]]
        return set(current)
    except Exception as error:
        print('No label for the node "{}"... Are you sure the spelling is correct?'.format(node))
        return set()

def checker(results, check_for, t, cue):
    if '3' in check_for:
        yield results[0] & results[1] & results[2], [cue[0], cue[1], cue[2]]
    if '2' in check_for:
        for i in range(0, len(results) - 1):
            yield results[i] & results[i+1], [cue[i], cue[i+1]]
            
def get_output(result, cues, has_solution):
    solutions = [res[0] for res in result]
    relations = [res[1] for res in result]
    return {'FrAt': ', '.join(cues), 
            'ground solution': solution,
            'solutions': ', '.join(solutions),
            'has_solution': has_solution,
            'relation': ', '.join(relations)
           } 

In [122]:
# temp_df = df.copy()
concat = df.w1 + ' ' + df.w2 + ' ' + df.w3
concat = concat[:-1] # remove last nan element
cues = [list(map(lambda x: x.lower(), filter(len, line.split(' ')))) for line in concat]
start_time = time.time()
output = []
index = 0
total = 0
tp = 0

for cue in cues:
    results = []
    solution = df.iloc[index].wans
    index +=1 
    print('Finished {}. Timestamp: {} min'.format(cue, round((time.time()-start_time)/60, 2)))

    for c in cue:
        results.append(get_nodes(c))
    
    for result, cue in checker(results, check_for, t, cue):
        total += 1
        has_solution = any(solution.strip() in res[0] for res in result)
        if has_solution: tp+=1
        output.append(get_output(result, cue, has_solution))
    
# save
output.append({'Accuracy': str(round(100*tp/total, 2)) + '%'})
data = pd.DataFrame(output)
data.to_excel('outpu2.xlsx', index=False)

Finished ['question', 'reply', 'solution']. Timestamp: 0.0 min
Finished ['sensitive', 'sob', 'weep']. Timestamp: 0.05 min
Finished ['antlers', 'doe', 'fawn']. Timestamp: 0.07 min
Finished ['bud', 'dandelion', 'petals']. Timestamp: 0.1 min
Finished ['colt', 'mare', 'unicorn']. Timestamp: 0.16 min
Finished ['crown', 'royaly', 'throne']. Timestamp: 0.2 min
No label for the node "royaly"... Are you sure the spelling is correct?
Finished ['algebra', 'calculus', 'trigonometry']. Timestamp: 0.37 min
Finished ['pedal', 'pull', 'shove']. Timestamp: 0.41 min
Finished ['clockwise', 'left', 'wrong']. Timestamp: 0.44 min
Finished ['flu', 'nauseous', 'virus']. Timestamp: 0.47 min
Finished ['astronomy', 'moon', 'twinkle']. Timestamp: 0.5 min
Finished ['bait', 'pond', 'tuna']. Timestamp: 0.69 min
Finished ['bandaid', 'trim', 'wound']. Timestamp: 0.72 min
Finished ['gravity', 'low', 'up']. Timestamp: 0.76 min
Finished ['emergency', 'rapid', 'slow']. Timestamp: 0.83 min
Finished ['brawl', 'debate', 'sol

# RAT
## Search for related node intersection (compound words)

In [None]:
# example
word = 'cottage'
for e in edges_for(Label.get(text=word).concepts, same_language=True):
    if (e.start.text.find('_') != -1) & (e.start.text.find(word) != -1):
        print(e.start.text, "::", e.end.text, "|", e.relation.name)
    if (e.end.text.find('_') != -1) & (e.end.text.find(word) != -1):
        print(e.start.text, "::", e.end.text, "|", e.relation.name) 

In [None]:
def get_compound_words(word):
    """ Given a word, get all the compound words related to it as well as their relation name """
    result = []
    relation = []
    for e in edges_for(Label.get(text=word).concepts, same_language=True):
        if (e.start.text.find('_') != -1) & (e.start.text.find(word) != -1):
            result.append(e.start.text.replace(word, '').strip('_'))
            relation.append(e.relation.name)
        if (e.end.text.find('_') != -1) & (e.end.text.find(word) != -1):
            result.append(e.end.text.replace(word, '').strip('_'))
            relation.append(e.relation.name)

    joint_result = []
    for i in range(len(result)):
        if result[i].find('_') != -1:
            words = result[i].split('_')
            for word in words:
                if word != '': joint_result.append((word, relation[i]))
        else:
            joint_result.append((result[i], relation[i]))
    return joint_result
    # words can still be compounded, so we split them and merge the lists
#     return list(itertools.chain(*[filter(len, word.split('_')) for word in result])), relation

In [None]:
get_compound_words('cottage')

In [None]:
def get_solution(result, cues, solution):
    has_solution = solution in (tup[0].lower() for tup in result)
    solution_count = 0
    relation = ''
    if (has_solution):
        solution_count = 1
        relation = ' | '.join([tup[0] + ' - ' + tup[1] for tup in result])

    return {'RAT': ', '.join(cues), 'solution': solution, 'has_solution': has_solution, 'relation': relation}, solution_count, has_solution   


def checker(cues, i, check_for='', t=False):
    print('Getting compound words for {}, {} and {}...'.format(cues[0], cues[1], cues[2]))
    result = list()
    checks = [get_compound_words(cue) for cue in cues]
    solution = df.Solutions.iloc[i].strip(' ').lower()
    solution_count = 0

    result_3 = set(checks[0]) & set(checks[1]) & set(checks[2])
    result_0_1 = set(checks[0]) & set(checks[1])
    result_0_2 = set(checks[0]) & set(checks[2])
    result_1_2 = set(checks[1]) & set(checks[2])
    
    if check_for == '3':
        results = [result_3]
        cues = [[cues[0], cues[1], cues[2]]]
    elif check_for == '2':
        cues = [[cues[0], cues[1]], [cues[0], cues[2]], [cues[1], cues[2]]]
        results = [result_0_1, result_0_2, result_1_2]
    else:
        cues = [[cues[0], cues[1], cues[2]], [cues[0], cues[1]], [cues[0], cues[2]], [cues[1], cues[2]]]
        results = [result_3, result_0_1, result_0_2, result_1_2]
    
    for res, cue in zip(results, cues):
        output, count, has_solution = get_solution(res, cue, solution)
        if t: 
            if has_solution:
                result.append(output)
        else:
            result.append(output)
        solution_count += count
        
    return result, solution_count

In [None]:
# Example
result, count = checker(['cottage', 'swiss', 'cake'], 0, '3', True)
result

In [None]:
cues = [list(map(lambda x: x.lower(), filter(len, line.split(' ')))) for line in df.RAT.iloc[:]]

result = list()
solution_count = 0
i = 0
start_time = time.time()

###### Set these two parameters

check_for = '3' # check for tuples of 2 or 3?
t = True # whether or not the solution should be contained

######

for cue in cues:
    output, count = checker(cue, i, check_for, t)
    print('Finished. Timestamp: {} min'.format(round((time.time()-start_time)/60, 2)))
    solution_count += count
    
    for out in output:
        result.append(out)
    i += 1

# accuracy
result.append({'Accuracy': solution_count/(4*len(cues))})
          
# save
data = pd.DataFrame(result)
data.to_excel('output.xlsx', index=False)

In [None]:
# accuracy
result.append({'Accuracy': solution_count/(4*len(cues))})
          
# save
data = pd.DataFrame(result)
data.to_excel('output.xlsx', index=False)

### WordNet

In [None]:
from nltk.corpus import wordnet as wn

In [None]:
wn.synsets('dog')

In [None]:
solution_count