# Graphical Error Modelling

This notebook details the algorithms discussed in Section 3 of the paper "Alignment Analysis of Sequential Segmentation of Lexicons to Improve Automatic Cognate Detection".

## Imports and helper functions

In [1]:
from math import ceil, floor

def common_elements(list1, list2):
    """Return the items of ``list1`` that also occur in ``list2``.

    The result follows the order of ``list1`` and keeps its duplicates;
    membership is tested with ``in`` against ``list2``.
    """
    shared = []
    for item in list1:
        if item in list2:
            shared.append(item)
    return shared

def uncommon_elements(list1, list2):
    """Return the items of ``list1`` that do not occur in ``list2``.

    The result follows the order of ``list1`` and keeps its duplicates;
    membership is tested with ``in`` against ``list2``.
    """
    remaining = []
    for item in list1:
        if item in list2:
            continue  # present in both lists: drop it
        remaining.append(item)
    return remaining

In [5]:
def shingle(input, k):
    """Split ``input`` into k-grams, padded with shorter grams at both ends.

    ``k`` is first clamped to ``len(input)``. The result is the concatenation of:

    1. the prefixes of length 1 .. k-1,
    2. every contiguous window of length k, left to right,
    3. the suffixes of length k-1 .. 1.

    Example: ``shingle("pizza", 2)`` gives
    ``['p', 'pi', 'iz', 'zz', 'za', 'a']``.
    """
    length = len(input)
    size = min(length, k)

    pieces = []
    # Leading partial grams: 'p', 'pi', ... (shorter than the window).
    for stop in range(1, size):
        pieces.append(input[:stop])
    # Full-width sliding windows.
    for begin in range(length - size + 1):
        pieces.append(input[begin:begin + size])
    # Trailing partial grams, longest first so the last one is a single item.
    for width in range(size - 1, 0, -1):
        pieces.append(input[-width:])
    return pieces

def two_ends(input, k):
    """Tag every shingle of ``input`` with its 1-based rank from the nearer end.

    The shingles come from ``shingle(input, k)``. A shingle in the first half
    of that list gets its rank from the start as a prefix (``'2pi'``); a
    shingle in the second half gets its rank from the end as a suffix
    (``'za2'``). When both ranks are equal the start rank is used.

    The split compares the start rank with the end rank computed from the
    actual number of shingles. The earlier threshold ``len(input) - i + 2``
    equals that end rank only for bigrams, so for other ``k`` it split the
    list off-centre. Results for ``k == 2`` are unchanged.

    Args:
        input: the string to segment.
        k: shingle width, passed through to ``shingle``.

    Returns:
        A list of position-tagged shingles, in the order ``shingle`` produced them.
    """
    basic = shingle(input, k)
    total = len(basic)
    result = []
    for from_start, gram in enumerate(basic, start=1):
        from_end = total - from_start + 1
        if from_start <= from_end:
            result.append(str(from_start) + gram)  # rank counted from the start
        else:
            result.append(gram + str(from_end))  # rank counted from the end
    return result

## Graphical Modelling Algorithm

In [3]:
def graph_model(first, second):
    """Build the set of candidate alignment edges between two token sequences.

    Both sequences are brought to the same length using the placeholder token
    ``"nun"``: an empty sequence first becomes ``["nun"]``, then the shorter
    sequence is padded around its middle. Every token of ``first`` is then
    paired with the token of ``second`` at the same index, one index ahead,
    and one index behind.

    The inputs are copied before padding, so the caller's lists are never
    modified.

    Args:
        first: sequence of hashable tokens (e.g. shingles only in the query).
        second: sequence of hashable tokens (e.g. shingles only in the document).

    Returns:
        A set of ``(token_from_first, token_from_second)`` tuples; using a set
        removes duplicate edges.
    """
    # Work on copies: padding must not leak into the caller's lists.
    first = list(first)
    second = list(second)

    # Insert the empty token if a side has nothing at all.
    if len(first) == 0:
        first.append("nun")
    if len(second) == 0:
        second.append("nun")

    # Equalize the sizes by padding the shorter side around its middle.
    while len(first) < len(second):
        first.insert(ceil(len(first) / 2), "nun")
    while len(first) > len(second):
        second.insert(floor(len(second) / 2), "nun")

    # zip() stops at the shorter argument, which trims the unmatched end of
    # each shifted pairing.
    graph = set(zip(first, second))       # first[i] <-> second[i]
    graph.update(zip(first, second[1:]))  # first[i] <-> second[i + 1]
    graph.update(zip(first[1:], second))  # first[i] <-> second[i - 1]
    return graph

## Playing with the functions 

In [7]:
# Worked example: build the error graph between a query word and a document word.
q = two_ends("pizzzza", 2) # Query: position-tagged shingles of the query word (k = 2)
d = two_ends("pizza", 2) # Document: position-tagged shingles of the document word (k = 2)
print(q,d)
qd = common_elements(q, d) # q intersection d: shingles of q that also occur in d
first = uncommon_elements(q, qd) # q - (q intersection d): shingles found only in the query
second = uncommon_elements(d, qd) # d - (q intersection d): shingles found only in the document
print("First and second are", first, second)
print("Graph is ", graph_model(first,second))

['1p', '2pi', '3iz', '4zz', 'zz4', 'zz3', 'za2', 'a1'] ['1p', '2pi', '3iz', 'zz3', 'za2', 'a1']
First and second are ['4zz', 'zz4'] []
Graph is  {('4zz', 'nun'), ('zz4', 'nun')}
