## Student details
Student name: **Siddharth Prince**  
Student ID: **23052058**

# Task 1

In [1]:
import time
import re

#How many insertions, deletions, or substitutions does it take to turn x into y?
def edDistRecursive(x, y): 
 
    # If either x or y are empty, then one or more (depending on their length) INSERTIONS or DELECTIONS are needed to transform x to y.
    if len(x) == 0:
        #print(f'\t Converting "{x}" TO "{y}" requires {len(y)} INSERTIONS, therefor it costs {len(y)}') 
        return len(y)
    if len(y) == 0:
        #print(f'\t Converting "{x}" TO "{y}" requires {len(x)} DELETIONS, therefor it costs {len(x)}') 
        return len(x)
 
    # When neither x or y are empty, we must compute the cost of each operation and find the least costly one.
    
    digitMatchX = re.match("\d", x[-1])
    digitMatchY = re.match("\d", y[-1])
    # print(f'x[-1]: {x[-1]}, y[-1]: {y[-1]}')
    if x[-1] == y[-1]:
        delta = 0
    elif (digitMatchX and digitMatchY) or (digitMatchX==None and digitMatchY==None): # matching to check if either both are digits or both are letters
        delta = 1
    else:
        delta = 2
    # print(f'delta: {delta}')
        
    diagonal_or_substitution_cost = edDistRecursive(x[:-1], y[:-1]) + delta #what's the cost of SUBSTITUTING the last character of x with the last character of y
    vertical_or_deletion_cost     = edDistRecursive(x[:-1], y)      + 1     #what's the cost of DELETING the last character of x
    horizontal_or_insertion_cost  = edDistRecursive(x, y[:-1])      + 1     #what's the cost of INSERTING the last character of x into y
    
    # what's the least costly operation?
    minValue = min(diagonal_or_substitution_cost, vertical_or_deletion_cost, horizontal_or_insertion_cost)
    return minValue

### Running test cases

#### Test case #1
Two strings of the same length and all having letters only.

In [2]:
x = 'intention'
y = 'execution'
currentTime = time.time()
print(f'Edit distance between "{x}" and "{y}": {edDistRecursive(x, y)}')
print(f'--- Executed in {time.time() - currentTime} seconds ---')

Edit distance between "intention" and "execution": 5
--- Executed in 1.4362208843231201 seconds ---


#### Test case #2
Two strings of the same length and both having a combination of digits and letters.

In [3]:
x = 'intent1on'
y = 'ex3cut0n'
currentTime = time.time()
print(f'Edit distance between "{x}" and "{y}": {edDistRecursive(x, y)}')
print(f'--- Executed in {time.time() - currentTime} seconds ---')

Edit distance between "intent1on" and "ex3cut0n": 8
--- Executed in 0.779008150100708 seconds ---


#### Test case #3
Two strings of different lengths but both having letters only.

In [4]:
x = 'intentions'
y = 'execution'
currentTime = time.time()
print(f'Edit distance between "{x}" and "{y}": {edDistRecursive(x, y)}')
print(f'--- Executed in {time.time() - currentTime} seconds ---')

Edit distance between "intentions" and "execution": 6
--- Executed in 3.2421889305114746 seconds ---


#### Test case #4
Two strings of different lengths but having a mix of letters and digits.

In [5]:
x = 'int3ntionsla'
y = 'ex3cution'
currentTime = time.time()
print(f'Edit distance between "{x}" and "{y}": {edDistRecursive(x, y)}')
print(f'--- Executed in {time.time() - currentTime} seconds ---')

Edit distance between "int3ntionsla" and "ex3cution": 8
--- Executed in 14.044188976287842 seconds ---


# Task 2

Housekeeping code

In [6]:
# Installing dependency libraries

# !pip install names-dataset

# !pip install jellyfish

# Imports for the task

from names_dataset import NameDataset, NameWrapper
import jellyfish

nd = NameDataset() # Loading it once here. Loading it multiple times in separate cells crashed the kernel. :(

## Task 2.a

In [7]:
# Function to check if first and last names are present in the First and Last Names Dataset.
def inFLNDataSet(firstName, lastName):
    firstNameResult = nd.search(firstName)
    lastNameResult = nd.search(lastName)
    if firstNameResult['first_name']:
        print(f'First name, {firstName} found in dataset!')
    else:
        print(f'First name, {firstName} not available in dataset!')

    if lastNameResult['last_name']:
        print(f'Last name, {lastName} found in dataset!')
    else:
        print(f'Last name, {lastName} not available in dataset!')\
        
    return firstNameResult['first_name'], lastNameResult['last_name']

firstNameDetails, lastNameDetails = inFLNDataSet('Siddharth', 'Prince')

First name, Siddharth found in dataset!
Last name, Prince found in dataset!


## Task 2.b

In [8]:
def getSoundexCodes(firstName, lastName):
    if not firstName.isascii() or not lastName.isascii:
        return None
    firstNameSoundex, lastNameSoundex = jellyfish.soundex(firstName), jellyfish.soundex(lastName)
    print(f'Soundex code for first name, "{firstName}": {firstNameSoundex}')
    print(f'Soundex code for last name, "{lastName}": {lastNameSoundex}')
    return firstNameSoundex, lastNameSoundex

firstNameSoundex, lastNameSoundex = getSoundexCodes('Siddharth', 'Prince')

Soundex code for first name, "Siddharth": S363
Soundex code for last name, "Prince": P652


## Task 2.C

In [9]:
def getSameSoundexNames(name):
    if not name.isascii():
        return None
    nameSoundex = jellyfish.soundex(name)
    # print(nameSoundex)
    sameSoundexList = []
    for firstName in nd.first_names.keys():
        # print(firstName)
        if not firstName.isascii():
            continue
        tempNameSoundex = jellyfish.soundex(firstName)
        if tempNameSoundex == nameSoundex:
            sameSoundexList.append(firstName)
    return sameSoundexList

print(f'List of names that have the same soundex codes as my first name: \n{getSameSoundexNames("Siddharth")}')

List of names that have the same soundex codes as my first name: 
['S Dorota', 'Sadart', 'Sadradin', 'Sadrata', 'Sadreddin', 'Sadredin', 'Sadrettin', 'Sadriddin', 'Sadrit', 'Sadroddin', 'Sadruddin', 'Sadrudin', 'Satriadi', 'Satrodhan', 'Satrudhan', 'Saturday', 'Saturdino', 'Schoettert', 'Sedrati', 'Sedreddin', 'Sedrettin', 'Seduardoz', 'Setordzi', 'Setrida', 'Shadurdy', 'Shatrudhan', 'Shidharth', 'Shidratul', 'Shihdurdy', 'Shkodra Tennis', 'Sidaarth', 'Sidarta', 'Sidarth', 'Siddareddy', 'Siddarth', 'Siddartha', 'Siddhart', 'Siddharth', 'Siddhartha', 'Siddharthan', 'Sidhart', 'Sidharta', 'Sidharth', 'Sidhartha', 'Sidharthan', 'Sidhurath', 'Sidrat', 'Sidratel', 'Sidratul', 'Sidratul Muntaha', 'Sidrit', 'Sidrita', 'Sidriti', 'Sidrotul', 'Sidurtlede', 'Sita Rudra', 'Siti Hardiyanti', 'Skaidrite', 'Skaidryte', 'Sotiriadis', 'Sstuardo', 'Starden', 'Stardent', 'Starodub', 'Start', 'Startbrac', 'Starttipaja', 'Steewardsen', 'Stert', 'Steward', 'Stewart', 'Stewarth', 'Steyaert', 'Sthewart', 'St

## Task 2.D

In [10]:
print(f'Levenshtein distance between my first and last names: {jellyfish.levenshtein_distance("Siddharth", "Prince")}')

Levenshtein distance between my first and last names: 9
