# Code

Our encoding is ASCII. To determine whether a character falls in the ASCII ranges a-z or A-Z, we use Python's ord() function, which converts a character to its numeric code. We take the codes of a, z, A, and Z as the range boundaries, so a letter is acceptable when its ord() value lies between a and z or between A and Z, inclusive. We also assume a Linux system and that the current working directory is /users/bob.

We represent a file path by converting the path string to an array with the split() function, splitting at the "/" symbol. For example, /home/bob becomes ["", "home", "bob"]. This makes it easier to determine whether paths are equivalent, because we can compare arrays instead of strings. A ".." segment means the parent directory, so it tells us to pop the last element off the path; the tilde (~) represents the home directory, which we hard-coded. We also reject any symbol other than letters, digits, and [".", "_", "-", "~", "/"].

To determine whether two paths are homographs, we check for everything that makes them different or invalid: illegal characters; more than one consecutive "/", which we do not accept in paths; a "~" in the middle of the path; whether the path ends with an actual file or not; and, finally, the length and contents of the paths after we resolve all the ".." and "~" segments, to determine for sure that they are the same path.

Our test cases cover dot notation (.. and .), illegal characters, and look-alike characters (in one case we put a Russian "е" in place of the ASCII "e"). We also test paths with many consecutive "/" characters or many repeated "./" segments, and paths with no actual file name at all.

This program assumes that:

* the default navigation separator is `/`
* system encoding is `UTF-8`
* the file system is case sensitive (`folder/` and `Folder/` are different)
* only English characters, numbers, and `.`, `_`, `-` are allowed in file and folder names
* there is no unreachable destination with `../`



In [None]:
## Logic definitions

# Segments of the assumed working directory, /users/bob. getUniformed()
# prepends these to relative paths and to paths that start with "~".
home = ["users", "bob"]

# Code points delimiting the English alphabet. The end* values are
# exclusive bounds: isCapital()/isLower() test start <= ord(char) < end.
startCapital = ord("A")
startLower = ord("a")
alphabetLength = 26
endCapital = startCapital + alphabetLength
endLower = startLower + alphabetLength

# Initial configuration: the path separator, plus the non-alphanumeric
# characters that getChars() strips out and isLegal() accepts.
systemSeparator = "/"
allowedCharacters = [".", "_", "-", "~", systemSeparator]

def isCapital(char):
    """Return True when `char` is one of the capital letters A-Z.

    The check is made on the code point: ord(char) must lie in the
    half-open range [startCapital, endCapital).
    """
    return startCapital <= ord(char) < endCapital

def isLower(char):
    """Return True when `char` is one of the lowercase letters a-z.

    The check is made on the code point: ord(char) must lie in the
    half-open range [startLower, endLower).
    """
    return startLower <= ord(char) < endLower

def isLegal(chars):
    """Return True when every character is acceptable in a path.

    A character is acceptable when it is an English letter (per isCapital()
    or isLower()), an ASCII digit 0-9, or a member of allowedCharacters.

    Args:
        chars: iterable of single characters to validate.

    Returns:
        False as soon as one character falls outside the accepted set,
        True otherwise (including for an empty iterable).
    """
    for char in chars:
        if isCapital(char) or isLower(char):
            continue

        # Compare against the ASCII range instead of calling str.isdigit():
        # isdigit() is also True for non-ASCII digits (Arabic-Indic,
        # full-width, superscripts, ...), which would let look-alike
        # characters slip through an ASCII-only filter.
        if "0" <= char <= "9":
            continue

        if char in allowedCharacters:
            continue

        return False

    return True

def getChars(word):
    """Return the characters of `word` that are not in allowedCharacters.

    The result is a list that keeps the original order of the characters,
    i.e. the letters, digits and any unexpected symbols left to validate.
    """
    remaining = []
    for char in word:
        if char in allowedCharacters:
            continue
        remaining.append(char)
    return remaining

def isEndingWithFile(segments):
    """Return True when the last path segment looks like a file name.

    A segment counts as a file name when it contains a "." (for example
    "file.txt"). The special segments "." and ".." refer to directories,
    not files, so they are rejected even though they contain a dot.

    Args:
        segments: list of path segments, as produced by splitting a path
            on the separator.

    Returns:
        True if the final segment contains a dot and is not "." or "..";
        False otherwise, including for an empty list.
    """
    if not segments:
        return False

    lastElement = segments[-1]

    # "." and ".." are the current/parent directory, never a file.
    if lastElement in (".", ".."):
        return False

    return "." in lastElement

def getUniformed(segments):
    """Return the canonical list of segments for a path split on the separator.

    The canonical form is the list of directory/file names from the root:
      * a leading "~" or a relative path is anchored at `home`;
      * a leading "" (the path started with the separator) is anchored at
        the root, i.e. an empty list;
      * "." and empty segments are dropped;
      * ".." removes the previous segment, and is ignored at the root so
        that "/../x" and "/x" canonicalise to the same list.

    Args:
        segments: list of path segments. The list is not modified.

    Returns:
        A new list with the canonical segments; [] for an empty input.
    """
    if not segments:
        return []

    uniformed = []
    remaining = segments

    if segments[0] == "~":
        uniformed.extend(home)
        # Slice rather than pop(0) so the caller's list is left untouched.
        remaining = segments[1:]
    elif segments[0] != "":
        uniformed.extend(home)

    for segment in remaining:
        if segment == "." or not segment:
            continue

        if segment == "..":
            # The parent of the root is the root itself: never keep a
            # literal ".." in the canonical form.
            if uniformed:
                uniformed.pop()
            continue

        uniformed.append(segment)

    return uniformed

def areHomographs(first, second):
    """Return True when two path strings refer to the same file.

    Both paths are validated first; the reason for the first failed check
    is printed and False is returned. The checks run in this order, each
    for the first path and then the second: illegal characters, a "//"
    sequence, a "~" that is not at the very start, and a last segment that
    is not a file name. Paths that pass are canonicalised with
    getUniformed() and compared segment by segment.
    """
    def tildeMisused(path):
        # A single "~" is accepted only as the very first character.
        pieces = path.split("~")
        return len(pieces) > 2 or (len(pieces) == 2 and pieces[0] != "")

    def lacksFile(path):
        return not isEndingWithFile(path.split(systemSeparator))

    # Ordered (predicate, message) pairs; the first predicate that fires
    # decides the message that is reported.
    rejections = (
        (lambda: not isLegal(getChars(first)), "Illegal characters in the first path"),
        (lambda: not isLegal(getChars(second)), "Illegal characters in the second path"),
        (lambda: "//" in first, "Illegal // in the first path"),
        (lambda: "//" in second, "Illegal // in the second path"),
        (lambda: tildeMisused(first), "Tilda is used wrong in the first path"),
        (lambda: tildeMisused(second), "Tilda is used wrong in the second path"),
        (lambda: lacksFile(first), "The first path doesn't end with file"),
        (lambda: lacksFile(second), "The second path doesn't end with file"),
    )

    for isRejected, message in rejections:
        if isRejected():
            print(message)
            return False

    canonicalFirst = getUniformed(first.split(systemSeparator))
    canonicalSecond = getUniformed(second.split(systemSeparator))

    # List equality covers both the length check and the per-segment check.
    if canonicalFirst != canonicalSecond:
        print("Different paths")
        return False

    return True

def testHomographs(first, second):
    """Echo both paths, then print whether areHomographs() treats them as equal."""
    print("Specify the first filename:  " + first)
    print("Specify the second filename: " + second)

    if not areHomographs(first, second):
        print("The paths are NOT homographs\n")
        return

    print("The paths are homographs\n")

# Test scenarios

* ***e***: What is the encoding in this scenario?
* ***r***: This can be a bit difficult to figure out, so I will help. The rendition is a file handle. In the case of C++, it would be ifstream fin;. In Python, it would be the file object returned by the open() function. This points to a given resource on the file system.
* ***R()***: This is the rendering function. In the case of our scenario, this might be fin.open(e); in C++ or open(filename, 'r') in Python. In other words, it turns an encoding into a rendition. Note that we do not need to and should not use fin.open() or open() for this problem because it treats all paths the same that do not refer to resources on the system. For example, if a file path is non-existent, it would be treated the same as another non-existent file path. In other words, it would report two non-homographs as the same even when they are obviously different.
* ***c***: This is the canon. You need to identify an appropriate canon. Think of a canonical way to represent a file path.
* ***C()***: The bulk of your work will be to create an appropriate canonicalization function. This should be informed by the functionality of *R()* but should not use it.
* ***H()***: This is the homograph function. It determines if two encodings are the same. It should leverage the work done in *C()*. Check the textbook for an idea of what this will look like.



Automated Tests
Test cases are provided in a 2D array.
Each test case is run and outputs whether or not it succeeded.

In [None]:
# Each list within testCases is an individual test case, laid out as
# [case number, first filename, second filename, expected areHomographs() result]

testCases = [
    [1, "file.txt", "file.txt", True],
    [2, "file.txt", "./file.txt", True],
    [3, "file.txt", "../../file.txt", False],
    [4, "file.txt", "../../users/bob/file.txt", True],
    [5, "file.txt", "File.txt", False], # names are compared case-sensitively
    [6, "file.txt", "/file.txt", False],
    [7, "file.txt", "file%.txt", False], # % should not be legal character
    [8, "filе.txt", "file.txt", False], # One of the "e"s is a homograph of ascii "e"
    [9, "~/file.txt", "file.txt", True],
    [10, "/../../../file.txt", "../../../file.txt", True],
    [11, "~/../../../file.txt", "../../../file.txt", True],
    [12, "~/../../~/file.txt", "../../../file.txt", False], # ~ is only accepted at the start of a path
    [13, "/users/bob/file.txt", "file.txt", True],
    [14, "/users/./bob/file.txt", "././././file.txt", True],
    [15, "/////////file.txt", "././././././file.txt", False], # consecutive separators are rejected
    [16, "../folder/../folder2/../file.txt", "../../users/file.txt", True],
    [17, "/", "/./", False], # neither path ends with a file name
    [18, "", "", False], # empty paths do not end with a file name
    [19, "file_1.txt", "~/file_1.txt", True],
    [20, "file.txt", "file.txt", True], # NOTE(review): same inputs as case 1
]
    # [20, "file.txt", "file.txt", True],

def runAutomatedTests(testCases):
    """Run areHomographs() on every test case and print a per-case verdict.

    Each case is indexed as [number, first path, second path, expected
    result]. A case succeeds when areHomographs() returns the expected
    value; otherwise both paths and the actual/expected values are printed.
    """
    for case in testCases:
        output = areHomographs(case[1], case[2])
        expectedOutput = case[3]

        if output != expectedOutput:
            print(f"{case[0]}. Failure in test - file1 = '{case[1]}' / file2 = '{case[2]}'")
            print(f"\t\t\tOutput: {output} / Expected: {expectedOutput}\n")
            continue

        print(f"{case[0]}. Success\n")

# Run every scenario listed in testCases and print the per-case results.
runAutomatedTests(testCases)

1. Success

2. Success

Different paths
3. Success

4. Success

Different paths
5. Success

Different paths
6. Success

Illegal characters in the second path
7. Success

Illegal characters in the first path
8. Success

9. Success

10. Success

11. Success

Tilda is used wrong in the first path
12. Success

13. Success

14. Success

Illegal // in the first path
15. Success

16. Success

The first path doesn't end with file
17. Success

The first path doesn't end with file
18. Success

19. Success

20. Success



In [None]:
def testHomographs(first, second):
    """Print whether areHomographs() treats the two paths as the same file."""
    if not areHomographs(first, second):
        print("The paths are NOT homographs\n")
        return

    print("The paths are homographs\n")

# Interactive check: announce the assumed working directory, read two paths
# from standard input, and report whether they are homographs.
print("The current directory is /users/bob")
# end = "" leaves the cursor on the prompt line, so the answer is typed
# right after the prompt text.
print("Specify the first filename:  ", end = "")
fname1 = input()
print("Specify the second filename: ", end = "")
fname2 = input()
testHomographs(fname1, fname2)

The current directory is /users/bob
Specify the first filename:  /dir1/dir2/../a.txt
Specify the second filename: ../../dir1/a.txt
The paths are homographs

