# Path name conversion

When a file is selected, the path name is presented to the `AudioFile` library as a `std::string`. It seems like the path is UTF-8 encoded, which the `ifstream` class cannot interpret.

The file name must therefore be converted to ANSI CP-1252 (or possibly UTF-16) for the path to be valid on Windows.

The idea is to make a look-up table of all CP-1252 byte values and their respective UTF-8 equivalents.

In [64]:
# Build the CP-1252 -> UTF-8 look-up table.
#
# Each row has the form [cp1252_byte, utf8_0, utf8_1, utf8_2]: the single
# CP-1252 byte value followed by its UTF-8 encoding, right-padded with 0x00
# so that every row holds exactly three UTF-8 slots.
lut = []
for x in range(1, 256):
    try:
        # Only the decode step can fail, so it is the only thing guarded.
        utf8 = bytearray([x]).decode("cp1252").encode("utf-8")
    except UnicodeDecodeError:
        # Byte values the cp1252 codec leaves undefined have no UTF-8
        # equivalent and therefore get no row in the table.
        continue

    y = list(utf8)
    y.extend([0] * (3 - len(y)))  # pad short sequences up to three bytes
    lut.append([x] + y)

In [65]:
def printhex(x):
    """Print the integers in *x* as space-separated two-digit lowercase hex."""
    hex_bytes = (format(value, "02x") for value in x)
    print(" ".join(hex_bytes))
    
def conv(x0, x1, x2):
    """Translate the UTF-8 sequence starting at *x0* into one table byte.

    *x0*, *x1* and *x2* are up to three consecutive input bytes.  The
    result is a pair ``(byte, skip)``: ``byte`` is the first element of the
    matching ``lut`` row and ``skip`` is how many of the bytes following
    *x0* belonged to the matched sequence (0, 1 or 2).

    Values of *x0* below 128 are returned unchanged with ``skip == 0``.
    For any other *x0* the three input bytes are printed in hex before the
    table is searched; if no row matches, ``(0x00, 0)`` is returned.
    """
    # 7-bit values pass straight through without touching the table.
    if x0 < 128:
        return x0, 0

    printhex([x0, x1, x2])

    for entry in lut:
        if entry[1] != x0:
            continue
        # Row that stores a single byte.
        if entry[2] == 0 and entry[3] == 0:
            return entry[0], 0
        if entry[2] != x1:
            continue
        # Row that stores two bytes (third slot is padding).
        if entry[3] == 0:
            return entry[0], 1
        # Row that stores three bytes.
        if entry[3] == x2:
            return entry[0], 2

    return 0x00, 0

In [66]:
# Round-trip demo: run the UTF-8 bytes of a test file name through conv()
# and collect the bytes it returns in s2.
s1 = list("tøst_string.txt".encode("utf-8"))
s2 = []
N = len(s1)

# Two trailing zeros let conv() always receive three bytes, also for the
# last two positions of the input.
padded = s1 + [0, 0]
pos = 0
while pos < N:
    c, skip = conv(padded[pos], padded[pos + 1], padded[pos + 2])
    s2.append(c)
    # Step over the bytes conv() reported as part of the same sequence.
    pos += 1 + skip

printhex(s1)
printhex(s2)
# Decoding s2 as CP-1252 and re-encoding as UTF-8 shows whether the
# original byte sequence is recovered.
printhex(bytearray(s2).decode("cp1252").encode("utf-8"))

c3 b8 73
74 c3 b8 73 74 5f 73 74 72 69 6e 67 2e 74 78 74
74 f8 73 74 5f 73 74 72 69 6e 67 2e 74 78 74
74 c3 b8 73 74 5f 73 74 72 69 6e 67 2e 74 78 74


## Generate C source code

In [77]:
# Emit the look-up table as a C array definition plus a length macro.
row_template = "    {{ 0x{:02x}, 0x{:02x}, 0x{:02x}, 0x{:02x} }}"
rows = [row_template.format(*entry) for entry in lut]

print("const uint8_t lut[][4] = {")
print(",\n".join(rows))
print("};")
print()
print("#define LUT_LENGTH {:d}".format(len(lut)))


const uint8_t lut[][4] = {
    { 0x01, 0x01, 0x00, 0x00 },
    { 0x02, 0x02, 0x00, 0x00 },
    { 0x03, 0x03, 0x00, 0x00 },
    { 0x04, 0x04, 0x00, 0x00 },
    { 0x05, 0x05, 0x00, 0x00 },
    { 0x06, 0x06, 0x00, 0x00 },
    { 0x07, 0x07, 0x00, 0x00 },
    { 0x08, 0x08, 0x00, 0x00 },
    { 0x09, 0x09, 0x00, 0x00 },
    { 0x0a, 0x0a, 0x00, 0x00 },
    { 0x0b, 0x0b, 0x00, 0x00 },
    { 0x0c, 0x0c, 0x00, 0x00 },
    { 0x0d, 0x0d, 0x00, 0x00 },
    { 0x0e, 0x0e, 0x00, 0x00 },
    { 0x0f, 0x0f, 0x00, 0x00 },
    { 0x10, 0x10, 0x00, 0x00 },
    { 0x11, 0x11, 0x00, 0x00 },
    { 0x12, 0x12, 0x00, 0x00 },
    { 0x13, 0x13, 0x00, 0x00 },
    { 0x14, 0x14, 0x00, 0x00 },
    { 0x15, 0x15, 0x00, 0x00 },
    { 0x16, 0x16, 0x00, 0x00 },
    { 0x17, 0x17, 0x00, 0x00 },
    { 0x18, 0x18, 0x00, 0x00 },
    { 0x19, 0x19, 0x00, 0x00 },
    { 0x1a, 0x1a, 0x00, 0x00 },
    { 0x1b, 0x1b, 0x00, 0x00 },
    { 0x1c, 0x1c, 0x00, 0x00 },
    { 0x1d, 0x1d, 0x00, 0x00 },
    { 0x1e, 0x1e, 0x00, 0x00 },
    { 0x1f, 0