# **Data Parser**

Parses the data from a `.txt` file into a DataFrame and stores it as a `.csv`.

In [36]:
import pandas as pd


# Puzzle dimensions.
WORD_LENGTH = 5
WORD_LENGHT = WORD_LENGTH  # misspelled legacy name, kept so existing references still work
ATTEMPTS_NUMBER = 6

# Blank placeholders matching the fixed-width fields of the input file.
EMPTY_WORD = " " * WORD_LENGTH
# A hits field is the hit pattern followed by a one-digit attempt number, so its
# width is tied to the word length, not to the number of attempts.
EMPTY_HITS = " " * (WORD_LENGTH + 1)

## Loading the Data

In [37]:
def load(filepath: str, encoding: str = "utf-8") -> list:
    """Read a text file and return its lines without line terminators.

    Args:
        filepath: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        The file content split into lines; an empty file yields ``[]``.
    """
    with open(filepath, encoding=encoding) as f:
        return f.read().splitlines()


# Read the normal-mode input file as a list of raw lines and preview the first five.
data = load("./data/input/normal.txt")
data[:5]

['salet BBBBB1 courd BBBBB2 nymph BBBBY3 whiff GGGGG4',
 '                                BGYYB3 pygmy GGGGG4',
 '                                BYBBB3 fizzy GGGGG4',
 '                                             YGBBG4 jiffy GGGGG5',
 '                                BYBGY3 hippy GGGGG4']

## Parsing the Data

In [38]:
def parse(data: list):
    """Turn the fixed-width input lines into a rectangular table of guesses.

    Every line holds up to five guess columns, each 13 characters wide:
    a 5-letter word, a space, a 6-character hits field (5 hit letters
    followed by the attempt digit) and a separating space that is absent
    after the last column. A blank word or hits field means "same as the
    line above" and is filled in from the previously parsed row.

    Args:
        data: Raw lines of the input file, in file order.

    Returns:
        One list per input line holding 10 strings laid out as
        ``[word, hits, word, hits, ...]``. Hits are trimmed to their first
        5 characters (the attempt digit is dropped). Columns missing from
        the line are padded with ``EMPTY_WORD`` / ``EMPTY_HITS``.

    Raises:
        IndexError: If the first line contains a blank field, since there is
            no previous row to inherit from.
    """
    # Fixed-width layout of a single guess column.
    # Assumes EMPTY_WORD / EMPTY_HITS are blank strings of these same widths.
    word_width = 5   # letters in a word / hit pattern
    hits_width = 6   # hit pattern plus the trailing attempt digit
    column_width = word_width + 1 + hits_width + 1  # 13, separators included
    max_columns = 5

    table = []

    for row_in in data:
        # A line with n columns is n * 13 - 1 characters long because the last
        # column carries no trailing separator; adding 1 before dividing gives
        # n. This also yields 1 for a single-guess line and 0 for a blank line,
        # so such lines are padded instead of producing empty-string cells.
        columns_count = min(max_columns, (len(row_in) + 1) // column_width)

        row_out = []
        for i in range(columns_count):
            start = i * column_width

            # Extract the raw fields of this column
            word = row_in[start: start + word_width]
            hits = row_in[start + word_width + 1: start + column_width - 1]

            # Blank fields inherit the value parsed for the previous line
            if word == EMPTY_WORD:
                word = table[-1][i * 2]

            if hits == EMPTY_HITS:
                hits = table[-1][i * 2 + 1]

            # Store the word and the hits without the attempt digit
            # (inherited hits are already trimmed, so the slice is a no-op)
            row_out.append(word)
            row_out.append(hits[:word_width])

        # Pad the columns this line does not have with blank placeholders
        for _ in range(max_columns - columns_count):
            row_out.append(EMPTY_WORD)
            row_out.append(EMPTY_HITS)

        table.append(row_out)

    return table


# Parse the loaded lines and print the first rows one per line,
# so they can be compared visually against the raw preview above.
table = parse(data)
for row in table[:5]:
    print(row)

['salet', 'BBBBB', 'courd', 'BBBBB', 'nymph', 'BBBBY', 'whiff', 'GGGGG', '     ', '      ']
['salet', 'BBBBB', 'courd', 'BBBBB', 'nymph', 'BGYYB', 'pygmy', 'GGGGG', '     ', '      ']
['salet', 'BBBBB', 'courd', 'BBBBB', 'nymph', 'BYBBB', 'fizzy', 'GGGGG', '     ', '      ']
['salet', 'BBBBB', 'courd', 'BBBBB', 'nymph', 'BYBBB', 'fizzy', 'YGBBG', 'jiffy', 'GGGGG']
['salet', 'BBBBB', 'courd', 'BBBBB', 'nymph', 'BYBGY', 'hippy', 'GGGGG', '     ', '      ']


## Converting to DataFrame

In [39]:
def convert_to_dataframe(table: list):
    """Build a categorical DataFrame from the parsed table.

    Args:
        table: Rows of 10 strings laid out as
            ``[word, hits, word, hits, ...]`` (five word/hits pairs).

    Returns:
        A DataFrame with columns ``word_0, hits_0, ..., word_4, hits_4``,
        every column cast to the ``category`` dtype. An empty table yields an
        empty DataFrame that still carries the 10 named columns.

    Raises:
        ValueError: If the rows do not have exactly 10 entries.
    """
    column_names = [
        f"{field}_{attempt}"
        for attempt in range(5)
        for field in ("word", "hits")
    ]

    # Naming the columns in the constructor (rather than assigning them
    # afterwards) keeps an empty table from failing on a length mismatch.
    df = pd.DataFrame(table, columns=column_names)
    return df.astype("category")

# Convert the parsed normal-mode table and display it as the cell output.
df = convert_to_dataframe(table)
df

Unnamed: 0,word_0,hits_0,word_1,hits_1,word_2,hits_2,word_3,hits_3,word_4,hits_4
0,salet,BBBBB,courd,BBBBB,nymph,BBBBY,whiff,GGGGG,,
1,salet,BBBBB,courd,BBBBB,nymph,BGYYB,pygmy,GGGGG,,
2,salet,BBBBB,courd,BBBBB,nymph,BYBBB,fizzy,GGGGG,,
3,salet,BBBBB,courd,BBBBB,nymph,BYBBB,fizzy,YGBBG,jiffy,GGGGG
4,salet,BBBBB,courd,BBBBB,nymph,BYBGY,hippy,GGGGG,,
...,...,...,...,...,...,...,...,...,...,...
2304,salet,YYYBG,blast,GGGGG,,,,,,
2305,salet,YYYYB,aisle,GGGGG,,,,,,
2306,salet,YYYYB,aisle,YBYYG,lease,GGGGG,,,,
2307,salet,YYYYB,aisle,YBYYY,leash,GGGGG,,,,


## Storing the Final Data

In [40]:
# index=False keeps the DataFrame's row index out of the CSV file.
df.to_csv("./data/output/normal.csv", index=False)

# Hard Mode

In [41]:
# Same pipeline for the hard-mode input.
# NOTE: this rebinds `data`, so the normal-mode lines are no longer available.
data = load("./data/input/hard.txt")
data[:5]

['salet BBBBB1 crump BBBBB2 doing BBGBB3 whiff GGGGG4',
 '                                BBGGB3 whiny GGGGG4',
 '                                BBGGG3 vying GGGGG4',
 '                                BBYBB3 fizzy GGGGG4',
 '                                             YGBBG4 jiffy GGGGG5']

In [42]:
# Parse the hard-mode lines (rebinds `table`) and print the first rows one per line.
table = parse(data)
for row in table[:5]:
    print(row)

['salet', 'BBBBB', 'crump', 'BBBBB', 'doing', 'BBGBB', 'whiff', 'GGGGG', '     ', '      ']
['salet', 'BBBBB', 'crump', 'BBBBB', 'doing', 'BBGGB', 'whiny', 'GGGGG', '     ', '      ']
['salet', 'BBBBB', 'crump', 'BBBBB', 'doing', 'BBGGG', 'vying', 'GGGGG', '     ', '      ']
['salet', 'BBBBB', 'crump', 'BBBBB', 'doing', 'BBYBB', 'fizzy', 'GGGGG', '     ', '      ']
['salet', 'BBBBB', 'crump', 'BBBBB', 'doing', 'BBYBB', 'fizzy', 'YGBBG', 'jiffy', 'GGGGG']


In [43]:
# Convert the hard-mode table (rebinds `df`) and display it as the cell output.
df = convert_to_dataframe(table)
df

Unnamed: 0,word_0,hits_0,word_1,hits_1,word_2,hits_2,word_3,hits_3,word_4,hits_4
0,salet,BBBBB,crump,BBBBB,doing,BBGBB,whiff,GGGGG,,
1,salet,BBBBB,crump,BBBBB,doing,BBGGB,whiny,GGGGG,,
2,salet,BBBBB,crump,BBBBB,doing,BBGGG,vying,GGGGG,,
3,salet,BBBBB,crump,BBBBB,doing,BBYBB,fizzy,GGGGG,,
4,salet,BBBBB,crump,BBBBB,doing,BBYBB,fizzy,YGBBG,jiffy,GGGGG
...,...,...,...,...,...,...,...,...,...,...
2304,salet,YYYBG,blast,GGGGG,,,,,,
2305,salet,YYYYB,aisle,GGGGG,,,,,,
2306,salet,YYYYB,aisle,YBYYG,lease,GGGGG,,,,
2307,salet,YYYYB,aisle,YBYYY,leash,GGGGG,,,,


In [44]:
# index=False keeps the DataFrame's row index out of the CSV file.
df.to_csv("./data/output/hard.csv", index=False)