In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
from pandas.api.types import CategoricalDtype

In [2]:
# Optional (disabled): build ordered categorical dtypes from previously saved
# colour lists so that category codes follow the order stored in those files.
# The file names match the ones write_labels() emits when given Path("./");
# enable these lines and pass both dtypes to df_from_file() to use them.
# hair_type = CategoricalDtype(categories=np.loadtxt("./hair_color.txt", dtype=str, delimiter=","), ordered=True)
# eyes_type = CategoricalDtype(categories=np.loadtxt("./eyes_color.txt", dtype=str, delimiter=","), ordered=True)

In [3]:
def df_from_file(file_name, hair_type=None, eyes_type=None):
    """Load a tag file into a DataFrame with categorical hair/eye colours.

    Every line is split on commas or runs of whitespace into five fields.
    The first field becomes the index, the second and fourth are kept as the
    ``hair`` and ``eyes`` columns, and the third and fifth are discarded
    (presumably the literal words following each colour -- verify against the
    data file).

    Parameters
    ----------
    file_name : str or path-like
        Location of the tag file; it must not contain a header row.
    hair_type, eyes_type : pandas.api.types.CategoricalDtype, optional
        Explicit dtype for the corresponding column. Each one is applied
        independently; when omitted, the categories of that column are
        inferred from the values present in the file.

    Returns
    -------
    pandas.DataFrame
        Columns ``hair`` and ``eyes`` (categorical) plus ``hair_id`` and
        ``eyes_id`` holding the integer category codes. A value that is not
        among the categories of an explicit dtype becomes NaN with code -1.
    """
    df = pd.read_csv(
        file_name,
        header=None,
        index_col=0,
        # Raw string: "\s" is not a valid escape in a normal string literal.
        sep=r",|\s+",
        # Regex separators are only supported by the python parser; asking
        # for it explicitly avoids the ParserWarning from the C-engine fallback.
        engine="python",
    )
    df.columns = ["hair", "r1", "eyes", "r2"]
    df = df.drop(columns=["r1", "r2"])

    # Resolve each column on its own so that supplying only one dtype is
    # honoured rather than silently ignored.
    df["hair"] = df["hair"].astype("category" if hair_type is None else hair_type)
    df["eyes"] = df["eyes"].astype("category" if eyes_type is None else eyes_type)

    df["hair_id"] = df["hair"].cat.codes
    df["eyes_id"] = df["eyes"].cat.codes

    return df

In [4]:
# Load the tag table and let pandas infer the colour categories from the data.
# Alternative (disabled): reuse explicit category dtypes; this needs hair_type
# and eyes_type to be defined first.
# df = df_from_file("./tags.csv", hair_type=hair_type, eyes_type=eyes_type)
df = df_from_file("./tags.csv")

  


In [5]:
# Preview the first rows to check the parsed colours and their integer codes.
df.head()

Unnamed: 0_level_0,hair,eyes,hair_id,eyes_id
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,aqua,aqua,0,0
215,aqua,black,0,1
434,aqua,blue,0,2
614,aqua,brown,0,3
751,aqua,green,0,4


In [6]:
def write_label_to_file(path, hair_code, eyes_code):
    """Write two comma-separated lines to ``path``: hair code, then eyes code.

    ``path`` must provide ``open`` (e.g. a ``pathlib.Path``); an existing file
    is overwritten. Each element of the code sequences is rendered with
    ``str`` and every line ends with a newline.
    """
    with path.open("w+") as out:
        for code in (hair_code, eyes_code):
            out.write(",".join(str(bit) for bit in code) + "\n")

In [7]:
def write_labels(path, dataframe):
    """Export the colour vocabularies and one one-hot label file per row.

    Files written below ``path``:

    * ``hair_color.txt`` / ``eyes_color.txt`` -- the category names of the
      ``hair`` / ``eyes`` columns on a single comma-separated line.
    * ``labels/<index>.txt`` -- for every row, a one-hot hair vector and a
      one-hot eyes vector, written through ``write_label_to_file``.

    Parameters
    ----------
    path : str or pathlib.Path
        Output directory; it is created (with parents) when missing.
    dataframe : pandas.DataFrame
        Must hold categorical ``hair`` and ``eyes`` columns together with
        their integer codes in ``hair_id`` and ``eyes_id``.

    Notes
    -----
    A negative code (pandas uses -1 for a value outside the categories)
    produces an all-zero vector. Indexing the list with it would instead
    mark the *last* colour, which would be a wrong label.
    """
    path = Path(path)
    label_dir = path / "labels"
    # Create the directory tree up front so the colour files below can be
    # written even when ``path`` does not exist yet.
    label_dir.mkdir(parents=True, exist_ok=True)

    hair_colors = list(dataframe["hair"].cat.categories)
    eyes_colors = list(dataframe["eyes"].cat.categories)

    with (path / "hair_color.txt").open("w") as f:
        print(*hair_colors, sep=",", file=f)

    with (path / "eyes_color.txt").open("w") as f:
        print(*eyes_colors, sep=",", file=f)

    # Walk only the columns that are needed; iterrows() would build a full
    # Series for every row just to read two integers.
    rows = zip(dataframe.index, dataframe["hair_id"], dataframe["eyes_id"])
    for index, hair_id, eyes_id in rows:
        hair_code = [0] * len(hair_colors)
        eyes_code = [0] * len(eyes_colors)
        if hair_id >= 0:
            hair_code[hair_id] = 1
        if eyes_id >= 0:
            eyes_code[eyes_id] = 1
        write_label_to_file(label_dir / "{}.txt".format(index), hair_code, eyes_code)

In [8]:
# Write the colour lists and the per-row label files into the current directory.
write_labels(Path("./"), df)