In [11]:
import os
import pandas as pd
import numpy as np
import joblib
from imutils import paths
from sklearn.preprocessing import LabelBinarizer
from tqdm import tqdm

In [12]:
# read the data.csv file and get the image paths and labels
df = pd.read_csv('data/train_labels.csv')
path = 'data/train_set/train_set/'

data = pd.DataFrame()
data['image_path'] = path+df.img_name.values
labels = np.array(df.label.values)

# one hot encode
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
print(f"The first one hot encoded labels: {labels[0]}")
print(f"Mapping an one hot encoded label to its category: {lb.classes_[0]}")
print(f"Total instances: {len(labels)}")
for i in range(len(labels)):
    index = np.argmax(labels[i])
    data.loc[i, 'target'] = int(index)
# shuffle the dataset
data = data.sample(frac=1).reset_index(drop=True)
# save as csv file
data.to_csv('data/labelized.csv', index=False)
# pickle the label binarizer
joblib.dump(lb, 'data/lb.pkl')
print('Save the one-hot encoded binarized labels as a pickled file.')
print(data.head())

The first one hot encoded labels: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0]
Mapping an one hot encoded label to its category: 1
Total instances: 30612
Save the one-hot encoded binarized labels as a pickled file.
                                 image_path  target
0  data/train_set/train_set/train_10842.jpg    45.0
1  data/train_set/train_set/train_23938.jpg    55.0
2   data/train_set/train_set/train_2592.jpg    11.0
3  data/train_set/train_set/train_26945.jpg    51.0
4   data/train_set/train_set/train_1141.jpg    34.0


In [15]:
lb.classes_

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
       52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
       69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80])