In [1]:
from csv import DictReader

def _load_rows(path):
    """Read the CSV file at ``path`` and return its rows as a list of dicts.

    Each row is keyed by the column names found in the file's header line.
    """
    # newline='' is what the csv module asks for on files handed to a reader.
    # The explicit encoding keeps the parsed column names independent of the
    # platform's default encoding.  Plain 'utf8' (not 'utf-8-sig') is
    # deliberate: the header list built later in this notebook names the
    # first column '\ufeffUDI', so that key must keep its leading character.
    # No close() call is needed; the with-statement closes the file.
    with open(path, 'r', encoding='utf8', newline='') as csv_file:
        return list(DictReader(csv_file))


rows_train = _load_rows('data/subset_train.csv')
rows_valid = _load_rows('data/subset_valid.csv')
rows_test = _load_rows('data/subset_test.csv')

In [2]:
# Pool the three splits (train, then valid, then test) into one list so the
# rows can be processed together.  The row dicts are shared with the
# per-split lists, not copied.
all_rows = [*rows_train, *rows_valid, *rows_test]

# Report the size of each split followed by the combined total.
print(len(rows_train), len(rows_valid), len(rows_test), len(all_rows))

6999 2000 1001 10000


In [3]:
import numpy as np
from torch.nn.functional import one_hot
from torch import as_tensor

# Build one label string per row by joining its six failure-flag columns in
# a fixed order; rows with the same combination of flags get the same string.
_preformat_target = [
    ''.join(
        row[column]
        for column in ('Machine failure', 'TWF', 'HDF', 'PWF', 'OSF', 'RNF')
    )
    for row in all_rows
]

# return_inverse yields, for every row, the index of its label string among
# the distinct labels; that index is used as the row's class id.
_, _class_vals = np.unique(_preformat_target, return_inverse=True)

# Expand the class ids into one-hot rows and convert to nested Python lists.
_class_tensor = as_tensor(_class_vals)
formatted_target = one_hot(_class_tensor).tolist()

In [4]:
# Width of the one-hot encoding, taken from the first encoded target row.
n_classes = len(formatted_target[0])

# Raw target columns that the one-hot columns replace.
_target_columns = ('Machine failure', 'TWF', 'HDF', 'PWF', 'OSF', 'RNF')
_class_columns = ['Class_{}'.format(n) for n in range(n_classes)]

# Output column order: the feature columns followed by the one-hot columns.
# NOTE(review): the first name starts with a byte-order-mark character,
# presumably because the source CSVs begin with a BOM that ends up in the
# first column name -- confirm before changing it.
headers = [
    '\ufeffUDI',
    'Product ID',
    'Type',
    'Air temperature [K]',
    'Process temperature [K]',
    'Rotational speed [rpm]',
    'Torque [Nm]',
    'Tool wear [min]'
] + _class_columns

# The row dicts are modified in place, so the same objects held by the
# per-split lists change as well.
for idx, row in enumerate(all_rows):
    encoded = formatted_target[idx]
    for n, column in enumerate(_class_columns):
        row[column] = encoded[n]
    # pop() with a default instead of del: on a second execution of this cell
    # the raw target columns are already gone, and del would raise KeyError.
    for column in _target_columns:
        row.pop(column, None)

# Cut the pooled list back into the original splits using their sizes.
_train_end = len(rows_train)
_valid_end = _train_end + len(rows_valid)
formatted_train = all_rows[:_train_end]
formatted_valid = all_rows[_train_end:_valid_end]
formatted_test = all_rows[_valid_end:]

print(len(formatted_train), len(formatted_valid), len(formatted_test))

6999 2000 1001


In [5]:
from csv import DictWriter

# Write each formatted split to its own CSV file, all with the same column
# order given by `headers`.
for _out_path, _split_rows in (
    ('data/formatted_train.csv', formatted_train),
    ('data/formatted_valid.csv', formatted_valid),
    ('data/formatted_test.csv', formatted_test),
):
    # newline='' stops the text layer from translating line endings, so the
    # '\n' terminator requested from the writer is what lands in the file on
    # every platform.  No close() call is needed; the with-statement closes
    # the file.
    with open(_out_path, 'w', encoding='utf8', newline='') as csv_file:
        writer = DictWriter(csv_file, headers, delimiter=',', lineterminator='\n')
        writer.writeheader()
        writer.writerows(_split_rows)