In [1]:
# import libraries
import os
import numpy as np
import pandas as pd
import random
import pickle5 as pickle
import time

In [2]:
# import raw data
# np.load on an .npz archive returns a lazy NpzFile that keeps the file open;
# the with-block closes it once both arrays have been read into memory.
with np.load('../../data/archive/Wafer_Map_Datasets.npz') as data:
    waferMap = data['arr_0']
    labels = data['arr_1']

# create dataframe: one row per wafer, each cell holds that wafer's array
df = pd.DataFrame()
df['waferMap'] = list(waferMap)
df['labels'] = list(labels)
# sum of the entries of each label vector
df['label_total'] = df['labels'].apply(sum)

print(df.shape)
df.head()

(38015, 3)


Unnamed: 0,waferMap,labels,label_total
0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[1, 0, 1, 0, 0, 0, 1, 0]",3
1,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[1, 0, 1, 0, 0, 0, 1, 0]",3
2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[1, 0, 1, 0, 0, 0, 1, 0]",3
3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[1, 0, 1, 0, 0, 0, 1, 0]",3
4,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[1, 0, 1, 0, 0, 0, 1, 0]",3


In [3]:
# keep only single labeled wafers
# (rows whose label vector sums to at most 1; the index is renumbered from 0)
df_single = df.loc[df['label_total'].le(1)].reset_index(drop=True)

print(df_single.shape)
df_single.head()

(8015, 3)


Unnamed: 0,waferMap,labels,label_total
0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[1, 0, 0, 0, 0, 0, 0, 0]",1
1,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[1, 0, 0, 0, 0, 0, 0, 0]",1
2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[1, 0, 0, 0, 0, 0, 0, 0]",1
3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[1, 0, 0, 0, 0, 0, 0, 0]",1
4,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[1, 0, 0, 0, 0, 0, 0, 0]",1


In [4]:
# label failure types
def failureType(x):
    """Map a label vector to the name of its failure type.

    Positions 0 through 7 of ``x`` are inspected in order and the name
    associated with the first position equal to 1 is returned. If none of
    those positions equals 1, the string ``'none'`` is returned.
    """
    # Names listed in the same order as the positions they correspond to.
    names = ('Center', 'Donut', 'Edge-Loc', 'Edge-Ring',
             'Loc', 'Near-full', 'Scratch', 'Random')
    for position, name in enumerate(names):
        if x[position] == 1:
            return name
    return 'none'
        
# name the failure type encoded by each wafer's label vector
df_single['failureType'] = df_single['labels'].apply(failureType)

# add index column to identify specific wafers
# (guarded and non-inplace so that re-running this cell does not insert a
# second 'ID' column)
if 'ID' not in df_single.columns:
    df_single = df_single.reset_index().rename(columns={'index': 'ID'})

# add detection model labels: 0 for 'none', 1 for any failure type
df_single['detectLabels'] = (df_single['failureType'] != 'none').astype('int64')

# add classification model labels
fail_dict = {'none': 8, 'Loc': 0, 'Edge-Loc': 1, 'Center': 2, 'Edge-Ring': 3, 
             'Scratch': 4, 'Random': 5, 'Near-full': 6, 'Donut': 7}
df_single['classifyLabels'] = df_single['failureType'].map(fail_dict)
# Series.map yields NaN for unmapped values, so fail loudly instead of silently
assert df_single['classifyLabels'].notna().all(), 'failureType value missing from fail_dict'

print(df_single.shape)
df_single.head()

(8015, 7)


Unnamed: 0,ID,waferMap,labels,label_total,failureType,detectLabels,classifyLabels
0,0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[1, 0, 0, 0, 0, 0, 0, 0]",1,Center,1,2
1,1,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[1, 0, 0, 0, 0, 0, 0, 0]",1,Center,1,2
2,2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[1, 0, 0, 0, 0, 0, 0, 0]",1,Center,1,2
3,3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[1, 0, 0, 0, 0, 0, 0, 0]",1,Center,1,2
4,4,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[1, 0, 0, 0, 0, 0, 0, 0]",1,Center,1,2


In [5]:
# sorted distinct failure-type names present in df_single
np.unique(df_single['failureType'])

array(['Center', 'Donut', 'Edge-Loc', 'Edge-Ring', 'Loc', 'Near-full',
       'Random', 'Scratch', 'none'], dtype=object)

In [6]:
# # save dataset for future use (intentionally left disabled; uncomment the lines below to write the pickle)
# with open('../../data/MixedWM38-single.pkl', "wb") as f:
#     pickle.dump(df_single, f)