In [None]:
# import libraries
import os
import time
import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle5 as pickle
from functools import reduce

import helpers as helper

#### Metadata from raw data file

In [None]:
# load data
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open('../../data/WM-clean.pkl', "rb") as fh:
    raw_data = pickle.load(fh)

# expose the original row index as an 'ID' column to identify specific wafers,
# and rename the 'shape' column to 'dims'
raw_data = raw_data.reset_index().rename(columns={'index': 'ID', 'shape': 'dims'})

# add detection model labels: 0 for 'none', 1 for any other failure type
raw_data['detectLabels'] = [0 if kind == 'none' else 1 for kind in raw_data['failureType']]

# add classification model labels
fail_dict = {'none': 8, 'Loc': 0, 'Edge-Loc': 1, 'Center': 2, 'Edge-Ring': 3,
             'Scratch': 4, 'Random': 5, 'Near-full': 6, 'Donut': 7}
raw_data['classifyLabels'] = [fail_dict[kind] for kind in raw_data['failureType']]

# keep only the test-set wafers whose failure type is not 'none'
is_test = raw_data.dataset == 'test'
has_failure = raw_data.failureType != 'none'
test = raw_data[is_test & has_failure].reset_index(drop=True)

# collect metadata
metadata_cols = ['ID', 'waferMap', 'dieSize', 'lotName', 'dims', 'failureType', 'classifyLabels']
metadata = test[metadata_cols]
print(metadata.shape)
metadata.head()

#### Load results from all classify models

In [None]:
# Result pickle for each model, keyed by the short name used for that model
# throughout the rest of the notebook. Insertion order fixes the order of
# `dfs` and `df_names`.
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
result_paths = {
    'paper': '../results/yuclassify-paper.pkl',
    'papersh': '../results/yuclassify-paper-short.pkl',
    'paperknn': '../results/yuclassify-paper-knn.pkl',
    'c224': '../results/yuclassify-224.pkl',
    'c224knn': '../results/yuclassify-224-knn.pkl',
    'c224thin2': '../results/yuclassify-224thin2.pkl',
    'c224thin4': '../results/yuclassify-224thin4.pkl',
    'c60': '../results/yuclassify-60.pkl',
    'c60m3': '../results/yuclassify-60-mfilter3.pkl',
    'c60thin2': '../results/yuclassify-60-thin2.pkl',
}

loaded_results = {}
for model_name, result_path in result_paths.items():
    with open(result_path, "rb") as fh:
        loaded_results[model_name] = pickle.load(fh)

df_names = list(result_paths)
dfs = [loaded_results[model_name] for model_name in df_names]

# keep the individual per-model variables available under their usual names
(paper, papersh, paperknn, c224, c224knn,
 c224thin2, c224thin4, c60, c60m3, c60thin2) = dfs

In [None]:
# collect metadata and predictions in one dataframe:
# one new column per model, named after the model and filled from element 0
# of that model's loaded results
analysis = metadata.assign(
    **{model_name: results[0].tolist() for results, model_name in zip(dfs, df_names)}
)

analysis.head()

#### Explore wafers mislabeled by the paper model

In [None]:
# make list of IDs of misclassified wafers (one list per model, same order as `dfs`)
paperid, papershid, paperknnid, c224id, c224knnid = [], [], [], [], []
c224thin2id, c224thin4id, c60id, c60m3id, c60thin2id = [], [], [], [], []
id_lists = [paperid, papershid, paperknnid, c224id, c224knnid, c224thin2id, c224thin4id,
            c60id, c60m3id, c60thin2id]

for wrong_ids, results in zip(id_lists, dfs):
    for row in range(len(metadata)):
        # a wafer is misclassified when the model output differs from its label
        if results[0][row] != metadata.classifyLabels[row]:
            wrong_ids.append(metadata.ID[row])
    # number of misclassified wafers for this model
    print(len(wrong_ids))

In [None]:
# keep only subset of paper mislabeled wafers
# Build an ID -> row-label lookup in one pass instead of scanning all of
# `analysis.ID` with a fresh boolean mask for every misclassified wafer.
# setdefault keeps the first row for an ID, matching the previous `[...][0]`.
first_row_of_id = {}
for row_label, wafer_id in zip(analysis.index, analysis.ID):
    first_row_of_id.setdefault(wafer_id, row_label)

# same rows, in the same order as `paperid`; an unknown ID raises KeyError
paper_indices = [first_row_of_id[i] for i in paperid]
paper_miss = analysis.loc[paper_indices].reset_index(drop=True)
len(paper_miss)

In [None]:
# number of wafers the paper model got wrong, per true failure type (largest first)
(
    paper_miss
    .groupby(by='failureType')['failureType']
    .count()
    .sort_values(ascending=False)
)

In [None]:
# Row positions of the 'Loc' wafers among the paper model's mistakes
# (vectorised comparison instead of a row-by-row .iloc scan).
mistakes = np.flatnonzero((paper_miss.failureType == 'Loc').to_numpy()).tolist()
# Plot up to 9 random examples; capping the sample size avoids the ValueError
# random.sample raises when fewer than 9 wafers match.
random_n = random.sample(mistakes, min(9, len(mistakes)))
helper.plot_list(paper_miss, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# Row positions of the 'Edge-Loc' wafers among the paper model's mistakes
# (vectorised comparison instead of a row-by-row .iloc scan).
mistakes = np.flatnonzero((paper_miss.failureType == 'Edge-Loc').to_numpy()).tolist()
# Plot up to 9 random examples; capping the sample size avoids the ValueError
# random.sample raises when fewer than 9 wafers match.
random_n = random.sample(mistakes, min(9, len(mistakes)))
helper.plot_list(paper_miss, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# Row positions of the 'Random' wafers among the paper model's mistakes
# (vectorised comparison instead of a row-by-row .iloc scan).
mistakes = np.flatnonzero((paper_miss.failureType == 'Random').to_numpy()).tolist()
# Plot up to 9 random examples; capping the sample size avoids the ValueError
# random.sample raises when fewer than 9 wafers match.
random_n = random.sample(mistakes, min(9, len(mistakes)))
helper.plot_list(paper_miss, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# Row positions of the 'Scratch' wafers among the paper model's mistakes
# (vectorised comparison instead of a row-by-row .iloc scan).
mistakes = np.flatnonzero((paper_miss.failureType == 'Scratch').to_numpy()).tolist()
# Plot up to 9 random examples; capping the sample size avoids the ValueError
# random.sample raises when fewer than 9 wafers match.
random_n = random.sample(mistakes, min(9, len(mistakes)))
helper.plot_list(paper_miss, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# paper model mistakes counted per (classifyLabels, 'paper' column value) pair,
# i.e. which label was confused with which model output (largest first)
(
    paper_miss
    .groupby(by=['classifyLabels', 'paper'])['failureType']
    .count()
    .sort_values(ascending=False)
)

In [None]:
# paper model mistakes counted per value of the 'paper' column (largest first)
(
    paper_miss
    .groupby(by='paper')['failureType']
    .count()
    .sort_values(ascending=False)
)

#### Unfiltered

In [None]:
# keep only subset of unfiltered mislabeled wafers
# Build an ID -> row-label lookup in one pass instead of scanning all of
# `analysis.ID` with a fresh boolean mask for every misclassified wafer.
# setdefault keeps the first row for an ID, matching the previous `[...][0]`.
first_row_of_id = {}
for row_label, wafer_id in zip(analysis.index, analysis.ID):
    first_row_of_id.setdefault(wafer_id, row_label)

# same rows, in the same order as `c224id`; an unknown ID raises KeyError
c224_indices = [first_row_of_id[i] for i in c224id]
c224_miss = analysis.loc[c224_indices].reset_index(drop=True)
len(c224_miss)

In [None]:
# number of wafers the c224 model got wrong, per true failure type (largest first)
(
    c224_miss
    .groupby(by='failureType')['failureType']
    .count()
    .sort_values(ascending=False)
)

In [None]:
# Row positions of the 'Edge-Loc' wafers among the c224 model's mistakes
# (vectorised comparison instead of a row-by-row .iloc scan).
mistakes = np.flatnonzero((c224_miss.failureType == 'Edge-Loc').to_numpy()).tolist()
# Plot up to 9 random examples; capping the sample size avoids the ValueError
# random.sample raises when fewer than 9 wafers match.
random_n = random.sample(mistakes, min(9, len(mistakes)))
helper.plot_list(c224_miss, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# Row positions of the 'Loc' wafers among the c224 model's mistakes
# (vectorised comparison instead of a row-by-row .iloc scan).
mistakes = np.flatnonzero((c224_miss.failureType == 'Loc').to_numpy()).tolist()
# Plot up to 9 random examples; capping the sample size avoids the ValueError
# random.sample raises when fewer than 9 wafers match.
random_n = random.sample(mistakes, min(9, len(mistakes)))
helper.plot_list(c224_miss, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# Row positions of the 'Random' wafers among the c224 model's mistakes
# (vectorised comparison instead of a row-by-row .iloc scan).
mistakes = np.flatnonzero((c224_miss.failureType == 'Random').to_numpy()).tolist()
# Plot up to 9 random examples; capping the sample size avoids the ValueError
# random.sample raises when fewer than 9 wafers match.
random_n = random.sample(mistakes, min(9, len(mistakes)))
helper.plot_list(c224_miss, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# Row positions of the 'Scratch' wafers among the c224 model's mistakes
# (vectorised comparison instead of a row-by-row .iloc scan).
mistakes = np.flatnonzero((c224_miss.failureType == 'Scratch').to_numpy()).tolist()
# Plot up to 9 random examples; capping the sample size avoids the ValueError
# random.sample raises when fewer than 9 wafers match.
random_n = random.sample(mistakes, min(9, len(mistakes)))
helper.plot_list(c224_miss, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# c224 model mistakes counted per (classifyLabels, 'c224' column value) pair,
# i.e. which label was confused with which model output (largest first)
(
    c224_miss
    .groupby(by=['classifyLabels', 'c224'])['failureType']
    .count()
    .sort_values(ascending=False)
)

In [None]:
# c224 model mistakes counted per value of the 'c224' column (largest first)
(
    c224_miss
    .groupby(by='c224')['failureType']
    .count()
    .sort_values(ascending=False)
)

#### Most mislabeled

In [None]:
# IDs misclassified by every one of the first six models in `id_lists`:
# fold np.intersect1d pairwise across their ID lists.
# NOTE(review): only id_lists[:6] are intersected; the remaining four models
# are left out -- confirm this is intentional.
intersection = id_lists[0]
for other_ids in id_lists[1:6]:
    intersection = np.intersect1d(intersection, other_ids)
len(intersection)

In [None]:
# keep only subset of most mislabeled wafers
# Build an ID -> row-label lookup in one pass instead of scanning all of
# `analysis.ID` with a fresh boolean mask for every wafer in `intersection`.
# setdefault keeps the first row for an ID, matching the previous `[...][0]`.
first_row_of_id = {}
for row_label, wafer_id in zip(analysis.index, analysis.ID):
    first_row_of_id.setdefault(wafer_id, row_label)

# same rows, in the same order as `intersection`; an unknown ID raises KeyError
miss_indices = [first_row_of_id[i] for i in intersection]
misclassified = analysis.loc[miss_indices].reset_index(drop=True)
len(misclassified)

In [None]:
# number of commonly misclassified wafers per true failure type (largest first)
(
    misclassified
    .groupby(by='failureType')['failureType']
    .count()
    .sort_values(ascending=False)
)

In [None]:
# Row positions of the 'Loc' wafers among the commonly misclassified wafers
# (vectorised comparison instead of a row-by-row .iloc scan).
mistakes = np.flatnonzero((misclassified.failureType == 'Loc').to_numpy()).tolist()
# Plot up to 9 random examples; capping the sample size avoids the ValueError
# random.sample raises when fewer than 9 wafers match.
random_n = random.sample(mistakes, min(9, len(mistakes)))
helper.plot_list(misclassified, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# Row positions of the 'Edge-Loc' wafers among the commonly misclassified wafers
# (vectorised comparison instead of a row-by-row .iloc scan).
mistakes = np.flatnonzero((misclassified.failureType == 'Edge-Loc').to_numpy()).tolist()
# Plot up to 9 random examples; capping the sample size avoids the ValueError
# random.sample raises when fewer than 9 wafers match.
random_n = random.sample(mistakes, min(9, len(mistakes)))
helper.plot_list(misclassified, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# Row positions of the 'Scratch' wafers among the commonly misclassified wafers
# (vectorised comparison instead of a row-by-row .iloc scan).
mistakes = np.flatnonzero((misclassified.failureType == 'Scratch').to_numpy()).tolist()
# Plot up to 8 random examples (the cell's existing sample size); capping it
# at the number of matches avoids the ValueError random.sample raises when
# fewer than 8 wafers match.
random_n = random.sample(mistakes, min(8, len(mistakes)))
helper.plot_list(misclassified, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# Row positions of the 'Random' wafers among the commonly misclassified wafers
# (vectorised comparison instead of a row-by-row .iloc scan).
mistakes = np.flatnonzero((misclassified.failureType == 'Random').to_numpy()).tolist()
# Plot up to 7 random examples (the cell's existing sample size); capping it
# at the number of matches avoids the ValueError random.sample raises when
# fewer than 7 wafers match.
random_n = random.sample(mistakes, min(7, len(mistakes)))
helper.plot_list(misclassified, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# commonly misclassified wafers counted per (classifyLabels, 'paper' column value)
# pair, i.e. which label the paper model confused with which output (largest first)
(
    misclassified
    .groupby(by=['classifyLabels', 'paper'])['failureType']
    .count()
    .sort_values(ascending=False)
)

In [None]:
# commonly misclassified wafers counted per value of the 'paper' column (largest first)
(
    misclassified
    .groupby(by='paper')['failureType']
    .count()
    .sort_values(ascending=False)
)