In [None]:
# import libraries
import os
import time
import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle5 as pickle
from functools import reduce

import helpers as helper

#### Metadata from raw data file

In [None]:
# Read the cleaned wafer-map dataset from disk.
# NOTE: pickle.load can execute arbitrary code -- only open files from a trusted source.
with open('../../data/WM-clean.pkl', "rb") as fh:
    raw_data = pickle.load(fh)

# Turn the existing index into an 'ID' column so each wafer can be identified,
# and rename the 'shape' column to 'dims'.
raw_data = raw_data.reset_index().rename(columns={'index':'ID', 'shape': 'dims'})

# Detection labels: 0 when failureType is 'none', 1 for every other failure type.
raw_data['detectLabels'] = raw_data['failureType'].map(lambda failure: int(failure != 'none'))

# Classification labels: integer code per failure type ('none' is coded as 8).
fail_dict = {'none': 8, 'Loc': 0, 'Edge-Loc': 1, 'Center': 2, 'Edge-Ring': 3, 
             'Scratch': 4, 'Random': 5, 'Near-full': 6, 'Donut': 7}
raw_data['classifyLabels'] = raw_data['failureType'].map(lambda failure: fail_dict[failure])

# Restrict to rows flagged as the test split and renumber them from zero.
is_test_row = raw_data['dataset'] == 'test'
test = raw_data.loc[is_test_row].reset_index(drop=True)

# Columns carried forward as per-wafer metadata for the error analysis.
metadata_columns = ['ID', 'waferMap', 'dieSize', 'lotName', 'dims',
                    'failureType', 'detectLabels', 'classifyLabels']
metadata = test[metadata_columns]
print(metadata.shape)
metadata.head()

#### Load results from all detection models

In [None]:
def _load_results(path):
    """Unpickle and return the object stored in the results file at `path`.

    NOTE: pickle.load can execute arbitrary code -- only use on trusted files.
    """
    with open(path, "rb") as fh:
        return pickle.load(fh)


# One saved result object per detection-model variant.
paper = _load_results('../results/yudetect-paper.pkl')
d224 = _load_results('../results/yudetect-224.pkl')
d224thin2 = _load_results('../results/yudetect-224-thin2.pkl')
d224thin4 = _load_results('../results/yudetect-224-thin4.pkl')
d60 = _load_results('../results/yudetect-60.pkl')
d60m3 = _load_results('../results/yudetect-60-mfilter3.pkl')
d60thin2 = _load_results('../results/yudetect-60-thin2.pkl')

# Parallel lists: loaded results and the short name used for each as a column label.
dfs = [paper, d224, d224thin2, d224thin4, d60, d60m3, d60thin2]
df_names = ['paper', 'd224', 'd224thin2', 'd224thin4', 'd60', 'd60m3', 'd60thin2']

In [None]:
# Build one frame holding the test-set metadata plus one prediction column per model.
# Each column is named after its model and filled from element [0] of that model's
# loaded results; assign() returns a new frame, so `metadata` itself is left unchanged.
prediction_columns = {name: results[0].tolist() for results, name in zip(dfs, df_names)}
analysis = metadata.assign(**prediction_columns)

analysis.head()

#### Explore wafers mislabeled by the paper model

In [None]:
# Collect, for every model, the IDs of the wafers whose prediction differs from
# the true detection label.
dfs = [paper, d224, d224thin2, d224thin4, d60, d60m3, d60thin2]


def _mislabeled_ids(results):
    """Return the metadata IDs where results[0][i] differs from detectLabels[i].

    NOTE(review): element [0] of each loaded result presumably holds one
    prediction per test wafer, in metadata row order -- confirm.
    """
    wafer_ids = metadata.ID
    true_labels = metadata.detectLabels
    return [wafer_ids[row] for row in range(len(metadata))
            if results[0][row] != true_labels[row]]


id_lists = [_mislabeled_ids(results) for results in dfs]
# Named handles onto the same list objects, in the same order as `dfs`.
paperid, d224id, d224thin2id, d224thin4id, d60id, d60m3id, d60thin2id = id_lists

# Report how many wafers each model got wrong.
for missed in id_lists:
    print(len(missed))

In [None]:
# Keep only the wafers the paper model mislabeled.
# A single isin() pass replaces the original per-ID scan of analysis.ID, which cost
# one full column comparison for every mislabeled wafer.
# Row order is unchanged: paperid was collected in metadata row order, and analysis
# is a copy of metadata.
# NOTE(review): assumes ID values are unique within analysis -- confirm.
paper_mask = analysis.ID.isin(paperid)
paper_indices = analysis.index[paper_mask].tolist()
paper_miss = analysis[paper_mask].reset_index(drop=True)
len(paper_miss)

In [None]:
# Tally the paper model's misses per true failure type, largest group first.
paper_failure_counts = paper_miss.groupby('failureType')['failureType'].count()
paper_failure_counts.sort_values(ascending=False)

In [None]:
# Plot a random sample of paper-model misses whose true failure type is 'none'.
# The boolean mask replaces a per-row .iloc lookup; paper_miss has a fresh 0..n-1
# index (reset_index(drop=True)), so positions and index labels coincide.
mistakes = np.flatnonzero((paper_miss['failureType'] == 'none').to_numpy()).tolist()
# random.sample raises ValueError if asked for more items than exist, so cap at what is available.
random_n = random.sample(mistakes, min(9, len(mistakes)))
# NOTE(review): assumes helper.plot_list accepts fewer than nine indices -- confirm.
helper.plot_list(paper_miss, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# Plot a random sample of paper-model misses whose true failure type is 'Edge-Loc'.
# The boolean mask replaces a per-row .iloc lookup; paper_miss has a fresh 0..n-1
# index (reset_index(drop=True)), so positions and index labels coincide.
mistakes = np.flatnonzero((paper_miss['failureType'] == 'Edge-Loc').to_numpy()).tolist()
# random.sample raises ValueError if asked for more items than exist, so cap at what is available.
random_n = random.sample(mistakes, min(9, len(mistakes)))
# NOTE(review): assumes helper.plot_list accepts fewer than nine indices -- confirm.
helper.plot_list(paper_miss, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# Plot a random sample of paper-model misses whose true failure type is 'Scratch'.
# The boolean mask replaces a per-row .iloc lookup; paper_miss has a fresh 0..n-1
# index (reset_index(drop=True)), so positions and index labels coincide.
mistakes = np.flatnonzero((paper_miss['failureType'] == 'Scratch').to_numpy()).tolist()
# random.sample raises ValueError if asked for more items than exist, so cap at what is available.
random_n = random.sample(mistakes, min(9, len(mistakes)))
# NOTE(review): assumes helper.plot_list accepts fewer than nine indices -- confirm.
helper.plot_list(paper_miss, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# Plot a random sample of paper-model misses whose true failure type is 'Loc'.
# The boolean mask replaces a per-row .iloc lookup; paper_miss has a fresh 0..n-1
# index (reset_index(drop=True)), so positions and index labels coincide.
mistakes = np.flatnonzero((paper_miss['failureType'] == 'Loc').to_numpy()).tolist()
# random.sample raises ValueError if asked for more items than exist, so cap at what is available.
random_n = random.sample(mistakes, min(9, len(mistakes)))
# NOTE(review): assumes helper.plot_list accepts fewer than nine indices -- confirm.
helper.plot_list(paper_miss, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

#### Explore wafers mislabeled by the thinned model (224-thin2)

In [None]:
# Keep only the wafers the thinned (d224thin2) model mislabeled.
# A single isin() pass replaces the original per-ID scan of analysis.ID, which cost
# one full column comparison for every mislabeled wafer.
# Row order is unchanged: d224thin2id was collected in metadata row order, and
# analysis is a copy of metadata.
# NOTE(review): assumes ID values are unique within analysis -- confirm.
thin_mask = analysis.ID.isin(d224thin2id)
thin_indices = analysis.index[thin_mask].tolist()
thin_miss = analysis[thin_mask].reset_index(drop=True)
len(thin_miss)

In [None]:
# Tally the thinned model's misses per true failure type, largest group first.
thin_failure_counts = thin_miss.groupby('failureType')['failureType'].count()
thin_failure_counts.sort_values(ascending=False)

In [None]:
# Plot a random sample of thinned-model misses whose true failure type is 'none'.
# The boolean mask replaces a per-row .iloc lookup; thin_miss has a fresh 0..n-1
# index (reset_index(drop=True)), so positions and index labels coincide.
mistakes = np.flatnonzero((thin_miss['failureType'] == 'none').to_numpy()).tolist()
# random.sample raises ValueError if asked for more items than exist, so cap at what is available.
random_n = random.sample(mistakes, min(9, len(mistakes)))
# NOTE(review): assumes helper.plot_list accepts fewer than nine indices -- confirm.
helper.plot_list(thin_miss, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# Plot a random sample of thinned-model misses whose true failure type is 'Edge-Loc'.
# The boolean mask replaces a per-row .iloc lookup; thin_miss has a fresh 0..n-1
# index (reset_index(drop=True)), so positions and index labels coincide.
mistakes = np.flatnonzero((thin_miss['failureType'] == 'Edge-Loc').to_numpy()).tolist()
# random.sample raises ValueError if asked for more items than exist, so cap at what is available.
random_n = random.sample(mistakes, min(9, len(mistakes)))
# NOTE(review): assumes helper.plot_list accepts fewer than nine indices -- confirm.
helper.plot_list(thin_miss, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# Plot a random sample of thinned-model misses whose true failure type is 'Loc'.
# The boolean mask replaces a per-row .iloc lookup; thin_miss has a fresh 0..n-1
# index (reset_index(drop=True)), so positions and index labels coincide.
mistakes = np.flatnonzero((thin_miss['failureType'] == 'Loc').to_numpy()).tolist()
# random.sample raises ValueError if asked for more items than exist, so cap at what is available.
random_n = random.sample(mistakes, min(9, len(mistakes)))
# NOTE(review): assumes helper.plot_list accepts fewer than nine indices -- confirm.
helper.plot_list(thin_miss, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# Plot a random sample of thinned-model misses whose true failure type is 'Scratch'.
# The boolean mask replaces a per-row .iloc lookup; thin_miss has a fresh 0..n-1
# index (reset_index(drop=True)), so positions and index labels coincide.
mistakes = np.flatnonzero((thin_miss['failureType'] == 'Scratch').to_numpy()).tolist()
# random.sample raises ValueError if asked for more items than exist, so cap at what is available.
random_n = random.sample(mistakes, min(9, len(mistakes)))
# NOTE(review): assumes helper.plot_list accepts fewer than nine indices -- confirm.
helper.plot_list(thin_miss, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

#### Wafers mislabeled by all of the first three models (paper, 224, 224-thin2)

In [None]:
# IDs mislabeled by every one of the first three models in id_lists:
# intersect the first two ID lists, then intersect that result with the third.
first_ids, second_ids, third_ids = id_lists[:3]
shared_by_first_two = np.intersect1d(first_ids, second_ids)
intersection = np.intersect1d(shared_by_first_two, third_ids)
len(intersection)

In [None]:
# Keep only the wafers whose ID appears in `intersection` (the commonly mislabeled set).
# A single isin() pass replaces the original per-ID scan of analysis.ID, which cost
# one full column comparison for every ID in the intersection.
# np.intersect1d returns its values sorted, so the original rows came out in
# ascending-ID order; the stable sort on ID reproduces that ordering.
# NOTE(review): assumes ID values are unique within analysis -- confirm.
common_mask = analysis.ID.isin(intersection)
common_rows = analysis[common_mask].sort_values('ID', kind='stable')
miss_indices = common_rows.index.tolist()
misclassified = common_rows.reset_index(drop=True)
len(misclassified)

In [None]:
# Tally the commonly mislabeled wafers per true failure type, largest group first.
misclassified_failure_counts = misclassified.groupby('failureType')['failureType'].count()
misclassified_failure_counts.sort_values(ascending=False)

In [None]:
# Plot a random sample of commonly mislabeled wafers whose true failure type is 'none'.
# The boolean mask replaces a per-row .iloc lookup; misclassified has a fresh 0..n-1
# index (reset_index(drop=True)), so positions and index labels coincide.
mistakes = np.flatnonzero((misclassified['failureType'] == 'none').to_numpy()).tolist()
# random.sample raises ValueError if asked for more items than exist, so cap at what is available.
random_n = random.sample(mistakes, min(9, len(mistakes)))
# NOTE(review): assumes helper.plot_list accepts fewer than nine indices -- confirm.
helper.plot_list(misclassified, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# Plot a random sample of commonly mislabeled wafers whose true failure type is 'Loc'.
# The boolean mask replaces a per-row .iloc lookup; misclassified has a fresh 0..n-1
# index (reset_index(drop=True)), so positions and index labels coincide.
mistakes = np.flatnonzero((misclassified['failureType'] == 'Loc').to_numpy()).tolist()
# random.sample raises ValueError if asked for more items than exist, so cap at what is available.
random_n = random.sample(mistakes, min(9, len(mistakes)))
# NOTE(review): assumes helper.plot_list accepts fewer than nine indices -- confirm.
helper.plot_list(misclassified, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# Plot a random sample of commonly mislabeled wafers whose true failure type is 'Edge-Loc'.
# The boolean mask replaces a per-row .iloc lookup; misclassified has a fresh 0..n-1
# index (reset_index(drop=True)), so positions and index labels coincide.
mistakes = np.flatnonzero((misclassified['failureType'] == 'Edge-Loc').to_numpy()).tolist()
# random.sample raises ValueError if asked for more items than exist, so cap at what is available.
random_n = random.sample(mistakes, min(9, len(mistakes)))
# NOTE(review): assumes helper.plot_list accepts fewer than nine indices -- confirm.
helper.plot_list(misclassified, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')

In [None]:
# Plot a random sample of commonly mislabeled wafers whose true failure type is 'Scratch'.
# The boolean mask replaces a per-row .iloc lookup; misclassified has a fresh 0..n-1
# index (reset_index(drop=True)), so positions and index labels coincide.
mistakes = np.flatnonzero((misclassified['failureType'] == 'Scratch').to_numpy()).tolist()
# random.sample raises ValueError if asked for more items than exist, so cap at what is available.
random_n = random.sample(mistakes, min(9, len(mistakes)))
# NOTE(review): assumes helper.plot_list accepts fewer than nine indices -- confirm.
helper.plot_list(misclassified, random_n, fig_size=(5,5), col='waferMap', cmap='inferno')