## Imports

In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score,roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import MultinomialNB
from sklearn import preprocessing
from sklearn import svm, linear_model
import pickle
import seaborn as sns
X_st_train,X_st_test,y_st_train,y_st_test,X_train,X_test,un_X,un_y,y_train,y_test,params,estm,pred_un=[0,0,0,0,0,0,0,0,0,0,0,0,0]
%matplotlib inline

## Models Pipeline

In [2]:
# Function to pipeline training data to standard scalar, selected classifiers then grid search for
# best parameters and output confusion matrix.
def Models_func(X_train=X_train,
                      X_test=X_test,
                      y_train=y_train,
                      y_test=y_test,
                      params={},
                      estm='LR',
                un_X=0,
                un_y=0,
                pred_un=False):
    
    if estm=='LR': #pipline and GridSearchCV Logistic Regression
        model_name='    Logistic Regression '
        pipeline = Pipeline([('scaler', StandardScaler()),('model',LogisticRegression())])
        grid = GridSearchCV(pipeline, cv=5, n_jobs=-1, param_grid=params)
        
    elif estm=='KNN': #pipline and GridSearchCV KNeighbors Classifier
        model_name='    KNN Classifier '
        pipeline = Pipeline([('scaler', StandardScaler()),('model',KNeighborsClassifier())])
        grid = GridSearchCV(pipeline, cv=5, n_jobs=-1, param_grid=params)
    elif estm=='DTC': #pipline and GridSearchCV DTC Classifier
        model_name='    Decision Tree Classifier '
        pipeline = Pipeline([('scaler', StandardScaler()),('model',DecisionTreeClassifier())])
        grid = GridSearchCV(pipeline, cv=5, n_jobs=-1, param_grid=params)
    elif estm=='RFC': #pipline and Random Forrest Classifier
        model_name='    Random Forrest Classifier '
        pipeline = Pipeline([('scaler', StandardScaler()),('model',RandomForestClassifier())])
        grid = GridSearchCV(pipeline, cv=5, n_jobs=-1, param_grid=params)
    elif estm=='ETC': #pipline and Extra Tree Classifier
        model_name='    Extra Tree Classifier '
        pipeline = Pipeline([('scaler', StandardScaler()),('model',ExtraTreesClassifier())])
        grid = GridSearchCV(pipeline, cv=5, n_jobs=-1, param_grid=params)
    elif estm=='SVM': #pipline and Support Vector Machines
        model_name='    Support Vector Machine '
        pipeline = Pipeline([('scaler', StandardScaler()),('model',svm.SVC())])
        grid = GridSearchCV(pipeline, cv=5, n_jobs=-1, param_grid=params)
        
    
    print('################################################')
    print('  {}'.format(model_name))
    print('################################################')
        
    grid.fit(X_train, y_train) #fit to GridSearchCV 
    best_param=grid.best_estimator_.get_params()
    print('')
    print('Best parameters of classifier from GridSearchCV')
    print('-----------------------------------------------------------')
    print('')
    for param_name in sorted(params.keys()):
        print('%s: %r'%(param_name,best_param[param_name]))
    predict=grid.predict(X_test)
    score_train=grid.score(X_train, y_train)
    # Score our model on the testing set.
    score_test=grid.score(X_test, y_test)     
    # Generate a confusion matrix.
    with open ('./models/'+estm+'_model.plk','wb') as f:
        pickle.dump(grid.best_estimator_,f)
    cm=confusion_matrix(y_test, predict,labels=[1,2,3,4,5,6,7,8])
    
    cm_df = pd.DataFrame(cm, index=['Actual C1','Actual C2',
                                    'Actual C3','Actual C4',
                                    'Actual C5','Actual C6',
                                    'Actual C7','Actual C8'], 
                          columns=['Predicted C1','Predicted C2',
                                   'Predicted C3','Predicted C4',
                                   'Predicted C5','Predicted C6',
                                   'Actual C7','Actual C8'])
    cr=classification_report(y_test,predict)
    accu = round(accuracy_score(y_test,predict)*100,2)
    lb = preprocessing.LabelBinarizer()
    lb.fit(y_test)
    y_lb = lb.transform(y_test)
    y_pred_lb = lb.transform(predict)
    ras=roc_auc_score(y_lb, y_pred_lb, average="macro")
    Score_view(score_train,score_test,cm_df,cr,accu,ras)
        
    if pred_un==True:
        unseen_pred=grid.predict(un_X)
        k=0
        for i,n in enumerate(un_y.cat.tolist()):

            if not n==unseen_pred[i]:
                k=k+1
        print('----------------------')
        print('Unseen Data evaluation')
        print('----------------------')
        print('')        
        print('Total classified unseen image count: ',len(un_y))
        print('Miss classified unseen image count: ',k)
        print('% of miss classified unseen images: ',(k/len(un_y))*100,'%')
        print('')

    

## Score board function

In [3]:
#function to print scores
#Inputs:
    # score_train - train score
    # score_test -test score
    # cm_df - confusion matrix dataframe
    # cr - classification report value
    # accu - accuracy value
    # ras  - ROCAUC value
score_train,score_test,cm_df,cr,accu,ras=[0,0,0,0,0,0]
def Score_view(score_train=score_train,score_test=score_test,cm_df=cm_df,cr=cr,accu=accu,ras=ras):
    print("")
    print('Score our model on the training set: ',score_train)
    print("")
    print('Score our model on the testing set: ',score_test)
    print('')
    print('** Confusion matrix **')
    print('----------------------')
    print(cm_df)
    print('')
    print('** Classification Report **')
    print('---------------------------')
    print(cr)
    print('')
    print('---------------------')
    print('| Accuracy : {}% |'.format(accu))
    print('---------------------')
    print('')
    print('---------------------')
    print('| ROC Score : {}% |'.format(round(ras*100,3)))
    print('---------------------')
    print('')
    print('===========================================================================')
    print('')
    print('')

## Beep Beep function :)

In [4]:
#function to make a Beep Beep Beep sound if needed
def beep_beep():
    import winsound
    frequency = 2500  # Set Frequency To 2500 Hertz
    duration = 100  # Set Duration To 1000 ms == 1 second
    winsound.Beep(frequency, duration)
    winsound.Beep(frequency, duration)
    winsound.Beep(frequency, duration)

## Test and Train dataset loading

In [5]:
#Path of all labeled images list
c1path='./data/C1.csv'
c2path='./data/C2.csv'
c3path='./data/C3.csv'
c4path='./data/C4.csv'
c5path='./data/C5.csv'
c6path='./data/C6.csv'
c7path='./data/C7.csv'
c8path='./data/C8.csv'

In [6]:
#concat all labeled image list
df=pd.read_csv(c1path)
dftemp=pd.read_csv(c2path)
df = pd.concat([df, dftemp],ignore_index=True)
dftemp=pd.read_csv(c3path)
df = pd.concat([df, dftemp], ignore_index=True)
dftemp=pd.read_csv(c4path)
df = pd.concat([df, dftemp], ignore_index=True)
dftemp=pd.read_csv(c5path)
df = pd.concat([df, dftemp], ignore_index=True)
dftemp=pd.read_csv(c6path)
df = pd.concat([df, dftemp], ignore_index=True)
dftemp=pd.read_csv(c7path)
df = pd.concat([df, dftemp], ignore_index=True)
dftemp=pd.read_csv(c8path)
df = pd.concat([df, dftemp], ignore_index=True)

In [7]:
df.shape

(24000, 9)

In [8]:
df.cat.unique()

array([1, 2, 3, 4, 5, 6, 7, 8], dtype=int64)

In [9]:
df.isnull().sum()

lbpene           0
lbpent       18853
dissim           0
homogen          0
ener             0
corre            0
gabor_ene        0
gabor_ent    22924
cat              0
dtype: int64

In [10]:
#any missing data fill with 0
df.fillna(0,inplace=True)

In [11]:
df.head()

Unnamed: 0,lbpene,lbpent,dissim,homogen,ener,corre,gabor_ene,gabor_ent,cat
0,0.918707,0.0,0.037872,0.981065,0.881493,0.81612,0.998978,0.0,1
1,0.919737,0.0,0.036982,0.981511,0.88673,0.809414,0.999009,0.0,1
2,0.915414,0.0,0.040689,0.979658,0.877484,0.806211,0.998642,0.0,1
3,0.91698,0.0,0.039246,0.98038,0.881061,0.806787,0.999037,0.0,1
4,0.917559,0.0,0.037253,0.981374,0.884812,0.812264,0.998662,0.0,1


In [12]:
df.tail()

Unnamed: 0,lbpene,lbpent,dissim,homogen,ener,corre,gabor_ene,gabor_ent,cat
23995,0.752845,0.0,0.241025,0.904453,0.74031,0.999451,0.728165,0.0,8
23996,0.770907,0.737184,0.186435,0.919888,0.711262,0.999753,0.687355,0.0,8
23997,0.774448,0.730525,0.199259,0.919788,0.750928,0.999579,0.751846,0.0,8
23998,0.776562,0.0,0.208074,0.917729,0.736606,0.999569,0.73265,0.0,8
23999,0.779119,0.720844,0.184394,0.920184,0.730101,0.999705,0.7118,0.0,8


## Split train and test set

In [13]:
#select X and y then split train and test data
X=df.drop(columns=['cat'])
y=df.drop(columns=['lbpene','lbpent','dissim','homogen','ener','corre','gabor_ene','gabor_ent'])

X_st_train, X_st_test, y_st_train, y_st_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=42,
                                                    stratify=y)

## Grid search, Model training and View scores

### Logistic Regression

In [14]:
# Logistic Regression training and grid search
#parameter range to be searched for best parameters
params={'model__C':[0.01,1,10,100,1000],
        'model__penalty':['l1','l2'],
         'model__multi_class':['multinomial'],
    'model__solver':['saga']}

#'LR' - for Logistic Regression
estm='LR'
Models_func(X_train=X_st_train,X_test=X_st_test,y_train=y_st_train,y_test=y_st_test,params=params,estm=estm)

################################################
      Logistic Regression 
################################################


  y = column_or_1d(y, warn=True)



Best parameters of classifier from GridSearchCV
-----------------------------------------------------------

model__C: 1000
model__multi_class: 'multinomial'
model__penalty: 'l1'
model__solver: 'saga'

Score our model on the training set:  0.9708928571428571

Score our model on the testing set:  0.9705555555555555

** Confusion matrix **
----------------------
           Predicted C1  Predicted C2  Predicted C3  Predicted C4  \
Actual C1           895             0             5             0   
Actual C2             0           897             3             0   
Actual C3            29             0           871             0   
Actual C4            57             0             0           822   
Actual C5             0             0             0             0   
Actual C6             0             0             0            97   
Actual C7             0             0             0             0   
Actual C8             0             0             0             0   

           Pre



### KNeighbors Classifier

In [15]:
# KNeighbors Classifier training and grid search
#parameter range to be searched for best parameters
params={'model__leaf_size':[10,20,30,40],
       'model__n_neighbors':[3,5,7,9,11]}

#'KNN' - for KNeighbors
estm='KNN'
Models_func(X_train=X_st_train,X_test=X_st_test,y_train=y_st_train,y_test=y_st_test,params=params,estm=estm)

################################################
      KNN Classifier 
################################################


  self._final_estimator.fit(Xt, y, **fit_params)



Best parameters of classifier from GridSearchCV
-----------------------------------------------------------

model__leaf_size: 10
model__n_neighbors: 3

Score our model on the training set:  0.9986904761904762

Score our model on the testing set:  0.9965277777777778

** Confusion matrix **
----------------------
           Predicted C1  Predicted C2  Predicted C3  Predicted C4  \
Actual C1           899             0             1             0   
Actual C2             0           899             1             0   
Actual C3             2             0           898             0   
Actual C4             3             0             0           893   
Actual C5             0             0             0             0   
Actual C6             0             0             0            14   
Actual C7             0             0             0             0   
Actual C8             0             0             0             0   

           Predicted C5  Predicted C6  Actual C7  Actual C8  
A

### Decision Tree Classifier

In [16]:
# Decision Tree Classifier training and grid search
#parameter range to be searched for best parameters
params={'model__max_depth': [3, 5, 7, 10],
       'model__min_samples_split': [5, 10, 15, 20],
       'model__min_samples_leaf': [2, 3, 4, 5, 6, 7]}

#'DTC' - for Decision Tree Classifier
estm='DTC'
Models_func(X_train=X_st_train,X_test=X_st_test,y_train=y_st_train,y_test=y_st_test,params=params,estm=estm)

################################################
      Decision Tree Classifier 
################################################

Best parameters of classifier from GridSearchCV
-----------------------------------------------------------

model__max_depth: 10
model__min_samples_leaf: 5
model__min_samples_split: 10

Score our model on the training set:  0.998452380952381

Score our model on the testing set:  0.9963888888888889

** Confusion matrix **
----------------------
           Predicted C1  Predicted C2  Predicted C3  Predicted C4  \
Actual C1           898             0             0             2   
Actual C2             0           900             0             0   
Actual C3             0             0           900             0   
Actual C4             2             0             0           894   
Actual C5             0             0             0             0   
Actual C6             0             0             0            18   
Actual C7             0             0  

### Random Forest Classifier

In [17]:
# Random Forest Classifier training and grid search
#parameter range to be searched for best parameters
params={'model__n_estimators': [100, 150, 200],
       'model__max_depth': [None, 1, 2, 3, 4, 5]}

#'RFC' - for Random Forest Classifier
estm='RFC'
Models_func(X_train=X_st_train,X_test=X_st_test,y_train=y_st_train,y_test=y_st_test,params=params,estm=estm)

################################################
      Random Forrest Classifier 
################################################


  self._final_estimator.fit(Xt, y, **fit_params)



Best parameters of classifier from GridSearchCV
-----------------------------------------------------------

model__max_depth: None
model__n_estimators: 200

Score our model on the training set:  1.0

Score our model on the testing set:  0.9988888888888889

** Confusion matrix **
----------------------
           Predicted C1  Predicted C2  Predicted C3  Predicted C4  \
Actual C1           899             0             0             1   
Actual C2             0           900             0             0   
Actual C3             0             0           900             0   
Actual C4             1             0             0           899   
Actual C5             0             0             0             0   
Actual C6             0             0             0             6   
Actual C7             0             0             0             0   
Actual C8             0             0             0             0   

           Predicted C5  Predicted C6  Actual C7  Actual C8  
Actual C1  

### Extra Trees Classifier

In [18]:
# Extra Trees Classifier training and grid search
#parameter range to be searched for best parameters
params={'model__n_estimators': [100, 150, 200],
       'model__max_depth': [None, 1, 2, 3, 4, 5]}

#'ETC - for Extra Trees Classifier
estm='ETC'
Models_func(X_train=X_st_train,X_test=X_st_test,y_train=y_st_train,y_test=y_st_test,params=params,estm=estm)

################################################
      Extra Tree Classifier 
################################################


  self._final_estimator.fit(Xt, y, **fit_params)



Best parameters of classifier from GridSearchCV
-----------------------------------------------------------

model__max_depth: None
model__n_estimators: 100

Score our model on the training set:  1.0

Score our model on the testing set:  0.9990277777777777

** Confusion matrix **
----------------------
           Predicted C1  Predicted C2  Predicted C3  Predicted C4  \
Actual C1           899             0             0             1   
Actual C2             0           900             0             0   
Actual C3             0             0           900             0   
Actual C4             1             0             0           899   
Actual C5             0             0             0             0   
Actual C6             0             0             0             5   
Actual C7             0             0             0             0   
Actual C8             0             0             0             0   

           Predicted C5  Predicted C6  Actual C7  Actual C8  
Actual C1  

### Support Vector Machines

In [19]:
# Support Vector Machines training and grid search
#parameter range to be searched for best parameters
params={'model__gamma' : np.logspace(-1, 2, 10),
       'model__C' : np.logspace(-1, 2, 10),
       'model__kernel' : ['rbf','linear','sigmoid']}

#SVM - for Support Vector Machines
estm='SVM'
Models_func(X_train=X_st_train,X_test=X_st_test,y_train=y_st_train,y_test=y_st_test,params=params,estm=estm)

################################################
      Support Vector Machine 
################################################


  y = column_or_1d(y, warn=True)



Best parameters of classifier from GridSearchCV
-----------------------------------------------------------

model__C: 100.0
model__gamma: 21.54434690031882
model__kernel: 'rbf'

Score our model on the training set:  0.9994047619047619

Score our model on the testing set:  0.9973611111111111

** Confusion matrix **
----------------------
           Predicted C1  Predicted C2  Predicted C3  Predicted C4  \
Actual C1           900             0             0             0   
Actual C2             0           899             1             0   
Actual C3             0             0           899             0   
Actual C4             1             0             0           895   
Actual C5             0             0             0             0   
Actual C6             0             0             0            12   
Actual C7             0             0             0             0   
Actual C8             0             0             0             0   

           Predicted C5  Predicted C6

In [20]:
#beep and alert when done
beep_beep()

## Validating using a set of unseen data

In [2]:
# path on unseen data image list
c1path='./data/unseenC1.csv'
c2path='./data/unseenC2.csv'
c3path='./data/unseenC3.csv'
c4path='./data/unseenC4.csv'
c5path='./data/unseenC5.csv'
c6path='./data/unseenC6.csv'
c7path='./data/unseenC7.csv'
c8path='./data/unseenC8.csv'

In [3]:
# concat all unseen image list
un_df=pd.read_csv(c1path)
un_dftemp=pd.read_csv(c2path)
un_df = pd.concat([un_df, un_dftemp], ignore_index=True)
un_dftemp=pd.read_csv(c3path)
un_df = pd.concat([un_df, un_dftemp], ignore_index=True)
un_dftemp=pd.read_csv(c4path)
un_df = pd.concat([un_df, un_dftemp], ignore_index=True)
un_dftemp=pd.read_csv(c5path)
un_df = pd.concat([un_df, un_dftemp], ignore_index=True)
un_dftemp=pd.read_csv(c6path)
un_df = pd.concat([un_df, un_dftemp], ignore_index=True)
un_dftemp=pd.read_csv(c7path)
un_df = pd.concat([un_df, un_dftemp], ignore_index=True)
un_dftemp=pd.read_csv(c8path)
un_df = pd.concat([un_df, un_dftemp], ignore_index=True)
print(un_df.head())
print(un_df.shape)
print(un_df.cat.unique())

     lbpene  lbpent    dissim   homogen      ener     corre  gabor_ene  \
0  0.916520     NaN  0.039602  0.980203  0.879294  0.808537   0.998985   
1  0.917885     NaN  0.037981  0.981010  0.882821  0.812165   0.998796   
2  0.917844     NaN  0.039431  0.980285  0.882734  0.802900   0.998664   
3  0.917760     NaN  0.038532  0.980734  0.882066  0.809530   0.998894   
4  0.917855     NaN  0.038371  0.980816  0.878643  0.816948   0.998612   

   gabor_ent  cat  
0        NaN    1  
1        NaN    1  
2        NaN    1  
3        NaN    1  
4        NaN    1  
(1790, 9)
[1 2 3 4 5 6 7 8]


In [4]:
un_df.isnull().sum()

lbpene          0
lbpent       1767
dissim          0
homogen         0
ener            0
corre           0
gabor_ene       0
gabor_ent    1752
cat             0
dtype: int64

In [5]:
# replace all null values with 0
un_df.fillna(0,inplace=True)

In [6]:
# select X and y
unseen_X=un_df.drop(columns=['cat'])
unseen_y=un_df.drop(columns=['lbpene','lbpent','dissim','homogen','ener','corre','gabor_ene','gabor_ent'])


In [14]:
#Function to load model, classify and output confusion matrix and yield.
def predict_unseen(unseen_X=unseen_X,unseen_y=unseen_y,estm='LR'):
    with open ('./models/'+estm+'_model.plk','rb') as f:
        model=pickle.load(f)
    predict=model.predict(unseen_X)
    
    k=0
    yld=0
    for i,n in enumerate(unseen_y.cat):
        if not n==predict[i]:
                k=k+1
        if n==1 and predict[i]==1:
            yld=yld+1
        
    print('----------------------')
    print('Unseen Data evaluation')
    print('----------------------')
    print('')        
    print('Total classified unseen image count: ',len(unseen_y))
    print('Miss classified unseen image count: ',k)
    print('% of miss classified unseen images: ',(k/len(unseen_y))*100,'%')
    print('')
    print('Batch Yield %:', (yld/len(unseen_y)*100),'%')
    print('')
    cdf=pd.DataFrame(predict,columns=['predicted'])
    cdf = pd.concat([cdf, unseen_y.cat],axis=1)
    print('Unseen data prediction confusion matrix')
    confusion_matrix = pd.crosstab(cdf['cat'], cdf['predicted'], rownames=['Actual'], colnames=['Predicted'])
    cm=pd.DataFrame(confusion_matrix)
    print(cm)

### Logistic Regression validation

In [15]:
predict_unseen(unseen_X=unseen_X,estm='LR')

----------------------
Unseen Data evaluation
----------------------

Total classified unseen image count:  1790
Miss classified unseen image count:  14
% of miss classified unseen images:  0.782122905027933 %

Batch Yield %: 72.40223463687151 %

Unseen data prediction confusion matrix
Predicted     1    2   3   4   5  6    7   8
Actual                                      
1          1296    0   4   0   0  0    0   0
2             0  199   1   0   0  0    0   0
3             5    0  95   0   0  0    0   0
4             3    0   0  52   0  0    0   0
5             0    0   0   0  10  0    0   0
6             0    0   0   1   0  9    0   0
7             0    0   0   0   0  0  100   0
8             0    0   0   0   0  0    0  15


Number of defective images have been classified as good and some good images are classified as defective. Miss classification at 0.79%

### KNeighbors Classifier validation

In [16]:
predict_unseen(unseen_X=unseen_X,estm='KNN')

----------------------
Unseen Data evaluation
----------------------

Total classified unseen image count:  1790
Miss classified unseen image count:  2
% of miss classified unseen images:  0.11173184357541899 %

Batch Yield %: 72.56983240223464 %

Unseen data prediction confusion matrix
Predicted     1    2    3   4   5   6    7   8
Actual                                        
1          1299    0    0   1   0   0    0   0
2             0  199    1   0   0   0    0   0
3             0    0  100   0   0   0    0   0
4             0    0    0  55   0   0    0   0
5             0    0    0   0  10   0    0   0
6             0    0    0   0   0  10    0   0
7             0    0    0   0   0   0  100   0
8             0    0    0   0   0   0    0  15


No defective images have been classified as good and some good images classified as defective. Miss classification at 0.11%

### Decision Tree Classifier validation

In [17]:
predict_unseen(unseen_X=unseen_X,estm='DTC')

----------------------
Unseen Data evaluation
----------------------

Total classified unseen image count:  1790
Miss classified unseen image count:  3
% of miss classified unseen images:  0.16759776536312848 %

Batch Yield %: 72.45810055865923 %

Unseen data prediction confusion matrix
Predicted     1    2    3   4   5   6    7   8
Actual                                        
1          1297    0    0   3   0   0    0   0
2             0  200    0   0   0   0    0   0
3             0    0  100   0   0   0    0   0
4             0    0    0  55   0   0    0   0
5             0    0    0   0  10   0    0   0
6             0    0    0   0   0  10    0   0
7             0    0    0   0   0   0  100   0
8             0    0    0   0   0   0    0  15


No defective images have been classified as good some good images are classified and defective. Miss classification at 0.16%

### Random Forest Classifier validation

In [18]:
predict_unseen(unseen_X=unseen_X,estm='RFC')

----------------------
Unseen Data evaluation
----------------------

Total classified unseen image count:  1790
Miss classified unseen image count:  3
% of miss classified unseen images:  0.16759776536312848 %

Batch Yield %: 72.45810055865923 %

Unseen data prediction confusion matrix
Predicted     1    2    3   4   5   6    7   8
Actual                                        
1          1297    0    0   3   0   0    0   0
2             0  200    0   0   0   0    0   0
3             0    0  100   0   0   0    0   0
4             0    0    0  55   0   0    0   0
5             0    0    0   0  10   0    0   0
6             0    0    0   0   0  10    0   0
7             0    0    0   0   0   0  100   0
8             0    0    0   0   0   0    0  15


No defective images have been classified as good some good images are classified and defective. Miss classification at 0.16%

### Extra Trees Classifier validation

In [19]:
predict_unseen(unseen_X=unseen_X,estm='ETC')

----------------------
Unseen Data evaluation
----------------------

Total classified unseen image count:  1790
Miss classified unseen image count:  1
% of miss classified unseen images:  0.055865921787709494 %

Batch Yield %: 72.62569832402235 %

Unseen data prediction confusion matrix
Predicted     1    2    3   4   5   6    7   8
Actual                                        
1          1300    0    0   0   0   0    0   0
2             0  199    1   0   0   0    0   0
3             0    0  100   0   0   0    0   0
4             0    0    0  55   0   0    0   0
5             0    0    0   0  10   0    0   0
6             0    0    0   0   0  10    0   0
7             0    0    0   0   0   0  100   0
8             0    0    0   0   0   0    0  15


No defective images have been classified as good, no good images are classified and defective. Miss classification at 0.05%

### Support Vector Machines validation

In [20]:
predict_unseen(unseen_X=unseen_X,estm='SVM')

----------------------
Unseen Data evaluation
----------------------

Total classified unseen image count:  1790
Miss classified unseen image count:  1
% of miss classified unseen images:  0.055865921787709494 %

Batch Yield %: 72.62569832402235 %

Unseen data prediction confusion matrix
Predicted     1    2    3   4   5   6    7   8
Actual                                        
1          1300    0    0   0   0   0    0   0
2             0  199    1   0   0   0    0   0
3             0    0  100   0   0   0    0   0
4             0    0    0  55   0   0    0   0
5             0    0    0   0  10   0    0   0
6             0    0    0   0   0  10    0   0
7             0    0    0   0   0   0  100   0
8             0    0    0   0   0   0    0  15


No defective images have been classified as good, no good images are classified and defective. Miss classification at 0.05%

## Conclusion
Based on train and test scores, accuracy and unseen data miss classification rate either Extra Trees Classifier and Support Vector Machine can be deployed.