In [1]:
import os

import numpy as np
import matplotlib.pyplot as plt
import cv2
import pywt
from tqdm import tqdm
# from scipy.signal import wiener
# import skimage as skimg 

from sklearn.preprocessing import (MinMaxScaler, StandardScaler, FunctionTransformer)
# from sklearn.model_selection import (train_test_split,)
from sklearn.model_selection import (train_test_split, StratifiedKFold,)
# from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import (SVC)

from sklearn.base import (BaseEstimator, TransformerMixin, clone)
from sklearn.pipeline import (make_pipeline, make_union, Pipeline)
from sklearn.metrics import (classification_report, accuracy_score)

In [2]:
class CLASS:
    '''Integer labels used for the two image classes.'''
    FAKE, REAL = 0, 1


# Wavelet name and decomposition depth kept as notebook-level settings.
# NOTE(review): the extraction cell reassigns LEVEL and does not pass WT_NAME.
WT_NAME, LEVEL = 'db5', 3

### Load image

In [3]:
# Root folder that contains one sub-folder per image class.
# The ICL_DATA_DIR environment variable overrides the original machine-specific
# location, so the notebook can run elsewhere without editing this cell.
PATH_PREFIX = os.environ.get('ICL_DATA_DIR', '/home/thienn17/Documents/ICL/')
# Displayed as the cell output: whether the dataset root is reachable.
os.path.exists(PATH_PREFIX)

True

In [4]:
class LoadImage():
    '''
    Load the images below `path_prefix` one directory at a time.

    Every entry of `path_prefix` except 'dummy' is treated as a class folder.
    Each call to `next_batch` reads all files of the next non-empty directory
    found while walking the current class folder, then moves on to the next
    class folder once the walk is exhausted.
    '''

    def __init__(self, path_prefix):
        '''
        path_prefix: root directory holding the class folders.
        Raises AssertionError when `path_prefix` does not exist.
        '''
        assert os.path.exists(path_prefix), "LoadImage, Path does not exist"
        self.path_prefix = path_prefix

        self.cls_folders = [f for f in os.listdir(path_prefix) if f != 'dummy']
        self.cls_id = 0
        if self.cls_folders:
            self.path_walk = os.walk(os.path.join(path_prefix, self.cls_folders[0]))
        else:
            # Nothing to walk: an exhausted iterator makes next_batch() return
            # None instead of failing here with an IndexError.
            self.path_walk = iter(())

        self.images = []
        self.labels = []

    def reset(self):
        '''
        Start a fresh image list for the next batch.

        `labels` is deliberately left untouched: it keeps growing across
        batches, and the extraction loop uses the list returned with the last
        batch as the label vector for all samples.
        '''
        self.images = []

    def next_batch(self):
        '''
        Load the next non-empty directory.

        Returns (images, labels) where `images` holds only this batch and
        `labels` holds the labels of every image loaded so far, or None when
        all class folders have been walked.
        '''
        # Find the next directory that contains files, switching to the next
        # class folder whenever the current walk runs out.
        while True:
            try:
                abspath, _, files = next(self.path_walk)
            except StopIteration:
                if self.cls_id >= len(self.cls_folders) - 1:
                    return None
                self.cls_id += 1
                self.path_walk = os.walk(os.path.join(self.path_prefix, self.cls_folders[self.cls_id]))
                continue
            if len(files) > 0:
                break

        self.reset()
        # The label depends only on the class folder: names starting with 'S'
        # are REAL, everything else is FAKE.
        label = CLASS.REAL if self.cls_folders[self.cls_id][0] == 'S' else CLASS.FAKE
        for file in tqdm(files):
            img = cv2.imread(os.path.join(abspath, file))
            # Files for which cv2.imread returns None are skipped, so images
            # and labels stay aligned.
            if img is not None:
                # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                self.images.append(img)
                self.labels.append(label)
        return self.images, self.labels

### Feature modules

In [5]:
class WaveletTransformer(BaseEstimator, TransformerMixin):
    '''
    Stateless transformer turning images into wavelet detail statistics.

    Each channel of each image is decomposed with `pywt.wavedec2`; the
    approximation coefficients are dropped and the mean and variance of every
    detail coefficient array are emitted as features.
    '''

    def __init__(self, wtname = 'haar', level = 3):
        '''
        wtname: wavelet name handed to pywt, e.g. 'haar', 'db3', 'db5', 'sym2', 'bior5.5'
        level: total number of decomposition levels
        '''
        self.wtname = wtname
        self.level = level
    
    def fit(self, X, y=None):
        '''
        Nothing is learned; returns self.

        `y` is optional so that `fit(X)` and `fit_transform(X)` work without
        labels, as scikit-learn transformers are expected to.
        '''
        return self

    def transform(self, X):
        '''
        X: iterable of images, either (height, width, channels) or a 2-D
           single-channel (height, width) array.

        Returns a list with one feature list per image. Per channel, the
        features are (mean, variance) pairs for each detail array, ordered as
        returned by `pywt.wavedec2`.
        '''
        features = []
        for image in tqdm(X):
            image = np.asarray(image)
            if image.ndim == 2:
                # Grayscale input: treat it as one channel instead of letting
                # moveaxis iterate over its columns.
                channels = image[np.newaxis]
            else:
                channels = np.moveaxis(image, -1, 0)

            img_features = []
            for channel in channels:
                coeffs = pywt.wavedec2(data=channel, wavelet=self.wtname, level=self.level)
                # coeffs[0] is the approximation; only the details are used.
                for level_details in coeffs[1:]:
                    for detail in level_details:
                        img_features.append(np.mean(detail))
                        img_features.append(np.var(detail))
            features.append(img_features)
        return features

### Extract & Dump

In [6]:
LEVEL = 1
extractor = WaveletTransformer(level=LEVEL)
# Width of one feature row as produced by WaveletTransformer for 3-channel
# images: channels x detail arrays per level x (mean, var) x levels.
# Row 0 is an uninitialised placeholder that the following cell strips off.
feature_chunks = [np.empty((1,3*3*2*LEVEL))]
b = None
loader = LoadImage(PATH_PREFIX)
batch = 1
while(True):
    print("Batch {}".format(batch))
    batch += 1
    res = loader.next_batch()
    if res is None:
        break
    # `b` is the loader's cumulative label list, so after the last batch it
    # covers every extracted row.
    images, b = res
    if images:
        # Skipping batches without a decodable image avoids concatenating a
        # 1-D empty array with the 2-D feature matrix.
        feature_chunks.append(np.asarray(extractor.transform(images)))
# One concatenation at the end instead of re-copying A on every batch.
A = np.concatenate(feature_chunks, axis=0)

Batch 1


100%|██████████| 100/100 [00:18<00:00,  5.28it/s]
100%|██████████| 100/100 [01:10<00:00,  1.42it/s]


Batch 2


100%|██████████| 100/100 [00:06<00:00, 14.35it/s]
100%|██████████| 100/100 [00:25<00:00,  4.00it/s]


Batch 3


100%|██████████| 100/100 [00:09<00:00, 10.06it/s]
100%|██████████| 100/100 [00:31<00:00,  3.16it/s]


Batch 4


100%|██████████| 5/5 [00:00<00:00, 10.41it/s]
100%|██████████| 5/5 [00:01<00:00,  3.13it/s]


Batch 5


100%|██████████| 100/100 [00:07<00:00, 13.08it/s]
100%|██████████| 100/100 [00:52<00:00,  1.91it/s]


Batch 6


100%|██████████| 100/100 [00:06<00:00, 15.64it/s]
100%|██████████| 100/100 [00:25<00:00,  3.99it/s]


Batch 7


100%|██████████| 100/100 [00:19<00:00,  5.17it/s]
100%|██████████| 100/100 [01:09<00:00,  1.44it/s]


Batch 8


100%|██████████| 100/100 [00:16<00:00,  5.94it/s]
100%|██████████| 100/100 [00:49<00:00,  2.02it/s]


Batch 9


100%|██████████| 100/100 [00:24<00:00,  4.10it/s]
100%|██████████| 100/100 [02:21<00:00,  1.41s/it]


Batch 10


100%|██████████| 100/100 [00:09<00:00, 10.22it/s]
100%|██████████| 100/100 [00:30<00:00,  3.33it/s]


Batch 11


100%|██████████| 180/180 [00:12<00:00, 14.44it/s]
100%|██████████| 180/180 [00:25<00:00,  7.12it/s]


Batch 12


100%|██████████| 180/180 [00:10<00:00, 17.84it/s]
100%|██████████| 180/180 [00:18<00:00,  9.50it/s]


Batch 13


100%|██████████| 180/180 [00:11<00:00, 16.16it/s]
100%|██████████| 180/180 [00:21<00:00,  8.19it/s]


Batch 14


100%|██████████| 180/180 [00:11<00:00, 15.60it/s]
100%|██████████| 180/180 [00:22<00:00,  7.99it/s]


Batch 15


100%|██████████| 180/180 [00:11<00:00, 15.66it/s]
100%|██████████| 180/180 [00:22<00:00,  7.87it/s]


Batch 16


100%|██████████| 180/180 [00:09<00:00, 18.16it/s]
100%|██████████| 180/180 [00:19<00:00,  9.14it/s]


Batch 17


100%|██████████| 180/180 [00:10<00:00, 16.52it/s]
100%|██████████| 180/180 [00:23<00:00,  7.70it/s]


Batch 18


100%|██████████| 180/180 [00:11<00:00, 15.24it/s]
100%|██████████| 180/180 [00:30<00:00,  5.92it/s]

Batch 19





In [7]:
b = np.array(b)
# The extraction cell seeds A with one uninitialised placeholder row, so A has
# exactly one row more than there are labels until that row is removed.
# Checking for this makes the cell safe to re-run: a second run no longer
# drops a real sample.
n_labels = b.shape[0] if b.ndim else 0  # b is 0-d when no batch was loaded
if A.shape[0] == n_labels + 1:
    A = A[1:]
A.shape

(2345, 18)

In [8]:
# Directory (relative to the working directory) that receives the extracted
# feature and label arrays.
PATH_DUMP = "./object dump/no union"
# Displayed as the cell output: whether that directory exists.
os.path.exists(PATH_DUMP)

True

In [9]:
# np.save does not create missing directories; make sure the target exists so
# a run on a fresh checkout does not fail after the long extraction.
os.makedirs(PATH_DUMP, exist_ok=True)
# NOTE(review): the file names say "3lv" but the extraction cell sets LEVEL = 1
# - confirm which one is intended.
np.save(os.path.join(PATH_DUMP, "wt_BGR_3lv.npy"), A)
np.save(os.path.join(PATH_DUMP, "wt_BGR_3lv_label.npy"), b)

### Load

In [4]:
# Same dump directory literal as in the "Extract & Dump" section, repeated here
# - presumably so the "Load" cells can run without the extraction cells.
PATH_DUMP = "./object dump/no union"
# Displayed as the cell output: whether that directory exists.
os.path.exists(PATH_DUMP)

True

In [5]:
# Read previously dumped features and labels back into A and b.
# NOTE(review): these are not the files written by the save cell
# ("wt_BGR_3lv*.npy"); running top to bottom replaces the freshly extracted
# arrays with this older dump - confirm that is intended.
features_file = os.path.join(PATH_DUMP, "wt_BGR.npy")
labels_file = os.path.join(PATH_DUMP, "wt_BGR_label.npy")
A, b = np.load(features_file), np.load(labels_file)
A.shape

(2345, 24)

## Train

### Single fit

In [10]:
# Standardise the features, then classify with a default SVC.
# Step names are the lower-cased class names, as make_pipeline would assign.
pipe = Pipeline([
    ("standardscaler", StandardScaler()),
    ("svc", SVC()),
])
pipe

In [11]:
# Hold out 20% of the samples, keeping the class ratio and a fixed seed.
split_options = dict(train_size=0.8, random_state=42, stratify=b)
A_train, A_test, b_train, b_test = train_test_split(A, b, **split_options)

In [12]:
# fit() trains the pipeline in place and returns it, so no rebinding is needed;
# the trailing semicolon keeps the cell output empty.
pipe.fit(A_train, b_train);

In [13]:
# Score the fitted pipeline on the held-out split.
b_pred = pipe.predict(A_test)
holdout_report = classification_report(b_test, b_pred)
print(holdout_report)

              precision    recall  f1-score   support

           0       0.98      0.95      0.96       288
           1       0.93      0.97      0.95       181

    accuracy                           0.96       469
   macro avg       0.95      0.96      0.96       469
weighted avg       0.96      0.96      0.96       469



### KFold

In [20]:
def kfold(_estimator, X=None, y=None, n_splits=5):
    '''
    Print per-fold and mean accuracy of a stratified, shuffled k-fold run.

    _estimator: scikit-learn estimator or pipeline. It is cloned for every
        fold, so the object passed in is left exactly as it was (a pipeline
        fitted earlier stays fitted on its own training data).
    X, y: feature matrix and labels as numpy arrays; default to the notebook's
        global A and b.
    n_splits: number of folds.
    '''
    X = A if X is None else X
    y = b if y is None else y

    kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=27)
    fold_scores = []
    for k, (i_train, i_test) in enumerate(kf.split(X, y)):
        # A fresh, unfitted copy per fold keeps folds independent and avoids
        # refitting the caller's estimator as a side effect.
        model = clone(_estimator).fit(X[i_train], y[i_train])

        y_pred = model.predict(X[i_test])
        acc = accuracy_score(y_true=y[i_test], y_pred=y_pred)
        fold_scores.append(acc)

        print("[Fold {}] Acc: {:.3f}".format(k+1, acc))

    print("Mean acc: {:.3f}\n".format(sum(fold_scores)/kf.get_n_splits()))

In [21]:
# Cross-validate the scaler + SVC pipeline on A and b; kfold prints the
# per-fold and mean accuracy.
kfold(pipe)

[Fold 1] Acc: 0.959
[Fold 2] Acc: 0.955
[Fold 3] Acc: 0.964
[Fold 4] Acc: 0.949
[Fold 5] Acc: 0.968
Mean acc: 0.959

