In [1]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import pywt
from scipy.signal import wiener
import skimage as skimg 

from sklearn.preprocessing import (MinMaxScaler, StandardScaler)
# from sklearn.model_selection import (train_test_split,)
from sklearn.model_selection import (train_test_split, StratifiedKFold,)
# from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import (SVC)

from sklearn.base import (BaseEstimator, TransformerMixin)
from sklearn.pipeline import (make_pipeline, make_union,)
from sklearn.metrics import (classification_report, accuracy_score)

import os
from tqdm import tqdm

In [2]:
class CLASS():
    FAKE = 0
    REAL = 1

### Load image

In [3]:
PATH_PREFIX = '/home/thienn17/Documents/ICL/'
os.path.exists(PATH_PREFIX)

True

In [4]:
class LoadImage():
    def __init__(self, path_prefix, colorcvt=None):
        assert os.path.exists(path_prefix), "LoadImage, Path does not exist"
        self.path_prefix = path_prefix
        self.colorcvt = colorcvt

        self.cls_folders = [f for f in os.listdir(path_prefix) if f != 'dummy']
        self.cls_id = 0
        self.path_walk = os.walk(os.path.join(path_prefix, self.cls_folders[0]))
        
        self.images = []
        self.labels = []
    
    def reset(self):
        self.images = []
    
    def next_batch(self):
        try:
            while(True):
                abspath, _, files = next(self.path_walk)
                if len(files) > 0: break
            self.reset()
            print(abspath)
            for file in tqdm(files):
                img = cv2.imread(os.path.join(abspath, file))
                if img is not None:
                    if self.colorcvt is not None:
                        img = cv2.cvtColor(img, self.colorcvt)
                    self.images.append(img)
                    self.labels.append(CLASS.REAL if self.cls_folders[self.cls_id][0] == 'S' else CLASS.FAKE)
            return self.images, self.labels
        except StopIteration:
            if self.cls_id < len(self.cls_folders)-1:
                self.cls_id += 1
                self.path_walk = os.walk(os.path.join(self.path_prefix, self.cls_folders[self.cls_id]))
                return self.next_batch()

        return None

    def next_batch_new(self):
        pass

### Feature modules

In [5]:
class WaveletTransformer(BaseEstimator, TransformerMixin):
    def __init__(self, wtname = 'haar', level = 3):
        '''
        waveletname = ['haar', 'db3', 'db5', 'sym2', 'bior5.5', etc.]
        level: total number of decomposite level
        '''
        self.wtname = wtname
        self.level = level
    
    def fit(self, X, y):
        return self

    def transform(self, X):
        features = []
        for imgBGR in tqdm(X):
            img_features = []
            for img in np.moveaxis(imgBGR, -1, 0):
                wt = pywt.wavedec2(data=img, wavelet=self.wtname, level=self.level)
                # appr = wt[0]
                details = wt[1:]
                wt = []
                for levels in details:
                    for detail in levels:
                        wt.append(detail)
                for _wt in wt:
                    img_features.append(np.mean(_wt))
                    img_features.append(np.var(_wt))
            features.append(img_features)
        return features

In [7]:
class LBPTransformer(BaseEstimator, TransformerMixin):
    def __init__(self, num_points = 8, radius = 1, gray=False, noise=False):
        self.num_points = num_points
        self.radius = radius
        self.gray = gray
        self.noise = noise

    def fit(self, X, y):
        return self
    
    def transform(self, X):
        features = []
        for img in tqdm(X):
            if self.gray:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                if self.noise:
                    img = img - wiener(img, 5)
                features.append(self.get_lbp_features((img,)))
            else:
                features.append(self.get_lbp_features(np.moveaxis(img, -1, 0)))
        return features

    def get_lbp_features(self, img_channels):
        def local_binary_pattern(img):
            lbp = skimg.feature.local_binary_pattern(
                img, self.num_points, self.radius, method="nri_uniform").ravel()
            # (hist, bins) = np.histogram(lbp.ravel(), bins=59)
            bins, hist = np.unique(lbp, return_counts=True)
            
            hist = hist / len(lbp)
            return hist
        lbp_features = np.zeros(59)
        for img in img_channels:
            lbp_features += local_binary_pattern(img)
        return lbp_features / len(img_channels)

### Extract & Dump

In [8]:
## Wavelet
# LEVEL = 1
# extractor = WaveletTransformer(level=LEVEL)
# A = np.empty((1,3*3*2*LEVEL))

# LBP
extractor = LBPTransformer()
A = np.empty((1, 59))

In [9]:
b = None
loader = LoadImage(PATH_PREFIX, colorcvt=cv2.COLOR_BGR2HSV)
batch = 1
while(True):
    print("Batch {}".format(batch))
    batch += 1
    
    res = loader.next_batch()
    if res is None:
        break
    images, b = res
    A = np.concatenate((A, extractor.transform(images)), axis=0)

Batch 1
/home/thienn17/Documents/ICL/SingleCaptureImages/EOS600D


100%|██████████| 100/100 [00:20<00:00,  4.95it/s]
100%|██████████| 100/100 [09:51<00:00,  5.92s/it]


Batch 2
/home/thienn17/Documents/ICL/SingleCaptureImages/V550S


100%|██████████| 100/100 [00:07<00:00, 13.24it/s]
100%|██████████| 100/100 [04:11<00:00,  2.51s/it]


Batch 3
/home/thienn17/Documents/ICL/SingleCaptureImages/D40


100%|██████████| 100/100 [00:10<00:00,  9.59it/s]
100%|██████████| 100/100 [04:59<00:00,  3.00s/it]


Batch 4
/home/thienn17/Documents/ICL/SingleCaptureImages/D40/not used


100%|██████████| 5/5 [00:00<00:00,  9.77it/s]
100%|██████████| 5/5 [00:15<00:00,  3.01s/it]


Batch 5
/home/thienn17/Documents/ICL/SingleCaptureImages/V610


100%|██████████| 100/100 [00:08<00:00, 12.27it/s]
100%|██████████| 100/100 [05:02<00:00,  3.03s/it]


Batch 6
/home/thienn17/Documents/ICL/SingleCaptureImages/V550B


100%|██████████| 100/100 [00:06<00:00, 14.54it/s]
100%|██████████| 100/100 [04:08<00:00,  2.49s/it]


Batch 7
/home/thienn17/Documents/ICL/SingleCaptureImages/RX100


100%|██████████| 100/100 [00:21<00:00,  4.70it/s]
100%|██████████| 100/100 [10:46<00:00,  6.47s/it]


Batch 8
/home/thienn17/Documents/ICL/SingleCaptureImages/TZ7


100%|██████████| 100/100 [00:18<00:00,  5.38it/s]
100%|██████████| 100/100 [08:16<00:00,  4.97s/it]


Batch 9
/home/thienn17/Documents/ICL/SingleCaptureImages/EPM2


100%|██████████| 100/100 [00:27<00:00,  3.65it/s]
100%|██████████| 100/100 [12:08<00:00,  7.28s/it]


Batch 10
/home/thienn17/Documents/ICL/SingleCaptureImages/D70S


100%|██████████| 100/100 [00:10<00:00,  9.46it/s]
100%|██████████| 100/100 [04:54<00:00,  2.95s/it]


Batch 11
/home/thienn17/Documents/ICL/RecapturedImages/TZ10


100%|██████████| 180/180 [00:13<00:00, 13.61it/s]
100%|██████████| 180/180 [04:41<00:00,  1.56s/it]


Batch 12
/home/thienn17/Documents/ICL/RecapturedImages/60D


100%|██████████| 180/180 [00:10<00:00, 16.60it/s]
100%|██████████| 180/180 [03:36<00:00,  1.20s/it]


Batch 13
/home/thienn17/Documents/ICL/RecapturedImages/D70s


100%|██████████| 180/180 [00:11<00:00, 15.02it/s]
100%|██████████| 180/180 [04:07<00:00,  1.38s/it]


Batch 14
/home/thienn17/Documents/ICL/RecapturedImages/D3200


100%|██████████| 180/180 [00:12<00:00, 14.67it/s]
100%|██████████| 180/180 [04:11<00:00,  1.40s/it]


Batch 15
/home/thienn17/Documents/ICL/RecapturedImages/600D


100%|██████████| 180/180 [00:11<00:00, 15.23it/s]
100%|██████████| 180/180 [04:07<00:00,  1.38s/it]


Batch 16
/home/thienn17/Documents/ICL/RecapturedImages/RX100


100%|██████████| 180/180 [00:10<00:00, 17.60it/s]
100%|██████████| 180/180 [03:37<00:00,  1.21s/it]


Batch 17
/home/thienn17/Documents/ICL/RecapturedImages/TZ7


100%|██████████| 180/180 [00:11<00:00, 15.25it/s]
100%|██████████| 180/180 [04:12<00:00,  1.40s/it]


Batch 18
/home/thienn17/Documents/ICL/RecapturedImages/EPM2


100%|██████████| 180/180 [00:12<00:00, 14.15it/s]
100%|██████████| 180/180 [04:33<00:00,  1.52s/it]

Batch 19





In [10]:
A = A[1:]
b = np.array(b)
A.shape

(2345, 59)

In [18]:
PATH_DUMP = "./object dump/no union"
os.path.exists(PATH_DUMP)

True

In [19]:
FILE = "lbp_HSV"
np.save(os.path.join(PATH_DUMP, FILE+".npy"), A)
np.save(os.path.join(PATH_DUMP, FILE+"_label.npy"), b)

### Load

In [8]:
PATH_DUMP = "./object dump/no union"
os.path.exists(PATH_DUMP)

True

In [25]:
FILE = "wt_BGR_3lv"
A = np.load(os.path.join(PATH_DUMP, FILE+".npy"))
b = np.load(os.path.join(PATH_DUMP, FILE+"_label.npy"))
A.shape

(2345, 54)

In [35]:
FILE = "wt_BGR_3lv"
FILE2 = "lbp_gray"
assert np.all(np.load(os.path.join(PATH_DUMP, FILE+"_label.npy")) == np.load(os.path.join(PATH_DUMP, FILE2+"_label.npy"))), "Different order"

A = np.concatenate((np.load(os.path.join(PATH_DUMP, FILE+".npy")),
                    np.load(os.path.join(PATH_DUMP, FILE2+".npy"))),
                   axis=1)
b = np.load(os.path.join(PATH_DUMP, FILE+"_label.npy"))
A.shape

(2345, 113)

## Train

### Single fit

In [11]:
A_train, A_test, b_train, b_test = train_test_split(A, b,
                                                    train_size=0.8,
                                                    random_state=42,
                                                    stratify=b)

In [12]:
pipe = make_pipeline(StandardScaler(), SVC())
pipe = pipe.fit(A_train, b_train)

In [13]:
b_pred = pipe.predict(A_test)
print(classification_report(b_test, b_pred))

              precision    recall  f1-score   support

           0       0.96      0.99      0.98       288
           1       0.98      0.94      0.96       181

    accuracy                           0.97       469
   macro avg       0.97      0.96      0.97       469
weighted avg       0.97      0.97      0.97       469



### KFold

In [16]:
def kfold(_estimator):
    # print(_estimator)
    
    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=12)
    score = 0
    for k, (i_train, i_test) in enumerate(kf.split(A, b)):
        model = _estimator.fit(A[i_train,], b[i_train])

        y_pred = model.predict(A[i_test,])
        acc = accuracy_score(y_true=b[i_test], y_pred=y_pred)
        score += acc

        print("[Fold {}] Acc: {:.3f}".format(k+1, acc))
        # print(classification_report(y_true=b[i_test], y_pred=y_pred))

    print("Mean acc: {:.3f}\n".format(score/kf.get_n_splits()))

In [17]:
kfold(pipe)

[Fold 1] Acc: 0.957
[Fold 2] Acc: 0.949
[Fold 3] Acc: 0.936
[Fold 4] Acc: 0.955
[Fold 5] Acc: 0.938
Mean acc: 0.947

