In [106]:
import os
from pathlib import Path
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from skimage.restoration import denoise_wavelet
from skimage.feature import local_binary_pattern
from sklearn import neighbors, linear_model
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.neural_network import MLPClassifier
from scipy.stats import moment, kurtosis, skew
from sklearn.decomposition import PCA
import pywt
import cv2

In [2]:
# Feature Extraction - Local Binary Pattern
def lbp_feature_extraction(im, n_points, radius, method):
    #im_gray = cv2.cvtColor(np.array(im), cv2.COLOR_RGB2GRAY)
    lbp = local_binary_pattern(im[:,:,0], n_points, radius, method)
    # Features returned are the hisogram of resulting data from lbp algorithm
    (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), range=(0, radius + 2))
    
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-7)
    
    return hist

In [3]:
# Feature Extraction - Denoise Wavelets
def den_wavelets(noise):    
    resp = []
    n_moments = 9
    for i in range(np.shape(noise)[2]):
        coeffs = pywt.dwt2(noise[:,:,i], 'haar')
        cA, (cH, cV, cD) = coeffs
        for j in range(1,n_moments+1):
            resp.append(moment(cH.ravel(), moment=j))
            resp.append(moment(cV.ravel(), moment=j))
            resp.append(moment(cD.ravel(), moment=j))
        resp.append(kurtosis(cH.ravel()))
        resp.append(kurtosis(cV.ravel()))
        resp.append(kurtosis(cD.ravel()))
        resp.append(skew(cH.ravel()))
        resp.append(skew(cV.ravel()))
        resp.append(skew(cD.ravel()))
    return resp

In [4]:
def crop_center_image(im, rows, columns):
    im = rgb_image
    h_mean = np.shape(im)[0]//2
    l_mean = np.shape(im)[1]//2
    rows_inc = rows//2
    columns_inc = columns//2
    im = im[h_mean-rows_inc:h_mean+rows_inc, l_mean-columns_inc:l_mean+columns_inc,:]
    return im

In [5]:
''''seg = train[train['camera'] == cameras[1]]
seg = seg.reset_index()
path_im = train_path / seg.at[0, 'camera'] / seg.at[120, 'fname']
bgr_image = cv2.imread(str(path_im))
rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
#im = cv2.resize(rgb_image, (128, 128))
im = rgb_image

print(np.shape(im))

plt.imshow(im)
plt.figure()
start = time.time()
plt.imshow(crop_center_image(im, 512, 512))
end = time.time()
print('%f'%(end-start))
plt.figure()
a = denoise_wavelet(im)
print(np.shape(a))
plt.imshow(a)
plt.figure()
plt.imshow(im-a)'''


"'seg = train[train['camera'] == cameras[1]]\nseg = seg.reset_index()\npath_im = train_path / seg.at[0, 'camera'] / seg.at[120, 'fname']\nbgr_image = cv2.imread(str(path_im))\nrgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)\n#im = cv2.resize(rgb_image, (128, 128))\nim = rgb_image\n\nprint(np.shape(im))\n\nplt.imshow(im)\nplt.figure()\nstart = time.time()\nplt.imshow(crop_center_image(im, 512, 512))\nend = time.time()\nprint('%f'%(end-start))\nplt.figure()\na = denoise_wavelet(im)\nprint(np.shape(a))\nplt.imshow(a)\nplt.figure()\nplt.imshow(im-a)"

In [6]:
is_kaggle = False
# Generating train and test path
if is_kaggle:
    input_path = Path('../input')
else:
    input_path = Path('../Kaggle-camera-model-identification/data')
train_path = input_path / 'train'
test_path = input_path / 'test'

In [7]:
# Listing all directories in trainning path
cameras = os.listdir(train_path)

In [8]:
# Defining size of trainning
size_train = 0.8

In [9]:
# Generating DataFrame of images and labes in the trainning and validation dataset
train_images = []
n_pictures = []

for camera in cameras:
    pic = len(os.listdir(train_path / camera))
    n_pictures.append(pic)
    for fname in sorted(os.listdir(train_path / camera)):
            train_images.append((camera, fname))
train = pd.DataFrame(train_images, columns=['camera', 'fname'])

In [10]:
# Generating DataFrame of images and labes in the testing dataset
test_images = []
for fname in sorted(os.listdir(test_path)):
    test_images.append(fname)
test = pd.DataFrame(test_images, columns=['fname'])

In [11]:
# Procedure to mount feature vectors of trainning and validation
train_lbp = []
train_wav = []
valid_lbp = []
valid_wav = []
train_target = []
valid_target = []

im_mean_vec = []
im_mean = 0

j = 0

start = time.time()
for camera in cameras:
    print("Feature extraction: %s"%(camera))
    for i in range(275):
        print("Example %i"%i, end = '\r')
        seg = train[train['camera'] == camera]
        seg = seg.reset_index()
        path_im = train_path / seg.at[i, 'camera'] / seg.at[i, 'fname']
        bgr_image = cv2.imread(str(path_im))
        rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        #im = rgb_image
        im = crop_center_image(rgb_image, 512, 512)
        im_clean = denoise_wavelet(im, multichannel=True)
        noise = np.abs(im - 255*im_clean)
        #im = cv2.resize(rgb_image, (512, 512))
        if i < 220:
            train_wav.append(den_wavelets(noise))
            train_lbp.append(lbp_feature_extraction(noise, 24, 8, 'uniform'))
            train_target.append(seg.at[i, 'camera'])
        else:
            valid_wav.append(den_wavelets(noise))
            valid_lbp.append(lbp_feature_extraction(noise, 24, 8, 'uniform'))
            valid_target.append(seg.at[i, 'camera'])
        
        im_mean += noise
    im_mean_vec.append(im_mean/(i+1))
    im_mean = 0
    
    j += 1
    print("Extraction finished!\n")
end = time.time()

print('The time elapsed to extract all features was: %.2f min'%((end-start)/60))

Feature extraction: Motorola-Nexus-6
Extraction finished!

Feature extraction: HTC-1-M7
Extraction finished!

Feature extraction: LG-Nexus-5x
Extraction finished!

Feature extraction: Motorola-Droid-Maxx
Extraction finished!

Feature extraction: Motorola-X
Extraction finished!

Feature extraction: Samsung-Galaxy-S4
Extraction finished!

Feature extraction: Samsung-Galaxy-Note3
Extraction finished!

Feature extraction: iPhone-6
Extraction finished!

Feature extraction: iPhone-4s
Extraction finished!

Feature extraction: Sony-NEX-7
Extraction finished!

The time elapsed to extract all features was: 25.96 min


In [13]:
features_vec = []
valid_vec = []
        
for camera in cameras:
    print("Feature extraction: %s"%(camera))
    for i in range(275):
        print("Example %i"%i, end = '\r')
        seg = train[train['camera'] == camera]
        seg = seg.reset_index()
        path_im = train_path / seg.at[i, 'camera'] / seg.at[i, 'fname']
        bgr_image = cv2.imread(str(path_im))
        rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        #im = rgb_image
        im = crop_center_image(rgb_image, 512, 512)
        
        im_clean = denoise_wavelet(im, multichannel=True)
        noise = np.abs(im - 255*im_clean)
        
        features_sing = []
        
        for j in range(3):
            aux = np.corrcoef(noise[:,:,j].ravel(),np.array(im_mean_vec)[:,:,:,j].reshape(10,-1))
            features_sing = np.concatenate((features_sing, aux[:,0]))  
        if i < 220:
            features_vec.append(features_sing)
        else:
            valid_vec.append(features_sing)

Feature extraction: Motorola-Nexus-6
Feature extraction: HTC-1-M7
Feature extraction: LG-Nexus-5x
Feature extraction: Motorola-Droid-Maxx
Feature extraction: Motorola-X
Feature extraction: Samsung-Galaxy-S4
Feature extraction: Samsung-Galaxy-Note3
Feature extraction: iPhone-6
Feature extraction: iPhone-4s
Feature extraction: Sony-NEX-7
Example 274

In [15]:
test_feat = []

test_feat_vec = []
test_lbp_vec = []
test_wav_vec = []
test_corr_vec = []

print('fname,camera')
for i in range(np.shape(test)[0]):
    print('%i'%i, end ='\r')
    path_im = test_path /  test.at[i, 'fname']
    bgr_image = cv2.imread(str(path_im))
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    im = rgb_image
    im_clean = denoise_wavelet(im, multichannel=True)
    noise = np.abs(im - 255*im_clean)
    test_wav = den_wavelets(noise)
    test_wav = np.array(test_wav).reshape(1,-1)

    
    test_lbp = lbp_feature_extraction(noise, 24, 8, 'uniform')
    
    test_sing = []
    
    for j in range(3):
            aux = np.corrcoef(noise[:,:,j].ravel(),np.array(im_mean_vec)[:,:,:,j].reshape(10,-1))
            test_sing = np.concatenate((test_sing, aux[:,0])) 


    test_feat = np.concatenate((test_wav[0], test_sing, test_lbp), axis=0)

    test_feat_vec.append(test_feat)
    test_wav_vec.append(test_wav[0])
    test_lbp_vec.append(test_lbp)
    test_corr_vec.append(test_sing)

fname,camera
2639

In [114]:
train_wav = np.array(train_wav)
train_wav = train_wav.reshape(np.shape(train_wav)[0],-1)

valid_wav = np.array(valid_wav)
valid_wav = valid_wav.reshape(np.shape(valid_wav)[0],-1)

final_feat = np.concatenate((train_wav, train_lbp), axis=1)
final_valid = np.concatenate((valid_wav, valid_lbp), axis=1)

In [133]:
scaler_final = StandardScaler()
final_feat_m = scaler_final.fit_transform(final_feat)
final_valid_m = scaler_final.transform(final_valid)
test_feat_vec_m = scaler_final.transform(np.concatenate((test_wav_vec, test_lbp_vec), axis=1))

scaler = StandardScaler()
train_wav_m = scaler.fit_transform(train_wav)
valid_wav_m = scaler.transform(valid_wav)
test_wav_vec_m = scaler.transform(test_wav_vec)

scaler_lbp = StandardScaler()
train_lbp_m = scaler_lbp.fit_transform(train_lbp)
valid_lbp_m = scaler_lbp.transform(valid_lbp)
test_lbp_vec_m = scaler_lbp.transform(test_lbp_vec)

scaler_corr = StandardScaler()
features_vec_m = scaler_corr.fit_transform(features_vec)
valid_vec_m = scaler_corr.transform(valid_vec)
test_corr_vec_m = scaler_corr.transform(test_corr_vec)

In [146]:
#Predicting using logistic regression
logistic = linear_model.LogisticRegression()
print('Logistic Regression\nTrainning')
print('LBP - Score: %.2f'%(logistic.fit(train_lbp_m, train_target).score(train_lbp_m, train_target)))
print('WAV - Score: %.2f'%(logistic.fit(train_wav_m, train_target).score(train_wav_m, train_target)))
#print('CORR - Score: %.2f'%(logistic.fit(features_vec_m, train_target).score(features_vec_m, train_target)))
print('ALL - Score: %.2f'%(logistic.fit(final_feat_m, train_target).score(final_feat_m, train_target)))
print('Validation')
print('LBP - Score: %.2f'%(logistic.fit(train_lbp_m, train_target).score(valid_lbp_m, valid_target)))
print('WAV - Score: %.2f'%(logistic.fit(train_wav_m, train_target).score(valid_wav_m, valid_target)))
#print('CORR - Score: %.2f'%(logistic.fit(features_vec_m, train_target).score(valid_vec_m, valid_target)))
print('ALL - Score: %.2f\n'%(logistic.fit(final_feat_m, train_target).score(final_valid_m, valid_target)))

# Predicting using KNN
knn = neighbors.KNeighborsClassifier()
print('K-Nearest Neighbors\nTrainning')
print('LBP - Score: %.2f'%(knn.fit(train_lbp_m, train_target).score(train_lbp_m, train_target)))
print('WAV - Score: %.2f'%(knn.fit(train_wav_m, train_target).score(train_wav_m, train_target)))
#print('CORR - Score: %.2f'%(knn.fit(features_vec_m, train_target).score(features_vec_m, train_target)))
print('ALL - Score: %.2f'%(knn.fit(final_feat_m, train_target).score(final_feat_m, train_target)))
print('Validation')
print('LBP - Score: %.2f'%(knn.fit(train_lbp_m, train_target).score(valid_lbp_m, valid_target)))
print('WAV - Score: %.2f'%(knn.fit(train_wav_m, train_target).score(valid_wav_m, valid_target)))
#print('CORR - Score: %.2f'%(knn.fit(features_vec_m, train_target).score(valid_vec_m, valid_target)))
print('ALL - Score: %.2f\n'%(knn.fit(final_feat_m, train_target).score(final_valid_m, valid_target)))

# Predicting using logistic regression
mlp = MLPClassifier(alpha=1e-5, hidden_layer_sizes=(200), random_state=1, max_iter = 300)
print('Multilayer Perceptron\nTrainning')
print('LBP - Score: %.2f'%(mlp.fit(train_lbp_m, train_target).score(train_lbp_m, train_target)))
print('WAV - Score: %.2f'%(mlp.fit(train_wav_m, train_target).score(train_wav_m, train_target)))
#print('CORR - Score: %.2f'%(mlp.fit(features_vec_m, train_target).score(features_vec_m, train_target)))
print('ALL - Score: %.2f'%(mlp.fit(final_feat_m, train_target).score(final_feat_m, train_target)))
print('Validation')
print('LBP - Score: %.2f'%(mlp.fit(train_lbp_m, train_target).score(valid_lbp_m, valid_target)))
print('WAV - Score: %.2f'%(mlp.fit(train_wav_m, train_target).score(valid_wav_m, valid_target)))
#print('CORR - Score: %.2f'%(mlp.fit(features_vec_m, train_target).score(valid_vec_m, valid_target)))
print('ALL - Score: %.2f'%(mlp.fit(final_feat_m, train_target).score(final_valid_m, valid_target)))

Logistic Regression
Trainning
LBP - Score: 0.33
WAV - Score: 0.44
ALL - Score: 0.54
Validation
LBP - Score: 0.32
WAV - Score: 0.41
ALL - Score: 0.54

K-Nearest Neighbors
Trainning
LBP - Score: 0.54
WAV - Score: 0.62
ALL - Score: 0.69
Validation
LBP - Score: 0.31
WAV - Score: 0.42
ALL - Score: 0.55

Multilayer Perceptron
Trainning
LBP - Score: 0.57
WAV - Score: 0.56
ALL - Score: 0.83
Validation
LBP - Score: 0.39
WAV - Score: 0.47
ALL - Score: 0.62


In [143]:
submit = []

knn.fit(final_feat_m, train_target)

pred = knn.predict(test_feat_vec_m)
    
for i in range(np.shape(test)[0]):
    print(pred[i])
    submit.append([test.at[i, 'fname'], pred[i]])

Motorola-X
Motorola-Nexus-6
iPhone-6
LG-Nexus-5x
HTC-1-M7
Motorola-X
Sony-NEX-7
HTC-1-M7
Motorola-Droid-Maxx
Motorola-X
Motorola-X
Sony-NEX-7
HTC-1-M7
Samsung-Galaxy-Note3
Motorola-X
Sony-NEX-7
Motorola-Nexus-6
HTC-1-M7
Samsung-Galaxy-S4
Motorola-X
LG-Nexus-5x
HTC-1-M7
LG-Nexus-5x
Motorola-Droid-Maxx
HTC-1-M7
HTC-1-M7
Motorola-Nexus-6
LG-Nexus-5x
Motorola-Nexus-6
Motorola-Nexus-6
Motorola-Droid-Maxx
Motorola-Droid-Maxx
Samsung-Galaxy-Note3
HTC-1-M7
LG-Nexus-5x
iPhone-6
iPhone-6
Motorola-X
Samsung-Galaxy-Note3
Samsung-Galaxy-Note3
LG-Nexus-5x
LG-Nexus-5x
Motorola-Droid-Maxx
HTC-1-M7
Motorola-Nexus-6
Motorola-Nexus-6
Samsung-Galaxy-Note3
HTC-1-M7
Motorola-Nexus-6
Sony-NEX-7
LG-Nexus-5x
Motorola-Droid-Maxx
HTC-1-M7
iPhone-6
HTC-1-M7
Samsung-Galaxy-Note3
LG-Nexus-5x
Motorola-Nexus-6
iPhone-6
iPhone-6
Sony-NEX-7
LG-Nexus-5x
iPhone-6
iPhone-6
Sony-NEX-7
Samsung-Galaxy-S4
Samsung-Galaxy-Note3
Motorola-X
Motorola-Nexus-6
Motorola-Droid-Maxx
Motorola-Droid-Maxx
LG-Nexus-5x
Sony-NEX-7
Motorola-D

Motorola-Nexus-6
LG-Nexus-5x
HTC-1-M7
Motorola-Droid-Maxx
Motorola-X
HTC-1-M7
Motorola-Nexus-6
iPhone-6
Samsung-Galaxy-Note3
Motorola-X
Motorola-Droid-Maxx
Motorola-Droid-Maxx
Samsung-Galaxy-Note3
Motorola-Nexus-6
Sony-NEX-7
Sony-NEX-7
Sony-NEX-7
Samsung-Galaxy-Note3
Motorola-Nexus-6
HTC-1-M7
Sony-NEX-7
Samsung-Galaxy-Note3
LG-Nexus-5x
Sony-NEX-7
Motorola-Nexus-6
iPhone-6
Motorola-Nexus-6
Motorola-X
LG-Nexus-5x
Motorola-Droid-Maxx
iPhone-6
Motorola-Nexus-6
iPhone-6
Motorola-X
Motorola-Nexus-6
LG-Nexus-5x
Motorola-Nexus-6
Motorola-Nexus-6
iPhone-6
Samsung-Galaxy-Note3
HTC-1-M7
Motorola-X
Motorola-Nexus-6
iPhone-6
Motorola-Droid-Maxx
HTC-1-M7
Sony-NEX-7
Motorola-Nexus-6
iPhone-6
Motorola-Nexus-6
Motorola-Droid-Maxx
HTC-1-M7
Motorola-X
iPhone-6
Samsung-Galaxy-Note3
Motorola-X
Motorola-X
iPhone-6
Motorola-Nexus-6
HTC-1-M7
Motorola-Nexus-6
Sony-NEX-7
LG-Nexus-5x
iPhone-6
iPhone-6
Samsung-Galaxy-Note3
Motorola-Nexus-6
Motorola-X
Samsung-Galaxy-Note3
Samsung-Galaxy-Note3
Samsung-Galaxy-Note3


In [144]:
submit_pd = pd.DataFrame(submit, columns=['fname', 'camera'])

print(submit_pd)

submit_pd.to_csv('submission_final_KNN', index = False)

                      fname                camera
0     img_0002a04_manip.tif            Motorola-X
1     img_001e31c_unalt.tif      Motorola-Nexus-6
2     img_00275cf_manip.tif              iPhone-6
3     img_0034113_unalt.tif           LG-Nexus-5x
4     img_00344b7_unalt.tif              HTC-1-M7
5     img_0062ce8_manip.tif            Motorola-X
6     img_00885c3_manip.tif            Sony-NEX-7
7     img_008a869_unalt.tif              HTC-1-M7
8     img_008fb06_manip.tif   Motorola-Droid-Maxx
9     img_00c98af_manip.tif            Motorola-X
10    img_00d0e3b_unalt.tif            Motorola-X
11    img_00e097a_manip.tif            Sony-NEX-7
12    img_00f61b7_unalt.tif              HTC-1-M7
13    img_010d9d4_unalt.tif  Samsung-Galaxy-Note3
14    img_011d3a0_manip.tif            Motorola-X
15    img_015b5e1_unalt.tif            Sony-NEX-7
16    img_015b7e6_manip.tif      Motorola-Nexus-6
17    img_017184c_unalt.tif              HTC-1-M7
18    img_0184cda_unalt.tif     Samsung-Galaxy-S4
