In [1]:
import os
import numpy as numpy
import matplotlib.pyplot as plt
import cv2
import pandas as pd
import random
from tqdm import tqdm 

In [2]:
import skimage
from skimage.color import rgb2gray
from skimage.color import gray2rgb
from skimage import measure
from skimage.metrics import structural_similarity as ssim

In [3]:
def SSIM(imageA, imageB):
  """Return the structural similarity of two 3-channel images, mapped to [0, 1].

  imageB is resized to imageA's height and width when they differ, both
  images are converted to grayscale with cv2.COLOR_BGR2GRAY, and the SSIM
  score is rescaled as (score + 1) / 2.

  Args:
    imageA: reference image array; its first two shape entries are used as
      (height, width).
    imageB: image array to compare against imageA.

  Returns:
    The rescaled SSIM score.
  """
  height, width = imageA.shape[:2]
  B = imageB
  if imageB.shape[:2] != (height, width):
    # cv2.resize takes dsize as (width, height), the reverse of shape order.
    B = cv2.resize(imageB, (width, height), interpolation = cv2.INTER_AREA)
  grayA = cv2.cvtColor(imageA, cv2.COLOR_BGR2GRAY)
  grayB = cv2.cvtColor(B, cv2.COLOR_BGR2GRAY)
  # Only the scalar score is needed, so the full SSIM map is not requested.
  score = ssim(grayA, grayB)
  return (score + 1) / 2
  

In [83]:
# Load the IMD image table and preview its first rows.
df = pd.read_csv('imd_FULL.csv')
df.head()

Unnamed: 0,image_patch,mask_patch,label,fold,ela
0,1c8xyn/1c8xyn_orig.jpg,,0,0,1c8xyn/1c8xyn_orig_ela.jpg
1,1c8xyn/c9e9r5q_0.jpg,1c8xyn/c9e9r5q_0_mask.jpg,1,0,1c8xyn/c9e9r5q_0_ela.jpg
2,extra_orig/fabola_47344752582.jpg,,0,0,extra_orig/fabola_47344752582_ela.jpg
3,1c2ojm/c9crgzl_0.jpg,1c2ojm/c9crgzl_0_mask.jpg,1,0,1c2ojm/c9crgzl_0_ela.jpg
4,1a84zh/c8v70y3_0.jpg,1a84zh/c8v70y3_0_mask.jpg,1,0,1a84zh/c8v70y3_0_ela.jpg


In [5]:
# NOTE(review): `path` is not defined in any visible cell — presumably the
# dataset root directory; confirm and define it before this cell runs.
im = cv2.imread(path + '/' + '1bczrg/1bczrg_orig.jpg')
# NOTE(review): SSIM() converts with COLOR_BGR2GRAY, so handing it an RGB image
# swaps the channel weights; harmless for the self-comparison that follows.
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

In [6]:
# Sanity check: compare the image with itself.
print(SSIM(im, im))

1.0


In [84]:
# Directory under which extract_imd creates one patch folder per image.
OUTPUT_DIR = 'Image_Manipulation_Dataset/IMD2020/image_patch_64'
# Directory that extract_imd joins image paths onto before reading them.
DIR = 'Image_Manipulation_Dataset/IMD2020/imd_data'

In [85]:
def extract_imd(param, increment, threshold=0.65):
    """Tile one image into square patches, drop near-duplicates, save the rest.

    The image is cut into non-overlapping ``increment`` x ``increment`` tiles
    (partial tiles at the right/bottom edges are skipped). The tiles are
    shuffled, then filtered greedily: a tile is kept only if its SSIM() score
    against every tile kept so far is below ``threshold``. Kept tiles are
    written to ``OUTPUT_DIR/<image name>/<k>.jpg``.

    Args:
        param: the image path relative to DIR, or a row-like sequence whose
            first element is that path (e.g. one row of ``real.values``).
        increment: tile side length in pixels; also the stride between tiles.
        threshold: SSIM() score at or above which a tile counts as a
            duplicate of an already-kept tile.

    Raises:
        FileNotFoundError: if cv2 cannot read the image.
    """
    # Accept a bare path as well as a row; indexing a str would yield one char.
    path = param if isinstance(param, str) else param[0]
    full_path = os.path.join(DIR, path)
    img = cv2.imread(full_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError('could not read image: ' + full_path)

    # File name without directory or extension (any extension length).
    name = os.path.splitext(path.split('/')[-1])[0]

    height, width = img.shape[:2]
    patches = [
        img[i : i + increment, j : j + increment]
        for i in range(0, height - increment + 1, increment)
        for j in range(0, width - increment + 1, increment)
    ]
    random.shuffle(patches)

    # Greedy de-duplication: keep a patch unless it matches an earlier kept one.
    imd_patch = []
    for candidate in patches:
        if not any(SSIM(kept, candidate) >= threshold for kept in imd_patch):
            imd_patch.append(candidate)

    out_dir = os.path.join(OUTPUT_DIR, name)
    os.makedirs(out_dir, exist_ok=True)
    for i, x in enumerate(imd_patch):
        cv2.imwrite(os.path.join(out_dir, str(i)+'.jpg'), x)

In [18]:
# extract_imd reads the path from param[0], so wrap the path in a row-like list.
extract_imd(['1c8xyn/1c8xyn_orig.jpg'], 64)

28
13


In [86]:
# Keep only the rows whose label is 0.
real = df[df['label'] == 0]
real

Unnamed: 0,image_patch,mask_patch,label,fold,ela
0,1c8xyn/1c8xyn_orig.jpg,,0,0,1c8xyn/1c8xyn_orig_ela.jpg
2,extra_orig/fabola_47344752582.jpg,,0,0,extra_orig/fabola_47344752582_ela.jpg
6,extra_orig/42606939@N07_4408765847.jpg,,0,0,extra_orig/42606939@N07_4408765847_ela.jpg
8,extra_orig/gnomeasia_3366728985.jpg,,0,0,extra_orig/gnomeasia_3366728985_ela.jpg
10,extra_orig/brewbooks_15158477613.jpg,,0,0,extra_orig/brewbooks_15158477613_ela.jpg
...,...,...,...,...,...
2350,extra_orig/andressolo_26220435839.jpg,,0,7,extra_orig/andressolo_26220435839_ela.jpg
2351,extra_orig/images_improbables_15182377436.jpg,,0,7,extra_orig/images_improbables_15182377436_ela.jpg
2354,extra_orig/mdpettitt_30889731788.jpg,,0,7,extra_orig/mdpettitt_30889731788_ela.jpg
2357,extra_orig/concursofotografiacomites_544586521...,,0,7,extra_orig/concursofotografiacomites_544586521...


In [87]:
# Rows of `real` as an array; element 0 of each row is the image path.
params = real.values
params[:5]

array([['1c8xyn/1c8xyn_orig.jpg', nan, 0, 0,
        '1c8xyn/1c8xyn_orig_ela.jpg'],
       ['extra_orig/fabola_47344752582.jpg', nan, 0, 0,
        'extra_orig/fabola_47344752582_ela.jpg'],
       ['extra_orig/42606939@N07_4408765847.jpg', nan, 0, 0,
        'extra_orig/42606939@N07_4408765847_ela.jpg'],
       ['extra_orig/gnomeasia_3366728985.jpg', nan, 0, 0,
        'extra_orig/gnomeasia_3366728985_ela.jpg'],
       ['extra_orig/brewbooks_15158477613.jpg', nan, 0, 0,
        'extra_orig/brewbooks_15158477613_ela.jpg']], dtype=object)

In [88]:
from functools import partial
from glob import glob
from multiprocessing.pool import Pool
from pathlib import Path

# Run extract_imd over every row in os.cpu_count() worker processes; results
# arrive in completion order and only serve to advance the progress bar.
with Pool(processes=os.cpu_count()) as p, tqdm(total=len(params)) as pbar:
    for v in p.imap_unordered(partial(extract_imd, increment=64), params):
        pbar.update()

100%|██████████| 1206/1206 [03:59<00:00,  5.04it/s]


In [89]:
# Per-row image name: last path component with its final 4 characters removed.
real_names = [row[0].rsplit('/', 1)[-1][:-4] for row in params]

In [99]:
# List the output folder here so this cell does not rely on a later cell
# having defined `files`.
files = os.listdir(OUTPUT_DIR)
# Set membership keeps the filter linear instead of scanning a list per entry.
real_name_set = set(real_names)
fakes = [x for x in files if x not in real_name_set]
print(len(fakes))
fakes[:5]

1153


['c8t9rsw_0', 'c8tf5mq_0', 'c8tt7fg_0', 'c8ttni9_0', 'c8ttxcu_0']

In [98]:
# Names of all entries in OUTPUT_DIR (extract_imd creates one folder per image there).
files = os.listdir(OUTPUT_DIR)
print(len(files))
files[:5]

2359


['c8t9rsw_0', 'c8tf5mq_0', 'c8tt7fg_0', 'c8ttni9_0', 'c8ttxcu_0']

In [110]:
# Accumulator of per-patch record dicts that later cells turn into DataFrames.
rows = []

In [111]:
from PIL import Image
# One record per file found in each real image's patch folder; these rows get
# an empty mask and label 0.
for x in tqdm(real_names):
    rows.extend(
        {'image': x, 'image_patch': i, 'mask_patch': '', 'label': 0}
        for i in os.listdir(os.path.join(OUTPUT_DIR, x))
    )

100%|██████████| 1206/1206 [00:00<00:00, 4239.43it/s]


In [101]:
from PIL import Image
for x in tqdm(fakes):
    patch_dir = os.path.join(OUTPUT_DIR, x)

    # Convert every PNG in the folder to JPEG, then delete the PNG.
    ims = [z for z in os.listdir(patch_dir) if z.endswith('png')]
    for z in ims:
        src = os.path.join(patch_dir, z)
        # Close the image handle before removing the file it was read from.
        with Image.open(src) as img:
            img.save(os.path.join(patch_dir, z[:-4]+'.jpg'))
        os.remove(src)

    # Count the JPEGs on disk rather than the PNGs just converted: on a re-run
    # no PNGs are left, and counting them would add no rows at all.
    # Assumes the folder holds only <i>.jpg / <i>_gt.jpg pairs — TODO confirm.
    n_jpg = sum(1 for z in os.listdir(patch_dir) if z.endswith('.jpg'))
    for i in range(n_jpg//2):
        rows.append({
            'image':x,
            'image_patch':str(i)+'.jpg',
            'mask_patch':str(i)+'_gt.jpg',
            'label':1
        })

100%|██████████| 1153/1153 [01:18<00:00, 14.68it/s]


In [114]:
# Build the table from the label-1 records only, so the result is the same
# whether or not `rows` also holds label-0 records at this point.
fake_df = pd.DataFrame([r for r in rows if r['label'] == 1])
fake_df

Unnamed: 0,image,image_patch,mask_patch,label
0,c8t9rsw_0,0.jpg,0_gt.jpg,1
1,c8tf5mq_0,0.jpg,0_gt.jpg,1
2,c8tf5mq_0,1.jpg,1_gt.jpg,1
3,c8tf5mq_0,2.jpg,2_gt.jpg,1
4,c8tf5mq_0,3.jpg,3_gt.jpg,1
...,...,...,...,...
29992,c9dwcgf_0,60.jpg,60_gt.jpg,1
29993,c9dwcgf_0,61.jpg,61_gt.jpg,1
29994,c9dwcgf_0,62.jpg,62_gt.jpg,1
29995,c8swtoq_0,0.jpg,0_gt.jpg,1


In [113]:
# Build the table from the label-0 records only, so label-1 records that may
# also be in `rows` do not leak into it.
real_df = pd.DataFrame([r for r in rows if r['label'] == 0])
real_df

Unnamed: 0,image,image_patch,mask_patch,label
0,1c8xyn_orig,27.jpg,,0
1,1c8xyn_orig,0.jpg,,0
2,1c8xyn_orig,1.jpg,,0
3,1c8xyn_orig,10.jpg,,0
4,1c8xyn_orig,11.jpg,,0
...,...,...,...,...
89367,1b26rs_orig,5.jpg,,0
89368,1b26rs_orig,6.jpg,,0
89369,1b26rs_orig,7.jpg,,0
89370,1b26rs_orig,8.jpg,,0


In [2]:
# NOTE(review): rebinds `df`, which earlier cells use for the table read from imd_FULL.csv.
df = pd.read_csv('cmfd_FULL.csv')

In [4]:
def check(param):
    """Print ``param`` when the file named by its first element does not exist.

    ``param[0]`` is resolved relative to 'Image_Manipulation_Dataset/COCO_CMFD'.
    Nothing is returned.
    """
    target = os.path.join('Image_Manipulation_Dataset/COCO_CMFD', param[0])
    if os.path.exists(target):
        return
    print(param)
    # A further existence check for rows with param[-1] == 1, on the path built
    # from param[0] and param[2], was sketched here but is disabled.

In [3]:
# df = pd.read_csv('imd_64.csv')
# Rows of `df` as an array, one element per row, for the existence check.
data = df.values

In [5]:
# check() prints every row whose file is missing.
for x in tqdm(data):
    check(x)

100%|██████████| 7094/7094 [00:00<00:00, 10789.14it/s]


In [115]:
# Stack the real and fake patch tables; each keeps its own index, so index values repeat.
df = pd.concat([real_df, fake_df])

In [116]:
# Write the combined table without the index column.
df.to_csv('imd_64.csv', index=False)