In [79]:
import cv2
import pandas

In [80]:
def load_image(image):
    img = cv2.imread(image)
    return img

def resize_image(image, n):
    '''Makes image n pixels wide, preserving the same ratio'''
    r = n / image.shape[1]
    dim = (n, int(image.shape[0] * r))
    # perform the actual resizing of the image and show it
    resized = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
    return resized
    
def convert_to_grey(image):
    grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return grey
    
def change_image_ratio(image, w, h):
    image = cv2.resize(image, (w, h))
    return image

def process_file(file_name):
    img = load_image(file_name)
    
    # to greyscale
    grey = convert_to_grey(img)
    shape =  grey.shape

    # calculating ratio
    height, width = shape[0], shape[1]
    ratio = width/height

    # compressing horizontally
    img = change_image_ratio(grey, int(width/ratio), int(height))

    # resizing
    return resize_image(img, 384)

In [26]:
image = load_image('whale_2.jpg')
resize_image(image, 100)
change_image_ratio(image, 2, 3)
convert_to_grey(image)
change_image_ratio(image, 640, 640);

### Image preprocessing code
Images are preprocessed by:

1. Converting to black&white;
2. Compressing horizontally by a factor of 2.15 (the mean aspect ratio);
3. Apply a random image transformation (only for training)
4. Resizing to 384x384;
5. Normalizing to zero mean and unit variance.
These operation are performed by the following code that is later invoked when preparing the corpus.

In [32]:
# loading image
img = load_image('whale_0.jpg')

# changing color to grey
grey = convert_to_grey(img)
shape =  grey.shape

# calculating ratio
height, width = shape[0], shape[1]
ratio = width/height
print('height:', height)
print('width:', width)

# compressing horizontally
w = int(width/ratio)
h = int(height)
img = change_image_ratio(image, w, h)

# resizing
resize_image(img, 384);

whale_0.jpg
height: 237
width: 1050


In [5]:
# loading image
img = load_image('whale_0.jpg')

# changing color to grey
grey = convert_to_grey(img)
shape =  grey.shape

# calculating ratio
height, width = grey.shape[0], grey.shape[1]
ratio = width/height

# compressing horizontally
new_width = int(width/ratio)
img = change_image_ratio(image, new_width, height)

# resizing
resize_image(img, 384);

whale_0.jpg


In [77]:
import numpy as np

df = pandas.read_csv('train.csv')
data_frame = df.Image
print(data_frame.head())
np.random.shuffle(data_frame)

0    0000e88ab.jpg
1    0001f9222.jpg
2    00029d126.jpg
3    00050a15a.jpg
4    0005c1ef8.jpg
Name: Image, dtype: object


In [83]:
data_frame.head()

0    d6a2fcf67.jpg
1    4158e6b4c.jpg
2    a7071c91e.jpg
3    c5a296f97.jpg
4    8622a06dd.jpg
Name: Image, dtype: object

In [86]:
import random

def generator(path, batch=1):
    images = np.random.shuffle(os.listdir(path))
#     for root, dirs, files in os.walk(path):
    for root, dirs, files in images:
        batch_counter = 0
        batch_files = []
        for name in files:
            if name.split('.')[-1] in ['png', 'jpg', 'jpeg']:
                if batch_counter == 0:
                    batch_files = []
                batch_counter += 1

                filename = os.path.join(root, name)
                batch_files.append(process_file(filename))

                print('result:', batch_counter % batch)
                if batch_counter % batch != 0:
                    continue
                batch_counter = 0
                yield np.stack(batch_files, axis=0)

            if batch_counter:
                yield np.stack(batch_files, axis=0)


In [125]:
import random

def generator(path, batch=1):
    images = os.listdir(path)
    print('images',images)
    random.shuffle(images)
    print('images',images)    
    batch_counter = 0
    batch_files = []
    for image in images:
        if image.split('.')[-1] in ['png', 'jpg', 'jpeg']:
            if batch_counter == 0:
                batch_files = []
            batch_counter += 1

            filename = os.path.join(path, image)
            batch_files.append(process_file(filename))
            
            if batch_counter % batch != 0:
                continue
            batch_counter = 0
            yield np.stack(batch_files, axis=0)

        if batch_counter:
            yield np.stack(batch_files, axis=0)

In [126]:
from PIL import Image

for i in generator('test/', 3):
    print(i)

images ['4b6605393.jpg', '4b66039fe.jpg', '4b945d454.jpg', '4b5384c9b.jpg', '4b1315993.jpg', '4b8212256.jpg', '4b8554d24.jpg', '4b77714e6.jpg', '4b3631d1e.jpg', '4b2678972.jpg', '4b542e259.jpg', '4b373390c.jpg', '4b771d44b.jpg', '4b6887756.jpg', '4b6831b73.jpg', '4b95024d2.jpg']
images ['4b6605393.jpg', '4b771d44b.jpg', '4b6831b73.jpg', '4b945d454.jpg', '4b2678972.jpg', '4b95024d2.jpg', '4b8554d24.jpg', '4b3631d1e.jpg', '4b373390c.jpg', '4b542e259.jpg', '4b1315993.jpg', '4b6887756.jpg', '4b5384c9b.jpg', '4b8212256.jpg', '4b66039fe.jpg', '4b77714e6.jpg']
image: 4b6605393.jpg batch_counter: 1
image: 4b771d44b.jpg batch_counter: 2
image: 4b6831b73.jpg batch_counter: 3
[[[113 108 113 ... 111 112 115]
  [111 110 116 ... 110 113 111]
  [106 114 116 ... 114 115 112]
  ...
  [ 90  95  92 ...  90  88  91]
  [ 91  95  95 ...  86  85  84]
  [ 86  90  92 ...  86  86  82]]

 [[160 157 158 ... 229 229 230]
  [174 173 172 ... 248 248 247]
  [175 175 175 ... 251 251 251]
  ...
  [225 222 221 ... 151 1