### Magics

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

%load_ext watermark
%watermark -a 'Salomon Tetelepta' -d -t -v -p keras,tensorflow,numpy,pandas,matplotlib -g

Using TensorFlow backend.


Salomon Tetelepta 2019-07-08 17:58:43 

CPython 3.6.7
IPython 7.2.0

keras 2.2.4
tensorflow 1.11.0
numpy 1.16.2
pandas 0.23.4
matplotlib 2.0.2
Git hash:


### Imports

In [2]:
import os
import sys
from pathlib import Path
# data science
import numpy as np

import logging
import cv2

### Settings

In [3]:
# Directory layout, anchored on the parent of the notebook's working directory.
# The ".." segment is kept as-is (the path is not resolved).
project_path = Path.cwd().joinpath(os.pardir)
data_path = project_path.joinpath("data")
output_path = project_path.joinpath("output")

In [5]:
# Make the project's own packages (under <project_path>/src) importable.
# The membership check keeps this cell idempotent: re-running it no longer
# stacks duplicate entries at the front of sys.path.
src_path = str(project_path / "src")
if src_path not in sys.path:
    sys.path.insert(0, src_path)

# import packages
# NOTE(review): these star imports presumably supply `identify_images` and
# `setup_logger`, which later cells call but no visible cell defines.
# Confirm, and prefer importing the needed names explicitly.
from utils.identify import *
from utils.log import *

In [34]:
# setup logging: create the notebook-wide logger, configured with level INFO.
# NOTE(review): `setup_logger` is not defined in any visible cell; presumably it
# comes from `from utils.log import *` -- confirm.
# NOTE(review): the processing loop further down logs with logger.debug, which is
# presumably suppressed at INFO level -- confirm that is intended.
logger = setup_logger(level=logging.INFO)

In [26]:
def valid_image(img_name, valid_ext=("jpg", "jpeg", "png")):
    """Return True when ``img_name`` ends in one of the accepted extensions.

    The extension is the text after the last "." in the name and is compared
    case-insensitively against ``valid_ext``. A name that contains no "." has
    no extension and is rejected, so a file literally called "jpg" is no
    longer mistaken for an image.

    Args:
        img_name: File name to check.
        valid_ext: Collection of accepted extensions, lower-case and without
            the leading dot. The default is an immutable tuple so it cannot be
            modified between calls.

    Returns:
        bool: True if the extension is accepted, False otherwise.
    """
    # rpartition yields an empty separator when the name has no "." at all
    _, dot, ext = img_name.rpartition(".")
    return bool(dot) and ext.lower() in valid_ext


def yield_folders(path):
    """Yield the names of the immediate sub-directories of ``path``.

    Args:
        path: Directory to scan, given as a ``str`` or a ``pathlib.Path``.

    Yields:
        str: Sub-directory names (not full paths), in sorted order.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps any
    # enumerate() over the result stable between runs and machines.
    for entry in sorted(os.listdir(path)):
        # os.path.join accepts both str and Path, whereas `path / entry`
        # fails with TypeError when `path` is a plain string
        if os.path.isdir(os.path.join(path, entry)):
            yield entry


def yield_valid_images(path):
    """Yield the entries of ``path`` whose names pass ``valid_image``.

    Names are yielded as ``os.listdir`` returns them: bare names, in listing
    order.
    """
    yield from filter(valid_image, os.listdir(path))

In [7]:
def save_cards(cards, path, img_id):
    """Write each card image to ``path`` as ``<img_id>_<index>.jpg``.

    Cards are numbered from 0 in iteration order. ``path`` must support the
    ``/`` operator (e.g. a ``pathlib.Path``). The return value of
    ``cv2.imwrite`` is not checked.
    """
    index = 0
    for card_img in cards:
        target = path / f"{img_id}_{index}.jpg"
        cv2.imwrite(str(target), card_img)
        index += 1

In [8]:
# original_path = data_path / "original"
# for i in yield_folders(original_path):
#     print(i)
# image_path = data_path / "original" / "1-inside-closed-curtains"
# for i in yield_valid_images(image_path):
#     print(i)

In [28]:
# from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input as preprocess_input_resnet50
from tensorflow.keras.metrics import categorical_accuracy
from tensorflow.keras.optimizers import Adam

def get_intermediate_model(layer_name="res4a_branch1", input_shape=(128, 96, 3)):
    """Build a model that maps the ResNet50 input to the output of one named layer.

    Args:
        layer_name: Name of the ResNet50 layer whose output becomes the model output.
        input_shape: Input shape handed to ``ResNet50``.

    Returns:
        The compiled ``Model`` ending at ``layer_name``.
    """
    # ResNet50 with imagenet weights and without its top
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)

    # cut the network at the requested layer
    tapped_layer = backbone.get_layer(layer_name)
    feature_extractor = Model(inputs=backbone.input, outputs=tapped_layer.output)

    # compile with the same optimizer, loss and metric settings as before
    feature_extractor.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(),
        metrics=[categorical_accuracy],
    )
    return feature_extractor

In [29]:
import pickle

# Feature extractor: ResNet50 cut at the "res4a_branch1" layer.
intermediate_model = get_intermediate_model(input_shape=(128, 96, 3), layer_name="res4a_branch1")

# Restore the pickled classifier from the data folder.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with (data_path / "multi_svm.pkl").open("rb") as handle:
    multi_svm_loaded = pickle.load(handle)



In [30]:
def predict(X, intermediate_model, multi_svm):
    """Predict integer labels for ``X`` via the feature model and the classifier.

    Args:
        X: Input array; it is copied before preprocessing.
        intermediate_model: Object whose ``predict`` turns preprocessed inputs
            into feature maps.
        multi_svm: Object whose ``predict`` maps flattened features to labels.

    Returns:
        The classifier's predictions cast to ``int``.
    """
    # preprocess a copy of the input for the resnet model
    resnet_input = preprocess_input_resnet50(X.copy())

    # feature maps from the intermediate model
    activations = intermediate_model.predict(resnet_input)

    # flatten to one row per sample
    n_samples = activations.shape[0]
    flat_features = activations.reshape(n_samples, -1)

    labels = multi_svm.predict(flat_features)
    return labels.astype(int)


In [31]:
def predict_str(prediction):
    """Turn a prediction of four class indices into a readable card label.

    Args:
        prediction: Indexable whose first four items are integer indices, in
            the order (color, shape, fill, number). Each index selects a name
            from the corresponding list below.

    Returns:
        str: ``"<color>_<shape>_<fill>_<number>"``, e.g. ``[0, 0, 0, 0]``
        gives ``"red_square_solid_one"``.

    Raises:
        IndexError: If an index is out of range for its list, or the
            prediction has fewer than four items.
    """
    colors = ['red', 'green', 'purple']
    shapes = ['square', 'squiggle', 'round']
    fills = ['solid', 'open', 'dotted']
    nrs = ['one', 'two', 'three']
    return f"{colors[prediction[0]]}_{shapes[prediction[1]]}_{fills[prediction[2]]}_{nrs[prediction[3]]}"

In [32]:
# for i, card in enumerate(cards):
#     plt.figure(figsize=(5, 5))
#     plt.title(predict_str(predictions[i]))
#     plt.imshow(cards[i])

In [36]:
image_path = data_path / "original"

# Walk every variant folder under data/original, extract the cards from each
# photo and classify them.
# NOTE(review): every write below is commented out, so this cell currently
# computes predictions but persists nothing.
for variant_id, variant in enumerate(yield_folders(image_path)):
    for img_id, image_name in enumerate(yield_valid_images(image_path / variant)):
        # load image; cv2.imread returns None (it does not raise) when the
        # file cannot be read or decoded, which would crash cvtColor below
        img = cv2.imread(str(image_path / variant / image_name))
        if img is None:
            logger.warning(f"could not read image, skipping: {variant}/{image_name}")
            continue

        # convert to RGB, network was trained on RGB
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # extract cards from image
        # NOTE(review): presumably the size tuple is (width, height), so
        # (96, 128) matches the model's (128, 96, 3) input -- confirm against
        # identify_images
        logger.debug(f"{variant}_{image_name}")
        cards, bboxes = identify_images(img, (96, 128))

        # get predictions
        predictions = predict(cards, intermediate_model=intermediate_model, multi_svm=multi_svm_loaded)

        # second extraction with the size tuple swapped; these cards are the
        # ones meant for saving and are indexed against the predictions above
        cards, bboxes = identify_images(img, (128, 96))
        # NOTE(review): this overrides the enumerate() counter with a
        # hard-coded value; it looks like a debugging leftover -- confirm
        variant_id = 2
        for i, card in enumerate(cards):
            p = predict_str(predictions[i])
            # back to BGR, the channel order cv2.imwrite expects
            card = cv2.cvtColor(card, cv2.COLOR_RGB2BGR)
            #if not os.path.isfile(str(image_path / variant / "cards" / f"{variant_id}_{p}.jpg")):
                #cv2.imwrite(str(image_path / variant / "cards" / f"{variant_id}_{p}.jpg"), card)
            #else:
                #cv2.imwrite(str(image_path / variant / "cards" / f"{variant_id}_{p}_{img_id}.jpg"), card)

        # save each card
        #save_cards(cards, image_path / variant / "cards", f"{variant}_{img_id}")
        #break
    #break

Process ForkPoolWorker-429:
Process ForkPoolWorker-431:
Process ForkPoolWorker-430:
Process ForkPoolWorker-432:
Process ForkPoolWorker-426:
Process ForkPoolWorker-427:
Process ForkPoolWorker-428:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/salomon/miniconda3/envs/experiments/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/salomon/miniconda3/envs/experiments/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/salomon/miniconda3/envs/experiments/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/salomon/miniconda3/envs/experiments/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/salomon/mini

KeyboardInterrupt: 

#### Create folder structure

In [None]:
# load meta data
# NOTE(review): `pd`, `csv_path` and `prepare_dataset` are not imported or defined
# in any visible cell, so this cell fails on a fresh kernel -- add them upstream.
df_dataset = pd.read_csv(csv_path)
# not the last expression of the cell, so this preview is never displayed
df_dataset.head()

# filter the metadata for one example card
# (presumably query_from_dict turns the dict into a DataFrame.query expression -- confirm)
sample = {'f_color': 'red', 'f_shape': 'square', 'f_fill': 'open', 'f_number': 'three'}
query = prepare_dataset.query_from_dict(sample)
# the filtered frame is neither stored nor displayed
df_dataset.query(query)

# report the size of the full dataset and preview its first rows
print("nr records: ", len(df_dataset))
df_dataset.head()

In [None]:
def create_path_if_doesnt_exist(path):
    """Make sure the directory ``path`` exists and return ``path``.

    Missing parent directories are created as well. Calling this for a
    directory that already exists is a no-op.

    Args:
        path: Directory to create, as a ``str`` or ``pathlib.Path``.

    Returns:
        The ``path`` argument, unchanged, so calls can be chained.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate exists() check, so there is no
    # window between checking and creating the directory
    os.makedirs(path, exist_ok=True)
    return path

In [None]:
# make sure both split folders exist before files are copied into them
# NOTE(review): `dataset_path` is not defined in any visible cell -- define it upstream.
create_path_if_doesnt_exist(dataset_path / "train")
create_path_if_doesnt_exist(dataset_path / "validation")

In [1]:
from shutil import copyfile

# Copy every file listed in the metadata into <dataset_path>/<split>/<card_id>/.
# Rows with variant == 0 form the validation split; all other variants go to train.
# NOTE(review): relies on `df_dataset` and `dataset_path` from earlier cells; the
# recorded NameError shows this cell was run before those were defined.
for idx, row in df_dataset.iterrows():
    train_val = "validation" if row.variant == 0 else "train"

    # normalise the filename once so source and destination are built from the
    # same string (previously only the source path used str())
    filename = str(row.filename)

    card_dir = dataset_path / train_val / str(row.card_id)
    create_path_if_doesnt_exist(card_dir)

    src = dataset_path / filename
    dest = card_dir / filename
    copyfile(src, dest)

NameError: name 'df_dataset' is not defined