# Dogs vs. Cats を CNN で解く

## パッケージ群のインポート

In [1]:
import tensorflow
from tensorflow import keras
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
import os, cv2, random
import numpy as np
import pandas as pd
from matplotlib import ticker
import seaborn as sns

## ファイル一覧の取得

In [3]:
# Dataset directories, relative to the notebook's working directory.
# Both end with '/' so that a file name can be appended directly.
TRAIN_DIR, TEST_DIR = 'train/', 'test/'

In [4]:
# List each directory exactly once and sort the names: os.listdir() returns
# entries in arbitrary order, so without sorting any subset sliced from these
# lists could differ between machines and runs.
train_files = sorted(os.listdir(TRAIN_DIR))
test_files = sorted(os.listdir(TEST_DIR))

# Training data (everything, dogs only, cats only).
# The class is recognised by the substring 'dog' / 'cat' in the file name.
train_images = [TRAIN_DIR + name for name in train_files]  # use this for full dataset
train_dogs = [TRAIN_DIR + name for name in train_files if 'dog' in name]
train_cats = [TRAIN_DIR + name for name in train_files if 'cat' in name]

# Evaluation data
test_images = [TEST_DIR + name for name in test_files]

In [12]:
# To iterate quickly, shrink the training set to 1000 dogs + 1000 cats.
train_images = train_dogs[:1000] + train_cats[:1000]
# Shuffle with a dedicated fixed-seed generator so that a fresh
# Restart & Run All reproduces the same ordering of the training files.
random.Random(42).shuffle(train_images)

# Shrink the evaluation set to 25 images.
test_images = test_images[:25]

## 画像を配列に格納

In [27]:
# Fixed size every image is resized to, plus the number of colour channels
# in the arrays built from them.
ROWS, COLS, CHANNELS = 64, 64, 3

In [14]:
def read_image(file_path):
    """
    Load an image file in colour and resize it to the notebook's fixed size.

    Args:
        file_path: Path of the image file to load.

    Returns:
        The resized image produced by ``cv2.resize``. The target size is
        passed as ``(ROWS, COLS)``, which OpenCV interprets as
        ``(width, height)``, so the returned array is laid out as
        COLS x ROWS x channels.

    Raises:
        IOError: If OpenCV cannot read or decode ``file_path``.
    """
    # Use cv2.IMREAD_GRAYSCALE here instead for monochrome input.
    img = cv2.imread(file_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than inside cv2.resize.
        raise IOError('Could not read image: {}'.format(file_path))
    return cv2.resize(img, (ROWS, COLS), interpolation=cv2.INTER_CUBIC)

In [28]:
def prep_data(image_files):
    """
    Load every listed image into a single channel-first uint8 array.

    Args:
        image_files: List of image file paths.

    Returns:
        Array of shape ``(len(image_files), CHANNELS, ROWS, COLS)`` with
        dtype ``np.uint8``, holding one transposed image per entry.
    """
    total = len(image_files)
    batch = np.ndarray((total, CHANNELS, ROWS, COLS), dtype=np.uint8)

    for idx, path in enumerate(image_files):
        pixels = read_image(path)  # COLS x ROWS x CHANNELS
        # .T reverses the axis order -> CHANNELS x ROWS x COLS
        batch[idx] = pixels.T
        # Report progress once every 250 images.
        if idx % 250 == 0:
            print('Processed {} of {}'.format(idx, total))

    return batch

In [29]:
# Turn both file lists into image arrays: training set first, then evaluation set.
train, test = prep_data(train_images), prep_data(test_images)

Processed 0 of 2000
Processed 250 of 2000
Processed 500 of 2000
Processed 750 of 2000
Processed 1000 of 2000
Processed 1250 of 2000
Processed 1500 of 2000
Processed 1750 of 2000
Processed 0 of 25


In [30]:
# Layout is (n_images, CHANNELS, ROWS, COLS), as allocated in prep_data.
train.shape

(2000, 3, 64, 64)

In [31]:
# Same (n_images, CHANNELS, ROWS, COLS) layout, for the evaluation images.
test.shape

(25, 3, 64, 64)