In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Collect the full path of every file found anywhere below the Kaggle input
# directory, then print them one per line.
input_files = [
    os.path.join(root_dir, name)
    for root_dir, _subdirs, names in os.walk('/kaggle/input')
    for name in names
]
for input_file in input_files:
    print(input_file)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import os
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import random

# keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization
from keras import optimizers
from keras import models
from keras import layers
from keras.losses import SparseCategoricalCrossentropy
from keras.preprocessing.image import ImageDataGenerator

# sklearn library
from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import classification_report 

In [None]:
! unzip "../input/dogs-vs-cats-redux-kernels-edition/train.zip" -d train
! unzip "../input/dogs-vs-cats-redux-kernels-edition/test.zip" -d test

In [None]:
def build_image_frame(image_dir):
    """Index the JPEG files below ``image_dir`` as a (filename, category) table.

    The category is the text before the first '.' in each file's own name
    (``dog.jpg`` -> ``dog``). Deriving it from the very path it labels means a
    label can never be attached to the wrong file, which could happen when
    names and paths come from two independent directory listings.

    Rows are sorted by path so the table (and any seeded split made from it)
    does not depend on the order in which the filesystem lists entries.

    Args:
        image_dir: directory that is searched recursively for ``*.jpg`` files.

    Returns:
        pandas.DataFrame with the string columns ``filename`` and ``category``;
        it has zero rows when no JPEG file is found.
    """
    jpg_paths = sorted(Path(image_dir).glob(r"**/*.jpg"))
    return pd.DataFrame({
        'filename': [str(jpg_path) for jpg_path in jpg_paths],
        'category': [jpg_path.name.split('.')[0] for jpg_path in jpg_paths],
    })


# One row per training image: its path and the class parsed from its name.
df = build_image_frame("./train/train")

# Column views kept under the names this cell has always defined.
File_Path = df['filename']
Labels = df['category']

In [None]:
# Both splits use the same hold-out fraction and the same seed.
split_options = dict(test_size=0.2, random_state=42)

# First hold out the test rows, then carve the validation rows out of the rest.
train_set, test_data = train_test_split(df, **split_options)
train_data, val_data = train_test_split(train_set, **split_options)

# DataFrame.shape is a (rows, columns) tuple; show it for the train, test and
# validation frames, one per line.
print(train_data.shape, test_data.shape, val_data.shape, sep='\n')

In [None]:
# Renumber the rows of every split from 0; drop=True discards the old index
# instead of inserting it into the frame as a new column.
train_data, test_data, val_data = [
    split_frame.reset_index(drop=True)
    for split_frame in (train_data, test_data, val_data)
]

In [None]:
# Size, in pixels, that images are resized to before being fed to the network.
img_height, img_width = 128, 128
img_size = (img_height, img_width)

In [None]:
# Image generator configuration: every image tensor is multiplied by 1/255,
# and images are randomly flipped horizontally.
img_gen = ImageDataGenerator(
    horizontal_flip=True,
    rescale=1.0 / 255,
)

In [None]:
BATCH_SIZE = 32  # number of images per batch, shared by both generators


def make_flow(generator, frame, shuffle):
    """Build a batch iterator over the images listed in ``frame``.

    Args:
        generator: the ImageDataGenerator whose preprocessing is applied.
        frame: DataFrame with a ``filename`` column (image paths) and a
            ``category`` column (class names).
        shuffle: whether the rows are visited in shuffled order.

    Returns:
        The iterator returned by ``generator.flow_from_dataframe``; with
        ``class_mode='binary'`` the labels are 1D binary labels.
    """
    return generator.flow_from_dataframe(
        frame,
        x_col='filename',       # column holding the image paths
        y_col='category',       # column holding the class names
        target_size=img_size,   # every image is resized to this size
        class_mode='binary',
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        seed=42,                # fixed seed so shuffling is repeatable
    )


# Training batches are shuffled so the network does not see the images in the
# same fixed order on every pass over the data.
train_gen = make_flow(img_gen, train_data, shuffle=True)

# Validation images are only rescaled (same 1/255 factor as img_gen) and never
# randomly flipped, so validation metrics are measured on unmodified images
# and stay comparable from epoch to epoch.
val_img_gen = ImageDataGenerator(rescale=1./255)
validation_gen = make_flow(val_img_gen, val_data, shuffle=False)

In [None]:
# Convolutional binary classifier: four Conv2D + max-pooling stages, followed
# by dropout, a 512-unit dense layer and a single sigmoid output unit.
model = models.Sequential([
    # The input shape follows img_size, so the network always matches the size
    # the generators resize images to; the trailing 3 is the channel count.
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=img_size + (3,)),
    layers.MaxPool2D((2, 2)),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPool2D((2, 2)),

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPool2D((2, 2)),

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPool2D((2, 2)),

    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# `learning_rate` replaces the deprecated `lr` argument, which current Keras
# releases no longer accept. The metric is named 'acc' because the training
# history is read back under the keys 'acc' and 'val_acc'.
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizers.RMSprop(learning_rate=1e-4),
    metrics=['acc'],
)


In [None]:
model.summary()  # print the current network architecture

In [None]:
# Run the training process. The returned object records the metrics that are
# read back afterwards through `history.history`.
fit_options = dict(
    epochs=50,
    steps_per_epoch=100,             # training batches drawn per epoch
    validation_data=validation_gen,
    validation_steps=50,             # validation batches evaluated per epoch
    verbose=1,
)
history = model.fit(train_gen, **fit_options)

In [None]:
# Pull the recorded training and validation curves out of the fit history.
metrics_by_epoch = history.history

# accuracy
acc, val_acc = metrics_by_epoch['acc'], metrics_by_epoch['val_acc']

# loss
loss, val_loss = metrics_by_epoch['loss'], metrics_by_epoch['val_loss']

In [None]:
# Plot the recorded curves.
plt.figure(figsize=(10, 5))

# Accuracy goes in the first panel of a 2-row, 1-column subplot grid.
plt.subplot(2, 1, 1)

# NOTE(review): both lines carry labels, but no plt.legend() call is visible in
# this part of the cell; confirm one follows, otherwise the labels are not shown.
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')

plt.ylabel('Accuracy') 
plt.title('Training and Validation Accuracy') 

