In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import glob
import seaborn as sns
import random
from sklearn.preprocessing import LabelBinarizer

In [None]:
import os 
# Show the entries of the dataset root directory.
os.listdir('../input/dogcat-release/')

导入图片和预处理

In [None]:
# Collect the JPEG paths of the training images and of the test images.
# glob returns paths in arbitrary (file-system dependent) order.
train, test = (
    glob.glob(pattern)
    for pattern in ('../input/dogcat-release/train/*.jpg',
                    '../input/dogcat-release/test/*.jpg')
)

In [None]:
# Shuffle the training paths reproducibly.
# Sorting first makes the result independent of glob's arbitrary ordering and
# of how many times this cell has been re-run; the fixed seed makes the
# permutation (and therefore the later train/validation split) repeatable.
train = np.random.RandomState(42).permutation(sorted(train))

In [None]:
# Peek at the first five shuffled training paths.
train[:5]

In [None]:
# Distinct class names: the part of each file name before its first dot
# (assumes names shaped like `<class>.<id>.jpg` -- TODO confirm against the data).
label_names = {path.rsplit('/', 1)[-1].split('.', 1)[0] for path in train}
label_names

In [None]:
# One class-name string per training path, in the same order as `train`.
labels = [path.rsplit('/', 1)[-1].split('.', 1)[0] for path in train]
labels[:5]

In [None]:
# Encode the class-name strings as integer ids and then as binarised columns.
labels = pd.DataFrame(labels, columns=['Type'])

# Use a fixed (descending alphabetical) class order instead of first-appearance
# order: the file list is randomly permuted, so first-appearance order made the
# name -> id mapping (and hence the meaning of label 1) vary between runs.
# NOTE(review): assuming the classes are 'cat' and 'dog', this yields
# {'dog': 1, 'cat': 2}, i.e. cat -> 1 and dog -> 0 after binarisation, which is
# the orientation the final `type_change` step (high score -> 0.005) relies on.
# Confirm against the submission format.
Class = sorted(labels['Type'].unique(), reverse=True)
Class_dict = dict(zip(Class, range(1, len(Class) + 1)))

labels['str'] = labels['Type'].map(Class_dict)
lb = LabelBinarizer()
lb.fit(list(Class_dict.values()))
transformed_labels = lb.transform(labels['str'])

# One 'str<i>' column per binarised output column (a single column when there
# are exactly two classes).
y_bin_labels = []
for i in range(transformed_labels.shape[1]):
    column_name = 'str' + str(i)
    y_bin_labels.append(column_name)
    labels[column_name] = transformed_labels[:, i]

In [None]:
# Show the class-name -> integer-id mapping.
Class_dict

In [None]:
# Reduce the label frame to a plain NumPy vector holding only the binarised
# 'str0' column; the helper columns 'str' and 'Type' are discarded.
# NOTE: `labels` changes type here (DataFrame -> ndarray), so this cell cannot
# be re-run without re-running the cells that build the frame.
labels = labels.drop(columns=['str', 'Type'])['str0'].values
labels[:5]

In [None]:
# Preprocessing function
def preprocess_image(path, label):
    """Load one training image and pass its label through unchanged.

    Args:
        path: path of a JPEG file.
        label: label belonging to that image; returned as-is.

    Returns:
        (image, label) where image is a float32 tensor of shape (224, 224, 3)
        with pixel values rescaled from [0, 255] to [-1, 1].
    """
    raw_bytes = tf.io.read_file(path)
    pixels = tf.image.decode_jpeg(raw_bytes, 3)  # force 3 colour channels
    resized = tf.image.resize(pixels, [224, 224])
    scaled = tf.cast(resized / 127.5 - 1, tf.float32)
    return scaled, label

In [None]:
# Pair every image path with its 0/1 label.
dataset = tf.data.Dataset.from_tensor_slices((train, labels))
# Shuffle once with a full-size buffer. reshuffle_each_iteration=False is
# essential: the dataset is later split with skip()/take(), and with the
# default (reshuffle on every iteration) each epoch would draw a different
# split, leaking validation samples into training.
dataset = dataset.shuffle(len(train), reshuffle_each_iteration=False)
dataset

In [None]:
# Build the dataset: decode and normalise every image, letting tf.data pick
# the degree of parallelism.
AUTO = tf.data.experimental.AUTOTUNE
dataset = dataset.map(preprocess_image, num_parallel_calls = AUTO)

In [None]:
# Split the dataset: 20% of the samples (rounded down) are held out for
# validation, the remainder is used for training.
test_count = int(0.2 * len(train))
train_count = len(train) - test_count
(train_count, test_count)

In [None]:
# The first `test_count` elements form the held-out split; everything after
# them is used for training.
test_dataset = dataset.take(test_count)
train_dataset = dataset.skip(test_count)

In [None]:
# Number of images per batch for training, validation and inference.
batch_size = 128

In [None]:
# Training pipeline: repeat indefinitely, shuffle within an 800-element buffer,
# batch, and prefetch so input preparation overlaps with training.
train_dataset = train_dataset.repeat()
train_dataset = train_dataset.shuffle(800)
train_dataset = train_dataset.batch(batch_size)
train_dataset = train_dataset.prefetch(AUTO)

# The held-out split is only batched (no repeat, no shuffle).
test_dataset = test_dataset.batch(batch_size)

train_dataset

创建三种迁移模型结合测试

In [None]:
# Xception backbone: ImageNet weights, no classification head, global average
# pooling so the output is one feature vector per image.
conv1 = keras.applications.xception.Xception(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
    pooling='avg',
)

In [None]:
# ResNet152 backbone: ImageNet weights, no classification head, global average
# pooling so the output is one feature vector per image.
conv2 = keras.applications.resnet.ResNet152(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
    pooling='avg',
)

In [None]:
# InceptionV3 backbone: ImageNet weights, no classification head, global
# average pooling so the output is one feature vector per image.
# NOTE(review): this cell was previously labelled "InceptionResNetV2", but the
# code instantiates InceptionV3 -- confirm which architecture was intended.
conv3 = keras.applications.inception_v3.InceptionV3(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
    pooling='avg',
)

In [None]:
# Freeze all three pretrained backbones so that only layers added on top of
# them are trained.
for backbone in (conv1, conv2, conv3):
    backbone.trainable = False

In [None]:
# Inspect the input tensors of the three backbones.
conv1.inputs,conv2.inputs,conv3.inputs

In [None]:
# Inspect the pooled output tensors of the three backbones (these are
# concatenated by the ensemble model).
conv1.outputs,conv2.outputs,conv3.outputs

In [None]:
def model():
    """Build the ensemble classifier on top of the three module-level backbones.

    One (224, 224, 3) input is fed to `conv1`, `conv2` and `conv3`; their
    pooled feature vectors are concatenated, passed through 50% dropout and
    mapped to a single sigmoid unit.

    The input is expected to be scaled to [-1, 1], which is what
    `preprocess_image` / `preprocess_test` produce. That range suits the
    Xception and Inception backbones, but the ResNet ImageNet weights expect
    0-255 input run through ResNet's own `preprocess_input`, so the ResNet
    branch is rescaled and preprocessed separately inside the graph.

    Returns:
        tf.keras.Model mapping an image batch to a (batch, 1) sigmoid score.

    Note:
        The notebook rebinds the name `model` to the returned instance, after
        which this builder is no longer callable without re-running this cell.
    """
    inputs = tf.keras.layers.Input(shape=(224, 224, 3))

    out1 = conv1(inputs)

    # Undo the [-1, 1] scaling (back to 0-255) and apply ResNet's own
    # preprocessing before feeding the ResNet152 backbone.
    resnet_inputs = tf.keras.layers.Lambda(
        lambda x: tf.keras.applications.resnet.preprocess_input((x + 1.0) * 127.5),
        name='resnet_preprocess',
    )(inputs)
    out2 = conv2(resnet_inputs)

    out3 = conv3(inputs)

    out = tf.keras.layers.concatenate([out1, out2, out3], axis=1)
    out = tf.keras.layers.Dropout(0.5)(out)
    output = tf.keras.layers.Dense(1, activation='sigmoid')(out)

    return tf.keras.Model(inputs=inputs, outputs=output)

In [None]:
# Instantiate the ensemble and print its layer summary.
# NOTE: this rebinds `model` from the builder function to the model instance,
# so re-running this cell alone fails until the defining cell is run again.
model = model()
model.summary()

In [None]:
# Render the model graph, including tensor shapes, at 300 dpi.
tf.keras.utils.plot_model(model,show_shapes=True,dpi=300)

In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit. The metric is
# named 'acc', which is the history key (`acc` / `val_acc`) that plot_history reads.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['acc'])

In [None]:
# Number of training epochs.
EPOCHS = 10

In [None]:
# Train the model. steps_per_epoch is given explicitly because train_dataset
# repeats indefinitely; both step counts use floor division, so a final
# partial batch is not counted.
history = model.fit(train_dataset,
                   steps_per_epoch=train_count//batch_size,
                   epochs=EPOCHS,
                   validation_data=test_dataset,
                   validation_steps=test_count//batch_size,
                   )

模型评估

In [None]:
def plot_history(history):
    """Plot training and validation curves from a Keras History object.

    Draws two separate figures over the epoch axis: loss
    ('loss' / 'val_loss') and accuracy ('acc' / 'val_acc').

    Args:
        history: object exposing `.history` (dict of per-epoch metric lists
            containing the four keys above) and `.epoch` (list of epoch indices).
    """
    frame = pd.DataFrame(history.history)
    frame['epoch'] = history.epoch

    # (y-axis label, ((history column, legend entry), ...)) for each figure.
    panels = (
        ('Binary_crossentropy', (('loss', 'Train Loss'), ('val_loss', 'Val Loss'))),
        ('Accuracy', (('acc', 'Train Acc'), ('val_acc', 'Val Acc'))),
    )

    for y_label, curves in panels:
        plt.figure()
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        for column, legend_entry in curves:
            plt.plot(frame['epoch'], frame[column], label=legend_entry)
        plt.legend()

    plt.show()


plot_history(history)

In [None]:
# Score the held-out split and clip the sigmoid outputs to [0.005, 0.995].
y_pred = np.clip(model.predict(test_dataset, verbose=1), 0.005, 0.995)

In [None]:
# Display the clipped predictions for the held-out split.
y_pred

In [None]:
def preprocess_test(path):
    """Load one unlabelled image exactly as the training pipeline does.

    Delegates to `preprocess_image` so that training and inference
    preprocessing (decode, resize to 224x224, scale to [-1, 1]) cannot drift
    apart.

    Args:
        path: path of a JPEG file.

    Returns:
        float32 image tensor of shape (224, 224, 3).
    """
    # There is no label at inference time; pass a placeholder and discard it.
    image, _ = preprocess_image(path, None)
    return image

In [None]:
Dir = "../input/dogcat-release/test/"
# Keep only .jpg entries: os.listdir returns everything in the directory, and
# any other entry would either break the numeric sort key or reach the JPEG
# decoder later.
imgList = [name for name in os.listdir(Dir) if name.endswith('.jpg')]
# Sort by the integer before the first dot. The submission later assigns
# id = position + 1, so the files must be in numeric (not lexicographic) order.
# os.listdir yields bare names, so no directory component needs stripping.
imgList.sort(key=lambda x: int(x.split('.')[0]))
imgList[:5]

In [None]:
# Full path of every test image, in the numeric order established by imgList.
# (The per-file print was dropped: it emitted one output line per image.)
test_path = [os.path.join(Dir, im_name) for im_name in imgList]

In [None]:
# Peek at the first five test image paths.
test_path[:5]

In [None]:
# Inference pipeline over the unlabelled test images: preprocess in parallel,
# then batch. No shuffling, so prediction order follows `test_path`.
val_dataset = (
    tf.data.Dataset.from_tensor_slices(test_path)
    .map(preprocess_test, num_parallel_calls=AUTO)
    .batch(batch_size)
)

In [None]:
# Score the unlabelled test images; this overwrites the earlier `y_pred`
# computed on the held-out split.
y_pred = model.predict(val_dataset, verbose=1)
y_pred[:10]

In [None]:
# Take the first (only) output column of the predictions as a 1-D array.
pred = pd.DataFrame(y_pred).iloc[:,0].values

In [None]:
# Peek at the first five raw scores.
pred[:5]

In [None]:
def type_change(data, threshold=0.5, low=0.005, high=0.995):
    """Turn raw scores into one of two clipped submission values.

    Scores strictly above `threshold` become `low`; all others become `high`.
    Note the inversion: a high score maps to the low value.

    The input is not modified. An earlier version overwrote `data` in place,
    so calling it twice on the same array flipped every value back.

    Args:
        data: array-like of scores.
        threshold: decision boundary (default 0.5).
        low: value assigned to scores above the threshold (default 0.005).
        high: value assigned to the remaining scores (default 0.995).

    Returns:
        New NumPy array with the same shape as `data`; the floating dtype of
        the input is preserved.
    """
    scores = np.asarray(data)
    result = np.where(scores > threshold, low, high)
    # np.where with Python floats yields float64; keep e.g. float32 inputs float32.
    if np.issubdtype(scores.dtype, np.floating):
        result = result.astype(scores.dtype)
    return result

# Convert the raw scores into the two-valued labels written to the submission.
predict_labels = type_change(pred)

In [None]:
# Peek at the first ten submission labels.
predict_labels[:10]

In [None]:
# Write the submission file: one row per test image, with a 1-based 'id'
# index column and a 'label' column.
prediction = pd.DataFrame({"label": predict_labels})
prediction.index = prediction.index + 1
prediction.to_csv('pred.csv', index_label='id')