# 用卷积神经网络 CNN 识别饮料数据集

### 饮料数据集加载

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# 图片输入输出
from skimage import io

# 系统命令
import os

from skimage.transform import resize # resize函数可以调整图像的大小

# Directories that hold the training and test images.
train_file_path = "../input/drinks-yygroup/train/"
test_file_path = "../input/drinks-yygroup/test/"


def _read_label_file(path):
    """Parse a label file into a ``{name: label}`` dictionary.

    Each non-empty line is expected to look like ``name<TAB>label``.
    The stored value is ``int(label) - 1`` (presumably the file uses
    1-based class ids and the model needs 0-based ones -- confirm
    against the data set).

    Args:
        path: Location of the tab-separated label file.

    Returns:
        dict mapping the name column to the shifted integer label.

    Raises:
        ValueError: If a non-empty line does not have exactly two
            tab-separated fields or the label is not an integer.
    """
    labels = {}
    with open(path) as label_file:
        for raw_line in label_file:
            # Strip only the line terminator; slicing off the last character
            # would corrupt the label when the final line has no newline.
            entry = raw_line.rstrip("\n")
            if not entry:
                continue  # tolerate blank lines
            name, label = entry.split("\t")
            labels[name] = int(label) - 1
    return labels


train_label_dict = _read_label_file("../input/drinks-yygroup/train.txt")
print("number of train: ", len(train_label_dict))

test_label_dict = _read_label_file("../input/drinks-yygroup/test.txt")
print("number of test: ", len(test_label_dict))

def plotit(fig1, fig2, label):
    """Display two images in separate figures for side-by-side comparison.

    Each figure is titled with the given label and the shape of the image
    it shows.

    Args:
        fig1: First image array (drawn in figure 1).
        fig2: Second image array (drawn in figure 2).
        label: Value shown in both titles.
    """
    for figure_number, image in enumerate((fig1, fig2), start=1):
        plt.figure(figure_number)
        plt.title("label = " + str(label) + "，shape = " + str(image.shape))
        plt.imshow(image)
    plt.show()

def _load_image_set(directory, label_dict, size=(32, 32)):
    """Read every image in a directory and pair it with its label.

    Args:
        directory: Folder whose entries are all read as images.
        label_dict: Mapping from file name (without extension) to label.
        size: Target ``(rows, cols)`` passed to ``resize``.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays; ``labels`` holds one
        single-element row per image.

    Raises:
        KeyError: If a file's name has no entry in ``label_dict``.
    """
    images = []
    labels = []
    # os.listdir returns entries in arbitrary order; sort so the sample
    # order is the same on every run.
    for file_name in sorted(os.listdir(directory)):
        image = io.imread(os.path.join(directory, file_name))
        # Keep only the first three channels (e.g. discards a fourth,
        # alpha-like channel) before resizing.
        images.append(resize(image[:, :, :3], size))
        # Strip just the extension so names that contain dots stay intact.
        stem = os.path.splitext(file_name)[0]
        labels.append([label_dict[stem]])
        # For a visual check, plotit(image, images[-1], label_dict[stem])
        # can be called here.
    return np.array(images), np.array(labels)


x_train, y_train = _load_image_set(train_file_path, train_label_dict)
print("shape of x_train: ", x_train.shape)
print("shape of y_train: ", y_train.shape)

x_test, y_test = _load_image_set(test_file_path, test_label_dict)
print("shape of x_test: ", x_test.shape)
print("shape of y_test: ", y_test.shape)

### 导入库 和 数据预处理

In [None]:
from keras.models import Sequential                               # 序列模型，线性逐层叠加
from keras.layers import Dense, Activation, Flatten, Dropout      # 导入全连接层、激活函数层、二维转一维、Dropout等神经网络常用层
from keras.optimizers import SGD                                  # 导入随机梯度下降优化器
import matplotlib.pyplot as plt # 导入matplotlib库
from keras.utils import to_categorical

# Data preprocessing
num_classes = 6  # the data set has 6 classes

# One-hot encode the integer labels of both splits.
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

# Report the array shapes after encoding.
for caption, array in (
    ("shape of x_train: ", x_train),
    ("shape of y_train: ", y_train),
    ("shape of x_test: ", x_test),
    ("shape of y_test: ", y_test),
):
    print(caption, array.shape)

### 模型搭建

In [None]:
from keras.layers import Conv2D, MaxPooling2D  # 从keras导入卷积层和最大池化层

# Small CNN: two convolution layers, one pooling layer, then a dense head.
model = Sequential([
    # 8 filters of size 3x3; padding='same' keeps the spatial size unchanged.
    # The first layer declares the input shape so later layers can be sized.
    Conv2D(8, (3, 3), padding='same', input_shape=x_train.shape[1:]),
    Activation('relu'),
    # 8 filters of size 5x5 with the default padding.
    Conv2D(8, (5, 5)),
    Activation('relu'),
    # Keep the maximum of every 2x2 window.
    MaxPooling2D(pool_size=(2, 2)),

    # Classifier head: flatten, one hidden layer, softmax over the classes.
    Flatten(),
    Dense(20),
    Activation('relu'),
    Dense(num_classes),
    Activation('softmax'),
])

# Compile with multi-class cross-entropy loss, the Adam optimizer and
# accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer="adam",
    metrics=['accuracy'],
)
model.summary()

### 模型训练

In [None]:
batch_size = 32  # images per gradient update
epochs = 20      # full passes over the training data

# Options forwarded to model.fit; the test split doubles as validation data.
fit_options = dict(
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,  # print progress while training
    validation_data=(x_test, y_test),
)
history = model.fit(x_train, y_train, **fit_options)

# evaluate() returns the loss followed by the compiled metrics.
score = model.evaluate(x_test, y_test, verbose=0)
for caption, metric_value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, metric_value)

### 图片预测

In [None]:
import matplotlib.pyplot as plt

# Predict class probabilities for every test image and show each image with
# its probabilities, predicted class and true class in the title.
y_pred = model.predict(x_test)
for image, probabilities, one_hot_label in zip(x_test, y_pred, y_test):
    # Convert NumPy scalars to built-in floats before rounding: str() of a
    # list uses each element's repr, which newer NumPy versions render as
    # "np.float32(...)" and would clutter the title.
    pred = [round(float(p), 2) for p in probabilities]
    plt.title(" prob=" + str(pred) + ", predict=" + str(np.argmax(probabilities)) + ", label=" + str(np.argmax(one_hot_label)))
    plt.imshow(image)
    plt.show()

# 用卷积神经网络 CNN 识别CIFAR-10数据集

### Cifar10数据集导入

In [None]:
import pickle
# Load the pickled CIFAR-10 splits.
# NOTE: pickle can execute arbitrary code while loading; only use this with
# a data file from a trusted source.
cifar10_path = "../input/youthaiimageclassification/cifar10.pkl"
with open(cifar10_path, "rb") as f:
    train_split, test_split = pickle.load(f)
x_train, y_train = train_split
x_test, y_test = test_split

    
from keras.models import Sequential                               # 序列模型，线性逐层叠加
from keras.layers import Dense, Activation, Flatten, Dropout      # 导入全连接层、激活函数层、二维转一维、Dropout等神经网络常用层
from keras.optimizers import SGD                                  # 导入随机梯度下降优化器
import matplotlib.pyplot as plt # 导入matplotlib库
from keras.utils import to_categorical

# Data preprocessing
# Report the raw array shapes before any transformation.
for caption, array in (
    ("shape of x_train: ", x_train),
    ("shape of y_train: ", y_train),
    ("shape of x_test: ", x_test),
    ("shape of y_test: ", y_test),
):
    print(caption, array.shape)

# Normalise the pixel values by dividing by 255.
x_train = x_train / 255
x_test = x_test / 255

num_classes = 10  # the data set has 10 classes

# One-hot encode the labels of both splits.
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

### 模型搭建

In [None]:
from keras.layers import Conv2D, MaxPooling2D  # 从keras导入卷积层和最大池化层

# CNN for CIFAR-10: two convolution stages with pooling and dropout,
# followed by a dense classifier head.
model = Sequential([
    # Stage 1 -- 16 filters of 5x5; padding='same' keeps the spatial size.
    # The first layer declares the input shape so later layers can be sized.
    Conv2D(16, (5, 5), padding='same', input_shape=x_train.shape[1:]),
    Activation('relu'),
    # 32 filters of 5x5 with the default padding, followed by a sigmoid.
    Conv2D(32, (5, 5)),
    Activation('sigmoid'),
    # Keep the maximum of every 2x2 window.
    MaxPooling2D(pool_size=(2, 2)),
    # Dropout with rate 0.25.
    Dropout(0.25),

    # Stage 2 -- 64 filters of 5x5, size-preserving padding.
    Conv2D(64, (5, 5), padding='same'),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head: flatten, 100-unit hidden layer, dropout, softmax.
    Flatten(),
    Dense(100),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])

# Compile with multi-class cross-entropy loss, the Adam optimizer and
# accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer="adam",
    metrics=['accuracy'],
)
model.summary()

### 模型训练

In [None]:
# Training on CIFAR-10 is switched off by default. Set this flag to True to
# train and evaluate the model. A flag keeps the code syntax-checked and
# avoids echoing a string literal as cell output.
RUN_CIFAR10_TRAINING = False

if RUN_CIFAR10_TRAINING:
    batch_size = 32  # images per gradient update
    epochs = 5       # full passes over the training data
    history = model.fit(
        x_train, y_train,
        batch_size=batch_size,
        epochs=epochs,
        verbose=1,                         # print progress while training
        validation_data=(x_test, y_test),  # test split doubles as validation data
    )
    # evaluate() returns the loss followed by the compiled metrics.
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])