# 垃圾分类模型
### 数据集
出处：https://blog.csdn.net/unique_pei/article/details/105125244
内容：
一共有四个大文件夹，对应着不同的垃圾类别，每个文件夹中有各自包含的垃圾名称及其图片，当前数据集一共有246种垃圾，共包含图片80961张

### 模型概述
#### 问题
由于数据集类别过多，而很多类别的图片数量较少，个别类别图片较多，导致模型难以收敛
#### 方案
训练四个模型，从而降低每个模型的复杂度，提高识别率，和减少收敛时间，每个模型负责识别一个垃圾类别。读取数据时注意数据数量的均等，对数据数量特别大的进行随即丢弃，保证两种数据之间的数量比例大致为1：1。为了提高训练速度，每次直接把所有数据读入，于是每次只读两类的数据，然后进行多次训练从而提高模型的泛化能力。


## 导入库

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from PIL import Image
import os

## 读取数据

In [2]:
def readimage(path1, path2):
    '''
    params:
        path1: data目录
        path2: 参照物目录
    '''
    data = []
    data_1 = [] 
    for name in os.listdir(path1):
        img = Image.open(os.path.join(path1, name))
        img = np.asarray(img)
        data.append(img)
    for name in os.listdir(path2):
        img = Image.open(os.path.join(path2, name))
        img = np.asarray(img)
        data_1.append(img)
    data = np.asarray(data)
    data_1 = np.asarray(data_1)
    
    # 这个分类数据太多, 进行随机剪切
    if data.shape[0] > data_1.shape[0]:
        index = np.arange(data.shape[0])
        np.random.shuffle(index)
        data = data[index]
        data = data[:data_1.shape[0],:,:,:]
    else:
        index = np.arange(data_1.shape[0])
        np.random.shuffle(index)
        data_1 = data_1[index]
        data_1 = data_1[:data.shape[0],:,:,:]
    
    label = np.ones(data.shape[0])
    label_1 = np.zeros(data_1.shape[0])
    
    print('data:',data.shape,'label:', label.shape, 'data_1:',data_1.shape, 'label_1:', label_1.shape)
    data = np.concatenate((data,data_1))
    label = np.concatenate((label,label_1))
    
    
    index=np.arange(len(data))
    np.random.shuffle(index)
    data=data[index]
    label = label[index]
    print(data.shape, label.shape)
    return data, label

## 构建模型

In [3]:
def build_model_part(x, filters):
    conv1 = layers.Conv2D(filters=filters, kernel_size=3, padding='same', activation='relu')(x)
    conv1 = layers.Conv2D(filters=filters, kernel_size=3, padding='same', activation='relu')(conv1)
    conv1 = layers.Conv2D(filters=filters, kernel_size=3, padding='same', activation='relu')(conv1)
    
    conv2 = layers.Conv2D(filters=filters, kernel_size=3, padding='same', activation='relu')(x)
    conv2 = layers.Conv2D(filters=filters, kernel_size=3, padding='same', activation='relu')(conv2)
    
    conv3 = layers.Conv2D(filters=filters, kernel_size=3, padding='same', activation='relu')(x)
    
    conatenate = layers.concatenate([conv1, conv2, conv3])
    
    pool = layers.AveragePooling2D()(conatenate)
    
    norm = layers.BatchNormalization()(pool)
    
    return norm

def build_model(name, shape):
    # (128,128,3)
    inputs = layers.Input(shape)
    # (128,128,3) -> (64,64,3)
    model = build_model_part(inputs, 32)
    # (64,64,3) -> (32,32,3)
    model = build_model_part(model, 64)
    # (32,32,3) -> (16,16,3)
    model = build_model_part(model, 128)
    # (16,16,3) -> (8,8,3)
    model = build_model_part(model, 256)

    # 展平
    model = layers.Flatten()(model)

    # dropout层
    model = layers.Dropout(0.5)(model)
    # 第一层全连接
    model = layers.Dense(512, activation='relu')(model)
    # dropout层
    model = layers.Dropout(0.5)(model)
    # 第二层全连接
    model = layers.Dense(64, activation='relu')(model)
    outputs = layers.Dense(1, activation='sigmoid')(model)

    model = keras.Model(inputs=inputs, outputs=outputs, name=name)
    
    return model

## 主函数

In [7]:
def main(name, epochs, batch_size, model_dir):
    shape = (128,128,3)
    path = {'kehuishou': '/kaggle/input/resize_image/kehuishou',
            'youhai': '/kaggle/input/resize_image/youhai',
            'canyu': '/kaggle/input/resize_image/canyu',
            'qita': '/kaggle/input/resize_image/qita'}
    
    
    model = build_model(name=name,shape=shape)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    for key, value in path.items():
        if key == name:
            continue
        else:
            data, label = readimage(path[name], value)
            model.fit(x=data, y=label, epochs=epochs, validation_split=0.2, batch_size=batch_size)
            save_path = model_dir + '/' + name
            file_name = key + '.h5'
            
            if not os.path.exists(save_path):
                os.makedirs(save_path)
                
            save_path = os.path.join(save_path, file_name)
            model.save(save_path)
            print(key + '数据训练完成')
            del data, label
    print('训练结束')

## 运行

In [8]:
main(name='kehuishou', epochs=10, batch_size=128, model_dir='kaggle/working')

data: (4833, 128, 128, 3) label: (4833,) data_1: (4833, 128, 128, 3) label_1: (4833,)
(9666, 128, 128, 3) (9666,)
Train on 7732 samples, validate on 1934 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


OSError: Unable to create file (unable to open file: name = 'kaggle/working/kehuishou/youhai.h5', errno = 2, error message = 'No such file or directory', flags = 13, o_flags = 242)