In [1]:
import yaml
import pymongo
from urllib.parse import quote_plus as quote
import numpy as np
import random

# Collection to train on; one of: 'unique_dataset', 'augmented_dataset', 'initial_dataset'.
collection_name = "augmented_dataset"

In [2]:
# Connect to the MongoDB collection selected by `collection_name`.

# Credentials are kept outside the notebook. The YAML file must provide the
# keys read below: 'username', 'password', 'host' and 'database'.
with open('../env/credsw.yaml', 'r') as file:
    creds_dict = yaml.safe_load(file)

# Both the user name and the password are percent-encoded: reserved characters
# such as '@', ':' or '/' in either of them would otherwise corrupt the URI.
url = 'mongodb://{user}:{pw}@{hosts}/?{rs}&authSource={auth_src}&{am}&tls=true&tlsCAFile={cert_file}'.format(
    user=quote(creds_dict['username']),
    pw=quote(creds_dict['password']),
    hosts=creds_dict['host'],
    rs='replicaSet=rs01',
    auth_src=creds_dict['database'],
    am='authMechanism=DEFAULT',
    cert_file='../env/root.crt'
    )

# `dbs` is the client object; the database name doubles as the auth source.
dbs = pymongo.MongoClient(url)

db = dbs[creds_dict['database']]

collection = db[collection_name]

In [3]:
# Fetch the data from the collection.
# Each document is reduced to the fields used later on: the container size,
# the target density and, for every box, its size, stacking and turnover.
box_fields = ("size", "stacking", "turnover")

pipeline = [
    # Emit one document per element of the `boxes` array.
    {"$unwind": "$boxes"},
    # Lift the needed box attributes to the top level, next to the
    # per-document fields.
    {"$project": {
        **{field: f"$boxes.{field}" for field in box_fields},
        "loading_size": "$loading_size",
        "density_percent": "$density_percent",
    }},
    # Collect the boxes back into one document per original `_id`.
    {"$group": {
        "_id": "$_id",
        "loading_size": {"$first": "$loading_size"},
        "density_percent": {"$first": "$density_percent"},
        "boxes": {"$push": {field: f"${field}" for field in box_fields}},
    }},
]
result = collection.aggregate(pipeline, allowDiskUse=True)

In [4]:
# Build the train and test datasets.
test_size = 1000   # number of trailing samples held out for testing
n_classes = 101    # one class per whole density percent, 0..100


def one_hot_density(density_percent, num_classes=n_classes):
    """Return a one-hot list encoding the density rounded to a whole percent.

    The rounded value is clamped to ``[0, num_classes - 1]`` so that every
    target has exactly one hot position; without the clamp an out-of-range
    density would produce an all-zero vector.
    """
    label = min(max(round(density_percent, 0), 0), num_classes - 1)
    return [1 if i == label else 0 for i in range(num_classes)]


def sample_features(item):
    """Return the feature rows of one document.

    The first row describes the container (``loading_size``) with both flags
    set to False; it is followed by one row per box:
    ``[width, height, length, stacking, turnover]``.
    """
    container = item['loading_size']
    rows = [[container['width'], container['height'], container['length'], False, False]]
    rows.extend(
        [box['size']['width'], box['size']['height'], box['size']['length'], box['stacking'], box['turnover']]
        for box in item['boxes']
    )
    return rows


# `result` is a cursor and can be consumed only once: re-running this cell
# without re-running the aggregation yields no documents.
dataset = [(item['density_percent'], sample_features(item)) for item in result]

if len(dataset) <= test_size:
    raise ValueError(
        f"Got {len(dataset)} samples, need more than test_size={test_size}; "
        "re-run the aggregation cell if the cursor was already consumed."
    )

# The last `test_size` samples form the test split, everything before is train.
train_part, test_part = dataset[:-test_size], dataset[-test_size:]

y_train = [one_hot_density(density) for density, _ in train_part]
y_test = [one_hot_density(density) for density, _ in test_part]
X_train = [features for _, features in train_part]
X_test = [features for _, features in test_part]

In [5]:
# Bring all input matrices X to the same shape.
def pad_samples(samples, length, n_features=5):
    """Zero-pad every sample of ``samples`` in place to ``length`` rows.

    Each sample becomes a 2-D array with ``length`` rows and ``n_features``
    columns; the appended rows are all zeros.
    """
    for idx, sample in enumerate(samples):
        rows = np.array(sample)
        filler = np.zeros((length - len(rows), n_features))
        samples[idx] = np.concatenate((rows, filler))


# Longest sample across both splits; `default=0` keeps an empty split from
# raising inside max().
max_size = max(
    max(map(len, X_train), default=0),
    max(map(len, X_test), default=0),
)

pad_samples(X_train, max_size)
pad_samples(X_test, max_size)


# Base CNN

In [6]:
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from keras.callbacks import ModelCheckpoint

In [10]:
# Baseline 1-D CNN: three Conv1D + MaxPooling1D stages, dropout, and a
# softmax layer with one output per density class (101 in total).
model = Sequential([
    # Input: `max_size` rows with 5 features each.
    Conv1D(filters=16, kernel_size=2, padding='same', activation='relu', input_shape=(max_size, 5)),
    MaxPooling1D(pool_size=2),

    Conv1D(filters=16, kernel_size=2, padding='same', activation='relu'),
    MaxPooling1D(pool_size=2),

    Conv1D(filters=16, kernel_size=2, padding='same', activation='relu'),
    MaxPooling1D(pool_size=2),

    Dropout(0.3),
    Flatten(),
    Dense(101, activation='softmax'),
])

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_3 (Conv1D)           (None, 2850, 16)          176       
                                                                 
 max_pooling1d_3 (MaxPooling  (None, 1425, 16)         0         
 1D)                                                             
                                                                 
 conv1d_4 (Conv1D)           (None, 1425, 16)          528       
                                                                 
 max_pooling1d_4 (MaxPooling  (None, 712, 16)          0         
 1D)                                                             
                                                                 
 conv1d_5 (Conv1D)           (None, 712, 16)           528       
                                                                 
 max_pooling1d_5 (MaxPooling  (None, 356, 16)         

In [11]:
# Configure training: cross-entropy over the one-hot targets, RMSprop
# optimizer, accuracy reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [12]:
# Train the network, writing a checkpoint each time the validation loss improves.
# NOTE(review): the test split is also passed as validation data, so the
# checkpoint selection and the test score reported afterwards are not
# independent - confirm this is intended.
checkpointer = ModelCheckpoint(
    filepath='model.weights.best.hdf5',
    verbose=1,
    save_best_only=True,
)
hist = model.fit(
    np.array(X_train),
    np.array(y_train),
    batch_size=50,
    epochs=20,
    validation_data=(np.array(X_test), np.array(y_test)),
    callbacks=[checkpointer],
    verbose=2,
    shuffle=True,
)

Epoch 1/20

Epoch 1: val_loss improved from inf to 1.53768, saving model to model.weights.best.hdf5
187/187 - 10s - loss: 2.1130 - accuracy: 0.3354 - val_loss: 1.5377 - val_accuracy: 0.4950 - 10s/epoch - 52ms/step
Epoch 2/20

Epoch 2: val_loss improved from 1.53768 to 0.96616, saving model to model.weights.best.hdf5
187/187 - 18s - loss: 1.4065 - accuracy: 0.5750 - val_loss: 0.9662 - val_accuracy: 0.7560 - 18s/epoch - 95ms/step
Epoch 3/20

Epoch 3: val_loss improved from 0.96616 to 0.75042, saving model to model.weights.best.hdf5
187/187 - 18s - loss: 1.0172 - accuracy: 0.7218 - val_loss: 0.7504 - val_accuracy: 0.8320 - 18s/epoch - 96ms/step
Epoch 4/20

Epoch 4: val_loss improved from 0.75042 to 0.60730, saving model to model.weights.best.hdf5
187/187 - 8s - loss: 0.8230 - accuracy: 0.7854 - val_loss: 0.6073 - val_accuracy: 0.8530 - 8s/epoch - 44ms/step
Epoch 5/20

Epoch 5: val_loss improved from 0.60730 to 0.56707, saving model to model.weights.best.hdf5
187/187 - 9s - loss: 0.7000 - 

In [13]:
# Evaluate the trained model on the test split; the result is indexed below
# as [loss, accuracy].
score = model.evaluate(
    x=np.array(X_test),
    y=np.array(y_test),
    verbose=0,
)

In [14]:
# score[0] is the loss, score[1] the accuracy metric set in compile().
print("Test accuracy: {}".format(score[1]))

Test accuracy: 0.9150000214576721


### Проверка на нескольких случайных примерах из test set

In [15]:
# Compare the predicted and the true class of every test sample, visiting the
# samples in random order, and collect the inputs the model got wrong.
not_guessed = list()

# One batched forward pass instead of a separate predict() call per sample;
# argmax over the class axis gives the predicted class of each sample.
predicted_classes = np.argmax(model.predict(np.array(X_test), verbose=0), axis=1)

# Derive the number of samples from the data rather than hard-coding it.
n_test = len(X_test)
for i in random.sample(range(n_test), n_test):
    prediction = predicted_classes[i]
    actual = y_test[i].index(1)  # position of the hot bit = true class
    print(f"Prediction: {prediction}; Actual: {actual}; Guessed: {actual==prediction}")
    if actual != prediction:
        not_guessed.append(X_test[i])

Prediction: 70; Actual: 70; Guessed: True
Prediction: 70; Actual: 70; Guessed: True
Prediction: 100; Actual: 100; Guessed: True
Prediction: 70; Actual: 70; Guessed: True
Prediction: 74; Actual: 74; Guessed: True
Prediction: 70; Actual: 70; Guessed: True
Prediction: 83; Actual: 83; Guessed: True
Prediction: 70; Actual: 70; Guessed: True
Prediction: 83; Actual: 83; Guessed: True
Prediction: 93; Actual: 76; Guessed: False
Prediction: 74; Actual: 74; Guessed: True
Prediction: 70; Actual: 70; Guessed: True
Prediction: 83; Actual: 83; Guessed: True
Prediction: 70; Actual: 70; Guessed: True
Prediction: 60; Actual: 60; Guessed: True
Prediction: 70; Actual: 70; Guessed: True
Prediction: 99; Actual: 84; Guessed: False
Prediction: 100; Actual: 100; Guessed: True
Prediction: 100; Actual: 100; Guessed: True
Prediction: 70; Actual: 70; Guessed: True
Prediction: 74; Actual: 74; Guessed: True
Prediction: 74; Actual: 74; Guessed: True
Prediction: 100; Actual: 100; Guessed: True
Prediction: 97; Actual: 

In [16]:
# For every misclassified sample, count the rows whose values sum to a
# positive number (the all-zero padding rows are therefore not counted).
lengths = [sum(sum(row) > 0 for row in sample) for sample in not_guessed]

In [17]:
# Order the row counts ascending.
lengths = sorted(lengths)

In [18]:
# Number of misclassified test samples (one entry in `lengths` per sample in `not_guessed`).
len(lengths)

85

In [19]:
# Export the model for the API.
# NOTE(review): this stores the in-memory model as left by the last training
# epoch; the best checkpoint written by ModelCheckpoint is not reloaded in the
# visible cells - confirm which one the API should serve.
model.save(filepath='../API/CNN.h5')