In [1]:
import yaml
import pymongo
from urllib.parse import quote_plus as quote
import numpy as np
import random

collection_name = 'augmented_dataset' #['unique_dataset', 'augmented_dataset', 'initial_dataset']

In [2]:
# Подключимся к нашей коллекции

with open('../env/credsw.yaml', 'r') as file:
    creds_dict = yaml.safe_load(file)
    
url = 'mongodb://{user}:{pw}@{hosts}/?{rs}&authSource={auth_src}&{am}&tls=true&tlsCAFile={cert_file}'.format(
    user=creds_dict['username'],
    pw=quote(creds_dict['password']),
    hosts=creds_dict['host'],
    rs='replicaSet=rs01',
    auth_src=creds_dict['database'],
    am='authMechanism=DEFAULT',
    cert_file='../env/root.crt'
    )

dbs = pymongo.MongoClient(url)

db = dbs[creds_dict['database']]

collection = db[collection_name]

In [3]:
# Выгрузим данные из коллекции
pipeline = [{ "$unwind" : "$boxes" },
            {"$project":{
                "size": "$boxes.size",
                "stacking": "$boxes.stacking",
                "turnover": "$boxes.turnover",
                "loading_size": "$loading_size",
                "density_percent": "$density_percent"
                }},
            {"$group": {"_id": "$_id",
                        "loading_size": {"$first":"$loading_size"},
                        "density_percent": {"$first":"$density_percent"},
                        "boxes": {
                            "$push":  {
                                "size": "$size",
                                "stacking": "$stacking",
                                "turnover": "$turnover"                         
                            }
                            
                        }
            }
            }
            ]
result = collection.aggregate(pipeline, allowDiskUse=True)

In [4]:
# Подготовим train и test datasets
test_size = 1000

dataset = [
    (item['density_percent'],
    [[item['loading_size']['width'], item['loading_size']['height'], item['loading_size']['length'], False, False]] + \
    [ [box['size']['width'], box['size']['height'], box['size']['length'], box['stacking'], box['turnover']] for box in item['boxes']])
    for item in result
]

#y_train, y_test = [round(item[0],0) for item in dataset[0:-test_size]], [round(item[0],0) for item in dataset[-test_size : ]]
y_train, y_test = [[1 if i==round(item[0],0) else 0 for i in range(101)] for item in dataset[0:-test_size]], [[1 if i==round(item[0],0) else 0 for i in range(101)] for item in dataset[-test_size : ]]
X_train, X_test = [item[1] for item in dataset[0:-test_size]], [item[1] for item in dataset[-test_size:]]

In [5]:
# Выровняем размерности входных матриц X
max_size = max(max([len(i) for i in X_train]), max([len(i) for i in X_test]))
print(f"Max_size: {max_size}")

for i in range(len(X_train)):
    add_dims = max_size - len(X_train[i])
    X_train[i] = np.concatenate((np.array(X_train[i]), np.zeros((add_dims, 5))))

for i in range(len(X_test)):
    add_dims = max_size - len(X_test[i])
    X_test[i] = np.concatenate((np.array(X_test[i]), np.zeros((add_dims, 5))))


Max_size: 2850


# VGGNet-like CNN

In [6]:
from keras.models import Sequential
from keras.layers import Conv1D, AveragePooling1D, Flatten, Dense
from keras.callbacks import ModelCheckpoint
from keras import regularizers, optimizers

In [24]:
base_hidden_units = 32
weight_decay = 1e-4
model = Sequential()

model.add(Conv1D(base_hidden_units, kernel_size=3, padding='same', kernel_regularizer=regularizers.12(weight_decay), input_shape=(max_size, 5)))
model.add(AveragePooling1D(pool_size=2, strides=2, padding='valid'))

model.add(Conv1D(filters=16, kernel_size=2, strides=1, padding='valid', activation='tanh'))
model.add(AveragePooling1D(pool_size=2, strides=2, padding='valid'))

model.add(Conv1D(filters=120, kernel_size=2, strides=1, padding='valid', activation='tanh'))

model.add(Flatten())

model.add(Dense(900, activation='tanh'))

model.add(Dense(101, activation='softmax'))

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_3 (Conv1D)           (None, 2850, 6)           66        
                                                                 
 average_pooling1d_2 (Averag  (None, 1425, 6)          0         
 ePooling1D)                                                     
                                                                 
 conv1d_4 (Conv1D)           (None, 1424, 16)          208       
                                                                 
 average_pooling1d_3 (Averag  (None, 712, 16)          0         
 ePooling1D)                                                     
                                                                 
 conv1d_5 (Conv1D)           (None, 711, 120)          3960      
                                                                 
 flatten_1 (Flatten)         (None, 85320)            

In [25]:
# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

In [26]:
checkpointer = ModelCheckpoint(filepath='model.weights.best.hdf5', verbose=1, save_best_only=True)
hist = model.fit(np.array(X_train),np.array(y_train), batch_size=50, epochs=20, validation_data=(np.array(X_test), np.array(y_test)),callbacks=[checkpointer], verbose=2, shuffle=True)

Epoch 1/20

Epoch 1: val_loss improved from inf to 1.51180, saving model to model.weights.best.hdf5
187/187 - 144s - loss: 1.8410 - accuracy: 0.3890 - val_loss: 1.5118 - val_accuracy: 0.4840 - 144s/epoch - 772ms/step
Epoch 2/20

Epoch 2: val_loss improved from 1.51180 to 1.29531, saving model to model.weights.best.hdf5
187/187 - 144s - loss: 1.2423 - accuracy: 0.5884 - val_loss: 1.2953 - val_accuracy: 0.5240 - 144s/epoch - 772ms/step
Epoch 3/20

Epoch 3: val_loss improved from 1.29531 to 0.99137, saving model to model.weights.best.hdf5
187/187 - 127s - loss: 0.9752 - accuracy: 0.6923 - val_loss: 0.9914 - val_accuracy: 0.7600 - 127s/epoch - 681ms/step
Epoch 4/20


In [None]:
score = model.evaluate(np.array(X_test), np.array(y_test), verbose=0)

In [None]:
print(f"Test accuracy: {score[1]}")

Test accuracy: 0.8600000143051147


### Проверка на нескольких случайных примерах из test set

In [None]:
not_guessed = list()
for i in random.sample(range(1000), 1000):
    prediction = model.predict(np.array([X_test[i]]), verbose=0)
    prediction = np.where(prediction[0]==max(prediction[0]))[0][0]
    actual = y_test[i].index(1)
    print(f"Prediction: {prediction}; Actual: {actual}; Guessed: {actual==prediction}")
    if actual!=prediction:
        not_guessed.append(X_test[i])

Prediction: 70; Actual: 70; Guessed: True
Prediction: 70; Actual: 70; Guessed: True
Prediction: 83; Actual: 83; Guessed: True
Prediction: 70; Actual: 70; Guessed: True
Prediction: 87; Actual: 87; Guessed: True
Prediction: 97; Actual: 100; Guessed: False
Prediction: 83; Actual: 83; Guessed: True
Prediction: 70; Actual: 70; Guessed: True
Prediction: 70; Actual: 70; Guessed: True
Prediction: 100; Actual: 100; Guessed: True
Prediction: 70; Actual: 70; Guessed: True
Prediction: 100; Actual: 100; Guessed: True
Prediction: 70; Actual: 70; Guessed: True
Prediction: 97; Actual: 79; Guessed: False
Prediction: 70; Actual: 70; Guessed: True
Prediction: 74; Actual: 74; Guessed: True
Prediction: 70; Actual: 70; Guessed: True
Prediction: 100; Actual: 92; Guessed: False
Prediction: 83; Actual: 83; Guessed: True
Prediction: 72; Actual: 59; Guessed: False
Prediction: 74; Actual: 74; Guessed: True
Prediction: 92; Actual: 62; Guessed: False
Prediction: 100; Actual: 100; Guessed: True
Prediction: 70; Actua

In [None]:
lengths = [sum([sum(x)>0 for x in not_guessed[i]]) for i in range(len(not_guessed))]

In [None]:
lengths.sort()

In [None]:
len(lengths)

140

In [None]:
model.save(filepath='../API/CNN.h5', overwrite=True, save_format='h5',)