In [1]:
import yaml
import pymongo
from urllib.parse import quote_plus as quote
import numpy as np
import random

# Name of the MongoDB collection the notebook reads from; one of
# 'unique_dataset', 'augmented_dataset', 'initial_dataset'.
collection_name = 'augmented_dataset'

In [2]:
# Connect to the MongoDB collection selected by `collection_name`.

# Credentials live outside the notebook; the YAML file must provide the keys
# 'username', 'password', 'host' and 'database'.
with open('../env/credsw.yaml', 'r') as file:
    creds_dict = yaml.safe_load(file)

# Both parts of the userinfo section must be percent-encoded: a raw '@', ':',
# '/' or '%' in the user name (not only in the password) would otherwise yield
# an invalid or mis-parsed connection string.
url = 'mongodb://{user}:{pw}@{hosts}/?{rs}&authSource={auth_src}&{am}&tls=true&tlsCAFile={cert_file}'.format(
    user=quote(str(creds_dict['username'])),
    pw=quote(creds_dict['password']),
    hosts=creds_dict['host'],
    rs='replicaSet=rs01',
    auth_src=creds_dict['database'],
    am='authMechanism=DEFAULT',
    cert_file='../env/root.crt'
    )

# Client handle for the deployment addressed by `url`.
dbs = pymongo.MongoClient(url)

# The database used for authentication is also the one that stores the data.
db = dbs[creds_dict['database']]

collection = db[collection_name]

In [3]:
# Fetch the data from the collection.

# Stage 1: emit one document per element of the "boxes" array.
unwind_stage = {"$unwind": "$boxes"}

# Stage 2: keep only the per-box fields and the per-document fields needed later.
project_stage = {
    "$project": {
        "size": "$boxes.size",
        "stacking": "$boxes.stacking",
        "turnover": "$boxes.turnover",
        "loading_size": "$loading_size",
        "density_percent": "$density_percent",
    }
}

# Stage 3: regroup by the original document id, collecting the trimmed boxes
# back into a "boxes" array.
group_stage = {
    "$group": {
        "_id": "$_id",
        "loading_size": {"$first": "$loading_size"},
        "density_percent": {"$first": "$density_percent"},
        "boxes": {
            "$push": {
                "size": "$size",
                "stacking": "$stacking",
                "turnover": "$turnover",
            }
        },
    }
}

pipeline = [unwind_stage, project_stage, group_stage]
result = collection.aggregate(pipeline, allowDiskUse=True)

In [4]:
# Build the train and test datasets.
test_size = 1000


def _one_hot(density):
    """Return a 101-element 0/1 list with a 1 at the rounded density (classes 0..100)."""
    label = round(density, 0)
    return [1 if cls == label else 0 for cls in range(101)]


# Each sample is (density_percent, rows): the first row describes the loading
# space (with both flags set to False), every following row describes one box.
dataset = []
for item in result:
    density = item['density_percent']
    space = item['loading_size']
    rows = [[space['width'], space['height'], space['length'], False, False]]
    for box in item['boxes']:
        dims = box['size']
        rows.append([dims['width'], dims['height'], dims['length'], box['stacking'], box['turnover']])
    dataset.append((density, rows))

# The last `test_size` samples form the test set; everything before is training data.
train_part = dataset[:-test_size]
test_part = dataset[-test_size:]

# Scalar-label alternative kept for reference:
#y_train, y_test = [round(item[0],0) for item in dataset[0:-test_size]], [round(item[0],0) for item in dataset[-test_size : ]]
y_train = [_one_hot(density) for density, _ in train_part]
y_test = [_one_hot(density) for density, _ in test_part]
X_train = [rows for _, rows in train_part]
X_test = [rows for _, rows in test_part]

In [5]:
# Bring all input matrices X to the same number of rows.
# The target row count is the longest sample found in either split.
max_size = max(max(map(len, X_train)), max(map(len, X_test)))

# Replace every sample, in place, with an array padded by all-zero rows
# (5 columns each) up to `max_size` rows.
for samples in (X_train, X_test):
    for idx, sample in enumerate(samples):
        padding = np.zeros((max_size - len(sample), 5))
        samples[idx] = np.concatenate((np.array(sample), padding))


# Base CNN

In [6]:
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from keras.callbacks import ModelCheckpoint

In [7]:
# Three identical Conv1D + MaxPooling1D stages, then dropout, flattening and a
# 101-way softmax (one output per rounded density class 0..100).
model = Sequential([
    # Stage 1 also declares the input shape: `max_size` rows of 5 features.
    Conv1D(filters=16, kernel_size=2, padding='same', activation='relu', input_shape=(max_size, 5)),
    MaxPooling1D(pool_size=2),
    # Stage 2
    Conv1D(filters=16, kernel_size=2, padding='same', activation='relu'),
    MaxPooling1D(pool_size=2),
    # Stage 3
    Conv1D(filters=16, kernel_size=2, padding='same', activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    Flatten(),
    Dense(101, activation='softmax'),
])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 2850, 16)          176       
                                                                 
 max_pooling1d (MaxPooling1D  (None, 1425, 16)         0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 1425, 16)          528       
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 712, 16)          0         
 1D)                                                             
                                                                 
 conv1d_2 (Conv1D)           (None, 712, 16)           528       
                                                                 
 max_pooling1d_2 (MaxPooling  (None, 356, 16)          0

In [8]:
# Configure training: one-hot targets call for categorical cross-entropy;
# accuracy is tracked as an additional metric.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [9]:
# Save the model to disk whenever the monitored validation quantity improves.
checkpointer = ModelCheckpoint(
    filepath='model.weights.best.hdf5',
    save_best_only=True,
    verbose=1,
)

# Train for 10 epochs; the test split doubles as the validation set here.
# Arrays are built inline so no extra copies stay alive after the call.
hist = model.fit(
    x=np.array(X_train),
    y=np.array(y_train),
    validation_data=(np.array(X_test), np.array(y_test)),
    epochs=10,
    batch_size=50,
    shuffle=True,
    callbacks=[checkpointer],
    verbose=2,
)

Epoch 1/10

Epoch 1: val_loss improved from inf to 1.53086, saving model to model.weights.best.hdf5
175/175 - 10s - loss: 1.8965 - accuracy: 0.4089 - val_loss: 1.5309 - val_accuracy: 0.4730 - 10s/epoch - 58ms/step
Epoch 2/10

Epoch 2: val_loss improved from 1.53086 to 0.97285, saving model to model.weights.best.hdf5
175/175 - 9s - loss: 1.2017 - accuracy: 0.6531 - val_loss: 0.9729 - val_accuracy: 0.7500 - 9s/epoch - 51ms/step
Epoch 3/10

Epoch 3: val_loss improved from 0.97285 to 0.80375, saving model to model.weights.best.hdf5
175/175 - 9s - loss: 0.8414 - accuracy: 0.7869 - val_loss: 0.8037 - val_accuracy: 0.7810 - 9s/epoch - 51ms/step
Epoch 4/10

Epoch 4: val_loss improved from 0.80375 to 0.58443, saving model to model.weights.best.hdf5
175/175 - 9s - loss: 0.6623 - accuracy: 0.8306 - val_loss: 0.5844 - val_accuracy: 0.8620 - 9s/epoch - 51ms/step
Epoch 5/10

Epoch 5: val_loss improved from 0.58443 to 0.51151, saving model to model.weights.best.hdf5
175/175 - 9s - loss: 0.5864 - accu

In [11]:
# Evaluate the model (as left by the last training epoch) on the test split.
score = model.evaluate(
    x=np.array(X_test),
    y=np.array(y_test),
    verbose=0,
)

In [13]:
# score[1] is read as the accuracy metric requested at compile time
# (presumably score[0] is the loss — confirm against the Keras version in use).
print(f"Test accuracy: {score[1]}")

Test accuracy: 0.9330000281333923


### Проверка на нескольких случайных примерах из test set

In [37]:
# Spot-check the model on up to 15 randomly chosen test samples.
# Indices are drawn from the actual size of X_test instead of a hard-coded
# 1000, so the cell keeps working when the test split size changes.
sample_count = min(15, len(X_test))
sample_ids = random.sample(range(len(X_test)), sample_count)

if sample_ids:
    # One batched predict call instead of one call per sample.
    batch_probs = model.predict(np.array([X_test[i] for i in sample_ids]), verbose=0)
    for i, class_probs in zip(sample_ids, batch_probs):
        # Predicted class = index of the highest softmax output
        # (first occurrence on ties, as in the original lookup).
        prediction = np.argmax(class_probs)
        # True class = position of the 1 in the one-hot target.
        actual = y_test[i].index(1)
        print(f"Prediction: {prediction}; Actual: {actual}; Guessed: {actual==prediction}")

Prediction: 83; Actual: 83; Guessed: True
Prediction: 100; Actual: 100; Guessed: True
Prediction: 100; Actual: 100; Guessed: True
Prediction: 86; Actual: 86; Guessed: True
Prediction: 100; Actual: 100; Guessed: True
Prediction: 74; Actual: 74; Guessed: True
Prediction: 100; Actual: 100; Guessed: True
Prediction: 70; Actual: 70; Guessed: True
Prediction: 74; Actual: 74; Guessed: True
Prediction: 100; Actual: 100; Guessed: True
Prediction: 100; Actual: 100; Guessed: True
Prediction: 83; Actual: 83; Guessed: True
Prediction: 70; Actual: 70; Guessed: True
Prediction: 100; Actual: 100; Guessed: True
Prediction: 70; Actual: 70; Guessed: True
