This repository has been archived by the owner on Dec 2, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 9
/
train.py
163 lines (134 loc) · 5.87 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
"""
Copyright (C) Microsoft Corporation. All rights reserved.
Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual,
royalty-free right to use, copy, and modify the software code provided by us
("Software Code"). You may not sublicense the Software Code or any use of it
(except to your affiliates and to vendors to perform work on your behalf)
through distribution, network access, service agreement, lease, rental, or
otherwise. This license does not purport to express any claim of ownership over
data you may have shared with Microsoft in the creation of the Software Code.
Unless applicable law gives you more rights, Microsoft reserves all other
rights not expressly granted herein, whether by implication, estoppel or
otherwise.
THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
"""
# set numpy random seed to get consistent keras results
import numpy as np
np.random.seed(7)
from keras.models import Sequential # noqa: E402
from keras.layers import Conv2D, MaxPooling2D # noqa: E402
from keras.layers import Dropout, Flatten, Dense # noqa: E402
from keras import optimizers # noqa: E402
from keras.preprocessing.image import ImageDataGenerator # noqa: E402
# Split the image folder into training and validation data generators
def split_data(data_folder, preprocessing_args):
    """Create training and validation image generators from one folder.

    Args:
        data_folder: Directory handed to ``flow_from_directory``.
        preprocessing_args: Mapping providing ``'image_size'`` (itself a
            mapping with ``'x'`` and ``'y'``) and ``'batch_size'``.

    Returns:
        A dict with the training generator under ``"train"``, the
        validation generator under ``"test"`` and the sorted class names
        under ``"classes"``.
    """
    size_config = preprocessing_args['image_size']
    # NOTE(review): Keras documents target_size as (height, width) while
    # this tuple is filled as (x, y) - confirm the intended orientation
    # before using non-square sizes.
    target_size = (size_config['x'], size_config['y'])
    images_per_batch = preprocessing_args['batch_size']

    print("Getting Data...")
    image_source = ImageDataGenerator(
        rescale=1./255,  # normalize pixel values
        validation_split=0.3)  # hold back 30% of the images for validation

    # Both subsets are read with the same settings; only `subset` differs.
    shared_flow_args = dict(
        target_size=target_size,
        batch_size=images_per_batch,
        class_mode='categorical',
        seed=7)

    print("Preparing training dataset...")
    training_flow = image_source.flow_from_directory(
        data_folder,
        subset='training',
        **shared_flow_args)

    print("Preparing validation dataset...")
    validation_flow = image_source.flow_from_directory(
        data_folder,
        subset='validation',
        **shared_flow_args)

    class_names = sorted(training_flow.class_indices)
    print("class names: ", class_names)

    return {
        "train": training_flow,
        "test": validation_flow,
        "classes": class_names,
    }
# Train the model; return the trained model and its training history
def train_model(data, train_args, preprocessing_args):
    """Build, compile and fit the CNN image classifier.

    Args:
        data: Mapping holding the training generator under ``'train'``
            and the validation generator under ``'test'``.
        train_args: Mapping providing ``'num_epochs'``.
        preprocessing_args: Mapping providing ``'batch_size'``, used to
            convert sample counts into step counts.

    Returns:
        A ``(model, history)`` tuple: the fitted model and the object
        returned by ``fit_generator``.
    """
    train_generator = data['train']
    validation_generator = data['test']
    batch_size = preprocessing_args['batch_size']

    # Define the CNN classifier as a sequence of layers
    model = Sequential()

    # The input layer accepts an image and applies a convolution
    # that uses 24 6x6 filters and a rectified linear unit activation function
    model.add(Conv2D(
        24,
        (6, 6),
        input_shape=train_generator.image_shape,
        activation='relu'))
    # Next we'll add a max pooling layer with a 2x2 patch
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Second convolution (48 6x6 filters) followed by max pooling
    model.add(Conv2D(48, (6, 6), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Third convolution (96 6x6 filters) followed by max pooling
    model.add(Conv2D(96, (6, 6), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # A dropout layer randomly drops some nodes to
    # reduce inter-dependencies (which can cause over-fitting)
    model.add(Dropout(0.5))

    # Now we'll flatten the feature maps and generate an output
    # layer with a predicted probability for each class
    model.add(Flatten())
    model.add(Dense(train_generator.num_classes, activation='softmax'))

    # With the layers defined, we can now compile the model
    # for categorical (multi-class) classification
    opt = optimizers.Adam(lr=0.001)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    num_epochs = train_args['num_epochs']

    # Floor division yields 0 when a subset holds fewer images than one
    # batch, which would leave an epoch without a single batch; always
    # request at least one step.
    steps_per_epoch = max(1, train_generator.samples // batch_size)
    validation_steps = max(1, validation_generator.samples // batch_size)

    history = model.fit_generator(
        train_generator,
        steps_per_epoch=steps_per_epoch,
        validation_data=validation_generator,
        validation_steps=validation_steps,
        epochs=num_epochs)
    return model, history
# Evaluate the metrics for the model
def get_model_metrics(history):
    """Return the final-epoch loss and accuracy from a training history.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch lists of values.

    Returns:
        A dict with the last recorded value under ``'loss'`` and
        ``'accuracy'``.

    Raises:
        KeyError: If the loss or the accuracy metric was not recorded.
    """
    recorded = history.history
    loss = recorded['loss'][-1]

    # Some Keras releases log the accuracy metric as 'acc' instead of
    # 'accuracy'; fall back to it only when 'accuracy' is absent so the
    # original KeyError('accuracy') is kept when neither key exists.
    if 'accuracy' not in recorded and 'acc' in recorded:
        accuracy_key = 'acc'
    else:
        accuracy_key = 'accuracy'
    accuracy = recorded[accuracy_key][-1]

    metrics = {
        'loss': loss,
        'accuracy': accuracy
    }
    return metrics
def main():
    """Run data preparation, training and metric reporting.

    All settings (epochs, image size, batch size, data directory) are
    hard-coded in this function.
    """
    print("Running train.py")

    # Configuration for this run
    data_dir = "data/processed"
    preprocessing_args = {
        "image_size": {"x": 128, "y": 128},
        "batch_size": 30}
    train_args = {"num_epochs": 10}

    data = split_data(data_dir, preprocessing_args)
    # The fitted model is not used further here; only the history is read.
    _model, history = train_model(data, train_args, preprocessing_args)

    # Report each final-epoch metric on its own line
    for metric_name, metric_value in get_model_metrics(history).items():
        print(f"{metric_name}: {metric_value}")


if __name__ == '__main__':
    main()