In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive. The data paths in this notebook are
# written relative ("drive/My Drive/..."), so they only resolve while the
# working directory is /content.
drive.mount('/content/drive')

Data Preprocessing

In [None]:
import pandas as pd
# Load the labelled Task 1 table from the mounted Drive folder and preview
# its first rows.
df = pd.read_csv(
    "drive/My Drive/Colab Notebooks/Final Project/Task 1/train.csv"
)
df.head()

In [None]:
# Randomly split the rows of train.csv into train : validation = 3 : 1.
# The fixed random_state keeps the split identical between runs.
traindf = df.sample(frac=0.75, random_state=777)
# Every row that was not sampled for training becomes validation data.
validf = df.drop(index=traindf.index)

for split_frame in (traindf, validf):
    display(split_frame.head())
print('各DataFrame 大小:', len(df), len(traindf), len(validf))

In [None]:
# Load the table of test picture names (it has no target column yet) and
# preview its first rows.
testdf = pd.read_csv(
    "drive/My Drive/Colab Notebooks/Final Project/Task 1/testName.csv"
)
testdf.head()

In [None]:
import os
from keras.preprocessing.image import ImageDataGenerator

# All three splits read their pictures from the same Drive folder; the
# dataframe handed to each generator decides which files it serves.
image_dir = 'drive/My Drive/Colab Notebooks/Final Project/Task 1/totalDepthPic'
train_dir = image_dir
valid_dir = image_dir
test_dir = image_dir

# Options shared by the two labelled generators: file names are read from the
# 'Pic Name' column and the numeric target from 'Total rebars'.
# NOTE(review): has_ext=False presumably means the names in the dataframe
# carry no file extension -- confirm against the installed Keras version.
labelled_flow_args = dict(
    x_col='Pic Name',
    y_col='Total rebars',
    has_ext=False,
    class_mode="raw",
    batch_size=20,
    target_size=(150, 150),
)

# Training pictures: pixel values multiplied by 1/255 plus random rotation,
# shear, zoom and horizontal flips.
train_datagen = ImageDataGenerator(
    rescale=1./255.,
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
)
train_generator = train_datagen.flow_from_dataframe(
    dataframe=traindf,
    directory=train_dir,
    **labelled_flow_args
)

# Validation pictures: rescaling only, no augmentation.
# (The variable name keeps its original spelling in case other cells use it.)
vaild_datagen = ImageDataGenerator(rescale=1./255.)
validation_generator = vaild_datagen.flow_from_dataframe(
    dataframe=validf,
    directory=valid_dir,
    **labelled_flow_args
)

# Test pictures: no labels (class_mode=None) and no shuffling, so batches
# follow the row order of testdf.
test_datagen = ImageDataGenerator(rescale=1./255.)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=testdf,
    directory=test_dir,
    x_col='Pic Name',
    target_size=(150, 150),
    color_mode='rgb',
    class_mode=None,
    shuffle=False,
)

In [None]:
# Peek at a single batch to confirm the shapes the training generator yields.
data_batch, labels_batch = next(iter(train_generator))
print('data batch shape:', data_batch.shape)
print('labels batch shape:', labels_batch.shape)

# The peek consumed one batch. Rewind the generator so that a later single
# pass over it starts at the beginning of an epoch; otherwise that pass would
# run across an epoch boundary and, because the generator shuffles, could see
# some pictures twice and miss others.
train_generator.reset()

In [None]:
from keras.applications import VGG16

# VGG16 with ImageNet weights and without its top (classifier) layers,
# configured for 150x150 three-channel input.
conv_base = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(150, 150, 3),
)

In [None]:
# Print the layer-by-layer summary of the convolutional base.
conv_base.summary()

In [None]:
import numpy as np

def extract_features(generator, sample_count, model=None):
    """Run batches from ``generator`` through a feature extractor and collect the outputs.

    Args:
        generator: Iterable of batches that exposes a ``class_mode`` attribute.
            When ``class_mode`` is ``None`` each batch is an array of inputs;
            otherwise each batch is an ``(inputs, labels)`` pair. If the
            generator has a ``reset()`` method it is called first so that
            collection starts from the generator's first batch.
        sample_count: Number of samples (rows) to collect.
        model: Object with a ``predict(inputs)`` method. Defaults to the
            notebook-level ``conv_base``.

    Returns:
        ``features`` when ``generator.class_mode`` is ``None``, otherwise the
        tuple ``(features, labels)``. ``features`` has shape
        ``(sample_count,) + <per-sample shape of model.predict output>`` and
        ``labels`` has shape ``(sample_count,)``. Rows that the generator never
        supplied (a finite generator that ran out early) stay zero.
    """
    if model is None:
        model = conv_base

    # A generator that was already advanced (for example by peeking at a
    # batch) would otherwise start mid-epoch; for an unshuffled generator that
    # would misalign the collected rows with the source dataframe.
    rewind = getattr(generator, "reset", None)
    if callable(rewind):
        rewind()

    has_labels = generator.class_mode is not None
    labels = np.zeros(shape=(sample_count,))
    features = None  # allocated once the per-sample output shape is known
    filled = 0

    if sample_count > 0:
        # Generators of this kind may loop forever, so stop explicitly once
        # `sample_count` rows have been collected.
        for batch in generator:
            if has_labels:
                inputs_batch, labels_batch = batch
            else:
                inputs_batch, labels_batch = batch, None

            features_batch = model.predict(inputs_batch)
            if features is None:
                features = np.zeros(
                    shape=(sample_count,) + tuple(features_batch.shape[1:])
                )

            # The last batch can be shorter than the others, or longer than
            # the space that is left; copy only what still fits.
            take = min(features_batch.shape[0], sample_count - filled)
            features[filled:filled + take] = features_batch[:take]
            if has_labels:
                labels[filled:filled + take] = np.asarray(labels_batch)[:take]
            filled += take

            if filled >= sample_count:
                break

    if features is None:
        # Nothing was predicted; fall back to the model's declared output shape.
        features = np.zeros(
            shape=(sample_count,) + tuple(model.output_shape[1:])
        )

    if has_labels:
        return features, labels
    return features

# Extract features for every split. Each generator's `samples` attribute is
# passed as the number of rows to collect. The test generator was created with
# class_mode=None, so only features (no labels) come back for it.
train_features, train_labels = extract_features(train_generator, train_generator.samples)
validation_features, validation_labels = extract_features(validation_generator, validation_generator.samples)
test_features = extract_features(test_generator, test_generator.samples)

In [None]:
# Flatten every sample's feature block into a single vector of 4 * 4 * 512
# values so it can be fed to a dense network.
flat_dim = 4 * 4 * 512
train_features = train_features.reshape((train_generator.samples, flat_dim))
validation_features = validation_features.reshape((validation_generator.samples, flat_dim))
test_features = test_features.reshape((test_generator.samples, flat_dim))

In [None]:
from keras import models
from keras import layers
from keras import optimizers
from keras import regularizers

# Small fully connected regressor on top of the flattened features:
# two 512-unit ReLU layers with dropout, then one linear output unit.
model = models.Sequential([
    layers.Dense(512, activation='relu', input_dim=4 * 4 * 512),
    layers.Dropout(0.2),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(1, activation=None),
])

In [None]:
# Train with RMSprop at a small learning rate; mean squared error is both the
# loss and the reported metric.
rmsprop = optimizers.RMSprop(lr=2e-5)
model.compile(
    loss='mse',
    optimizer=rmsprop,
    metrics=['mse'],
)

In [None]:
# Fit on the extracted training features and evaluate on the validation
# features after every epoch; `history` keeps the per-epoch values.
history = model.fit(
    x=train_features,
    y=train_labels,
    batch_size=20,
    epochs=100,
    validation_data=(validation_features, validation_labels),
)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch loss values recorded in `history`.
loss = history.history['loss']
val_loss = history.history['val_loss']

# Number the epochs from 1 for the x axis.
epochs = range(1, len(loss) + 1)

# Training loss as blue dots, validation loss as a blue line.
for curve_values, curve_fmt, curve_label in (
    (loss, 'bo', 'Training loss'),
    (val_loss, 'b', 'Validation loss'),
):
    plt.plot(epochs, curve_values, curve_fmt, label=curve_label)
plt.title('Training and validation loss')
plt.legend()

plt.show()

In [None]:
# Predict the target for the flattened test features; the bare `pred` on the
# last line makes the notebook display the resulting array.
pred = model.predict(test_features)
pred

In [None]:
# Turn the continuous regression output into whole counts. Round to the
# nearest integer first: a plain integer cast truncates toward zero, which
# would turn e.g. 11.9 into 11 and bias every count low.
pred_int = pred.round().astype('int')
# Assign a 1-D column so the values line up one per row of testdf.
testdf['Total rebars'] = pred_int.ravel()
# Call head() (with parentheses) so the first rows are displayed rather than
# the bound method object.
testdf.head()

In [None]:
# Write the test table (including the 'Total rebars' column filled in above)
# as UTF-8 CSV into the Result folder on Drive, without the row index.
testdf.to_csv("drive/My Drive/Colab Notebooks/Final Project/Task 1/Result/result18.csv", index=False, encoding='utf-8')

In [None]:
# Save the model to 'task1_1219.h5'. The path is relative, so the file is
# written to the current working directory rather than to the Drive folder
# used for the data -- NOTE(review): confirm this location is intended.
model.save('task1_1219.h5')