In [55]:
import gc
from copy import deepcopy
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import TensorBoard

from libs.util import random_mask
from libs.pconv_model_first_resid import PConvUnet
from libs.properties import properties

# Settings
# Batch size passed to the data generators below; also the divisor used to
# derive the number of steps per epoch / validation run.
MAX_BATCH_SIZE = 32

# Render matplotlib figures inside the notebook.
%matplotlib inline
# Re-import edited project modules (e.g. libs.*) automatically before each cell runs.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [56]:
# Load the project properties. `length` is used further down as both the row
# and column size when each row of the matrix is reshaped into a square frame,
# and as img_rows / img_cols of the model.
properties_dict = properties()
length = properties_dict["length"]

In [57]:
# Read the matrix CSV; the first column becomes the index and is parsed as
# dates, so rows can later be looked up by timestamp arithmetic.
matrix_df = pd.read_csv('./data/trafficV_M.csv', index_col=0, parse_dates=True)

In [58]:
# matrix_df = np.array(matrix_df)

# print(matrix_df.shape)
# print("%d bytes" % (matrix_df.size * matrix_df.itemsize))
# print(np.isnan(matrix_df).sum())

In [59]:
# # Test week_ago and minute_ago on their own
# week_delta = pd.Timedelta(1, unit='W')
# minute_delta = pd.Timedelta(15, unit='m')


# channel_num = 3
# smooth_time = channel_num-1
# set_up_time = week_delta

# train_df = matrix_df.truncate(before=matrix_df.index.min() + set_up_time)
# train_week_ago_df = matrix_df.loc[train_df.index - week_delta]
# train_minute_ago_df = matrix_df.loc[train_df.index - minute_delta]

# train_df = np.array(train_df).reshape(-1, length, length, 1)
# train_week_ago_df = np.array(train_week_ago_df).reshape(-1, length, length, 1)
# train_minute_ago_df = np.array(train_minute_ago_df).reshape(-1, length, length, 1)


# train_array = np.concatenate((train_df, train_minute_ago_df, train_week_ago_df), axis=3)
# X_train, X_val = train_test_split(train_array, test_size = 0.1, random_state=42)

In [60]:
def createTrainArray(week_history_num=0, minute_history_num=0):
    """Stack every frame with its lagged copies along the channel axis.

    Reads the notebook-level ``matrix_df`` (indexed by timestamp) and
    ``length``; each row is reshaped to ``(length, length, 1)``.

    Args:
        week_history_num: number of weekly lags to add (1 week, 2 weeks, ...).
        minute_history_num: number of 15-minute lags to add (15 min, 30 min, ...).

    Returns:
        Array of shape
        ``(samples, length, length, 1 + week_history_num + minute_history_num)``.
        Channel 0 is the current frame, followed by the weekly lags and then
        the minute lags.

    Note:
        Lagged rows are fetched with ``matrix_df.loc``; a lagged timestamp that
        is absent from the index (e.g. a gap in the series) makes that lookup
        fail (``KeyError`` on current pandas) rather than being filled silently.
    """
    week_delta_list = [pd.Timedelta(i+1, unit='W') for i in range(week_history_num)]
    minute_delta_list = [pd.Timedelta((i+1)*15, unit='m') for i in range(minute_history_num)]
    delta_list = week_delta_list+minute_delta_list
    print(delta_list)

    # Skip a warm-up period equal to the *largest* requested lag so that every
    # lagged lookup lands inside the data. Using only the weekly lag here left
    # the first rows without history when just minute lags were requested.
    set_up_time = max(delta_list, default=pd.Timedelta(0))
    train_df = matrix_df.truncate(before=matrix_df.index.min() + set_up_time)

    train_ago_array_tuple = tuple(
        np.array(matrix_df.loc[train_df.index - delta]).reshape(-1, length, length, 1)
        for delta in delta_list
    )
    train_df = np.array(train_df).reshape(-1, length, length, 1)
    train_array = np.concatenate((train_df,)+train_ago_array_tuple, axis=3)
    print(train_array.shape)
    return train_array

In [61]:
# How many weekly and 15-minute lagged frames to stack behind each frame.
week_history_num, minute_history_num = 1, 1

# One channel per lagged frame, plus one for the current frame.
smooth_time = week_history_num + minute_history_num
channel_num = smooth_time + 1

train_array = createTrainArray(week_history_num, minute_history_num)
# Hold out 10% of the samples for validation; fixed seed keeps the split stable.
X_train, X_val = train_test_split(train_array, random_state=42, test_size=0.1)

[Timedelta('7 days 00:00:00'), Timedelta('0 days 00:15:00')]
(7968, 32, 32, 3)


In [62]:
# Number of full batches available in the training and validation splits.
epoch_steps, val_steps = (len(split) // MAX_BATCH_SIZE for split in (X_train, X_val))
epoch_steps, val_steps

(224, 24)

In [63]:
class DataGenerator(ImageDataGenerator):
    """ImageDataGenerator whose ``flow`` yields masked batches for inpainting."""

    def flow(self, X, *args, **kwargs):
        """Yield ``([masked, mask], target)`` batches indefinitely.

        Args:
            X: sample array handed to ``ImageDataGenerator.flow``.
            *args, **kwargs: forwarded unchanged to ``ImageDataGenerator.flow``
                (e.g. ``batch_size``).

        Yields:
            A pair ``([masked, mask], target)`` where ``mask`` holds one
            ``random_mask`` per sample, ``masked`` is the batch with every
            position where ``mask == 0`` replaced by the mean of the unmasked
            values of that batch, and ``target`` is the first channel of the
            unmodified batch.

        Note:
            Mask generation reads the notebook-level ``channel_num`` and
            ``smooth_time``.
        """
        # Build the underlying batch iterator once. Re-creating it for every
        # batch (as was done before) repeats its setup on each step and only
        # ever consumes the first batch of a fresh iterator, so an epoch never
        # actually walks through the data set.
        batches = super().flow(X, *args, **kwargs)

        while True:
            # Get augmented image samples
            ori = next(batches)

            # Get masks for each image sample (sized from the batch itself, so
            # a shorter final batch is handled as well)
            mask = np.stack(
                [
                    random_mask(ori.shape[1], ori.shape[2], size=0.1, channels=channel_num, smooth_time=smooth_time)
                    for _ in range(ori.shape[0])
                ],
                axis=0,
            )

            # Apply masks to all image samples: hidden positions are filled
            # with the mean of the visible values in this batch
            masked = ori.copy()
            masked_mean = masked[mask == 1].mean()
            masked[mask == 0] = masked_mean

            # Yield ([masked, mask], ori) training batches; only the first
            # channel of the original batch is the prediction target
            gc.collect()
            yield [masked, mask], ori[:, :, :, :1]
            
# One generator object per split, each producing masked batches of
# MAX_BATCH_SIZE samples.
train_datagen, val_datagen = DataGenerator(), DataGenerator()

# Training batches
train_generator = train_datagen.flow(X_train, batch_size=MAX_BATCH_SIZE)

# Validation batches
val_generator = val_datagen.flow(X_val, batch_size=MAX_BATCH_SIZE)

In [64]:
# np.array(next(train_generator)[1])[:,:,:,:1].shape

In [65]:
# Build the network for square frames of side `length` with `channel_num` channels.
model = PConvUnet(img_rows=length, img_cols=length, channels=channel_num)

# Log training progress for TensorBoard (no histograms, no graph dump).
tensorboard_callback = TensorBoard(
    log_dir='./mytensorboard/log',
    histogram_freq=0,
    write_graph=False,
    write_images=True,
)

# Train for 5 epochs, validating on the held-out generator after each one.
model.fit(
    train_generator,
    steps_per_epoch=epoch_steps,
    validation_data=val_generator,
    validation_steps=val_steps,
    epochs=5,
    callbacks=[tensorboard_callback],
)

Epoch 1/1
Epoch 2/2
Epoch 3/3
Epoch 4/4
Epoch 5/5


In [66]:
# Print the layer-by-layer architecture (output shapes and parameter counts).
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_9 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
conv2d_74 (Conv2D)              (None, 32, 32, 32)   896         input_9[0][0]                    
__________________________________________________________________________________________________
activation_88 (Activation)      (None, 32, 32, 32)   0           conv2d_74[0][0]                  
__________________________________________________________________________________________________
max_pooling2d_10 (MaxPooling2D) (None, 16, 16, 32)   0           activation_88[0][0]              
__________________________________________________________________________________________________
batch_norm

In [67]:
# Evaluate 10 times on 10 validation batches each. Every batch gets freshly
# drawn random masks, so the spread across runs shows how much the score
# depends on the masks.
# The result is deliberately not named `list`: that would shadow the builtin
# for every later cell in the kernel.
val_losses = [model.evaluate_generator(val_generator, 10) for _ in range(10)]
val_losses

[79404.377734375,
 79552.4484375,
 77464.12109375,
 77472.7203125,
 63979.18203125,
 60530.312890625,
 70689.258984375,
 73427.58828125,
 65557.3833984375,
 64504.3046875]

In [68]:
# import random
# test_num = random.randint(0, 200)
# test = deepcopy(X_train[test_num,np.newaxis,:])

# test_mask = random_mask(test.shape[1], test.shape[2], size=0.1, channels=channel_num, smooth_time=smooth_time)
# test_mask = test_mask[np.newaxis,:]

# test_mask[0,:,:,0].shape
# test[test_mask==0] = test.mean()

# # test_mask.shape
# # plt.imshow(test[0,:,:,0]*255)
# test_res = model.predict([test, test_mask])
# # np.sum((test-test_res)**2)
# np.sum((test[0,:,:,0][test_mask[0,:,:,0] == 0] - test_res[0,:,:,0][test_mask[0,:,:,0] == 0])**2)