# Fake label generator

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' 
# 0 = all messages are logged (default behavior)
# 1 = INFO messages are not printed
# 2 = INFO and WARNING messages are not printed
# 3 = INFO, WARNING, and ERROR messages are not printed
import cv2
import numpy as np
import pickle
import matplotlib.pyplot as plt
from io import BytesIO
from glob import glob
from tf_keras.models import load_model
from parserB6 import parser
from cameraB3 import transform_img, eon_intrinsics
from lanes_image_space import transform_points
from common.transformations.model import medmodel_intrinsics

def sYUVs_to_CsYUVs(sYUVs):   # see hevc2yuvh5.py and main.py
    #--- sYUVs.shape = (2, 384, 512)
  H = (sYUVs.shape[1]*2)//3   # = 384x2//3 = 256
  W = sYUVs.shape[2]
  CsYUVs = np.zeros((sYUVs.shape[0], 6, H//2, W//2), dtype=np.uint8)

  CsYUVs[:, 0] = sYUVs[:, 0:H:2, 0::2]
  CsYUVs[:, 1] = sYUVs[:, 1:H:2, 0::2]
  CsYUVs[:, 2] = sYUVs[:, 0:H:2, 1::2]
  CsYUVs[:, 3] = sYUVs[:, 1:H:2, 1::2]
  CsYUVs[:, 4] = sYUVs[:, H:H+H//4].reshape((-1, H//2,W//2))
  CsYUVs[:, 5] = sYUVs[:, H+H//4:H+H//2].reshape((-1, H//2,W//2))
  CsYUVs = np.array(CsYUVs).astype(np.float32)

  return CsYUVs  #--- CsYUVs.shape = (2, 6, 128, 256)




In [4]:
supercombo = load_model(os.path.join(os.path.expanduser('~'), 'aJLL/Model/saved_model/supercombo079.keras'), compile=False)

all_dirs = [os.path.join(os.path.expanduser('~'), 'dataB6', d) for d in os.listdir(os.path.join(os.path.expanduser('~'), 'dataB6'))]

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out_dpi = 96
video_shape = (1280, 720)
x_lspace = np.linspace(1, 192, 192)
presicion = np.float32

sYUVs = np.zeros((2, 384, 512), dtype=np.uint8)
Xin1   = np.zeros((1, 8), dtype=presicion)     # DESIRE_LEN = 8
Xin2   = np.zeros((1, 2), dtype=presicion)     # TRAFFIC_CONVENTION_LEN = 2
Xin3   = np.zeros((1, 512), dtype=presicion)   # rnn state

# Xin1[:, 0] = 1.0   # go straight? desire_state_prob[0] = 1.0
Xin2[:, 0] = 1.0   # traffic_convention[0] = 1.0 = left hand drive like in Taiwan

In [2]:
import tf_keras as keras
supercombo = load_model(os.path.join(os.path.expanduser('~'), 'aJLL/Model/saved_model/supercombo079.keras'), compile=False)
supercombo_vision = keras.Model(inputs=supercombo.layers[0].input, outputs=supercombo.layers[8].output)

In [3]:
supercombo_vision.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_imgs (InputLayer)     [(None, 12, 128, 256)]    0         
                                                                 
 permute (Permute)           (None, 128, 256, 12)      0         
                                                                 
 efficientnet-b2 (Functiona  (None, 4, 8, 1408)        6442016   
 l)                                                              
                                                                 
 conv2d (Conv2D)             (None, 4, 8, 32)          45088     
                                                                 
 batch_normalization (Batch  (None, 4, 8, 32)          128       
 Normalization)                                                  
                                                                 
 elu (ELU)                   (None, 4, 8, 32)          0     

In [6]:
# Generate fake labels for the modelB6 efficientnet output.
for did, d in enumerate(all_dirs):
    cap = cv2.VideoCapture(os.path.join(d, "video.hevc"))
    frame_count = 0
    predictions = []

    (ret, previous_frame) = cap.read()   # read 1st frame and set it to previous_frame
    if not ret:
        print("No input frames.")
        raise KeyboardInterrupt
    
    bYUV = cv2.cvtColor(previous_frame, cv2.COLOR_BGR2YUV_I420)   # from big BGR to big YUV
    sYUVs[0] = transform_img(bYUV, from_intr=eon_intrinsics, to_intr=medmodel_intrinsics, yuv=True,
                            output_size=(512, 256))

    msg = " "
    while True:
        ret, current_frame = cap.read()
        if not ret:
            break
        else: 
            frame_count += 1
        frame = cv2.cvtColor(current_frame.copy(), cv2.COLOR_BGR2RGB)
        bYUV = cv2.cvtColor(current_frame, cv2.COLOR_BGR2YUV_I420)
        sYUVs[1] = transform_img(bYUV, from_intr=eon_intrinsics, to_intr=medmodel_intrinsics, yuv=True,
                                output_size=(512, 256))
        
        CsYUVs = sYUVs_to_CsYUVs(sYUVs)
        outs = supercombo_vision(np.vstack(CsYUVs[0:2])[None])
        predictions.append(outs)

        sYUVs[0] = sYUVs[1]

        print(" "*len(msg), end='\r')
        msg = f"Processed {frame_count} frames.".ljust(int(np.log10(frame_count)) + 18)
        print(msg, end='\r')

    with open(os.path.join(d, "fake_effnet_y.pickle"), "wb") as f:
        pickle.dump(predictions, f)
        print(f"Saved {os.path.join(d, "fake_effnet_y.pickle")} ({len(predictions)} samples).")

Saved /home/neil/dataB6/UHD--2018-08-02--08-34-47--37/fake_effnet_y.pickle (1199 samples).
Saved /home/neil/dataB6/UHD--2018-08-02--08-34-47--32/fake_effnet_y.pickle (1199 samples).
Saved /home/neil/dataB6/8bfda98c9c9e4291_2020-05-11--03-00-57/fake_effnet_y.pickle (1201 samples).
Saved /home/neil/dataB6/UHD--2018-08-02--08-34-47--33/fake_effnet_y.pickle (1198 samples).


In [4]:
# Generate fake labels for the modelB6.
heads = {}
tails = {}
for did, d in enumerate(all_dirs):
    fig = plt.figure('Simulator', figsize=(video_shape[0]/out_dpi, video_shape[1]/out_dpi), dpi=out_dpi)
    cap = cv2.VideoCapture(os.path.join(d, "video.hevc"))
    Xin3_temp = np.zeros(512, dtype=presicion)
    out = cv2.VideoWriter(os.path.join(d, "predicted.mp4"), fourcc, 20.0, video_shape)
    frame_count = 0
    predictions = []

    (ret, previous_frame) = cap.read()   # read 1st frame and set it to previous_frame
    if not ret:
        print("No input frames.")
        raise KeyboardInterrupt
    
    bYUV = cv2.cvtColor(previous_frame, cv2.COLOR_BGR2YUV_I420)   # from big BGR to big YUV
    sYUVs[0] = transform_img(bYUV, from_intr=eon_intrinsics, to_intr=medmodel_intrinsics, yuv=True,
                            output_size=(512, 256))
    heads.setdefault(d, sYUVs[0].copy())

    msg = " "
    try:
        while True:
            ret, current_frame = cap.read()
            if not ret:
                tails.setdefault(d, [sYUVs[1].copy(), Xin3_temp])
                break
            else: 
                frame_count += 1
            _buffer = BytesIO()
            frame = cv2.cvtColor(current_frame.copy(), cv2.COLOR_BGR2RGB)
            bYUV = cv2.cvtColor(current_frame, cv2.COLOR_BGR2YUV_I420)
            sYUVs[1] = transform_img(bYUV, from_intr=eon_intrinsics, to_intr=medmodel_intrinsics, yuv=True,
                                    output_size=(512, 256))
            
            CsYUVs = sYUVs_to_CsYUVs(sYUVs)
                #--- Ximgs[bcount].shape = (12, 128, 256)
            Xin3[0] = Xin3_temp
            outs = supercombo([np.vstack(CsYUVs[0:2])[None], Xin1, Xin2, Xin3])
                #--- len(outs) = 12
            Xin3_temp = outs[-1][0]
                #--- np.shape(outs[11][0]) = (512,)  np.shape(outs[11]) = (1, 512)
            predictions.append(outs)

            parsed = parser([x.numpy() for x in outs])
            plt.clf()   # clear figure
            plt.xlim(0, 1164)
            plt.ylim(874, 0)
            plt.plot()
            plt.title("Original Scene")
            new_x_left, new_y_left = transform_points(x_lspace, parsed["lll"][0])
            new_x_path, new_y_path = transform_points(x_lspace, parsed["path"][0])
            new_x_right, new_y_right = transform_points(x_lspace, parsed["rll"][0])
            plt.plot(new_x_left, new_y_left, label='transformed', color='r')
            plt.plot(new_x_path, new_y_path, label='transformed', color='g')
            plt.plot(new_x_right, new_y_right, label='transformed', color='b')
            plt.imshow(frame)
            plt.savefig(_buffer, format="png", dpi=out_dpi)
            _buffer.seek(0)
            img_array = np.asarray(bytearray(_buffer.read()), dtype=np.uint8)
            img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
            out.write(img.copy())

            sYUVs[0] = sYUVs[1]

            print(" "*len(msg), end='\r')
            msg = f"Processed {frame_count} frames.".ljust(int(np.log10(frame_count)) + 18)
            print(msg, end='\r')

        with open(os.path.join(d, "fake_y.pickle"), "wb") as f:
            pickle.dump(predictions, f)
            print(f"Saved {os.path.join(d, "fake_y.pickle")} ({len(predictions)} samples) along with predicted video.")

    finally:
        cap.release()
        plt.clf()

Saved /home/neil/dataB6/UHD--2018-08-02--08-34-47--37/fake_y.pickle (1199 samples) along with predicted video.
Saved /home/neil/dataB6/UHD--2018-08-02--08-34-47--32/fake_y.pickle (1199 samples) along with predicted video.
Saved /home/neil/dataB6/8bfda98c9c9e4291_2020-05-11--03-00-57/fake_y.pickle (1201 samples) along with predicted video.
Saved /home/neil/dataB6/UHD--2018-08-02--08-34-47--33/fake_y.pickle (1198 samples) along with predicted video.


<Figure size 1280x720 with 0 Axes>

In [5]:
# for bridging rnn state between files
for i in tails.keys():
    syuv1, in3 = tails[i]
    sYUVs[0] = syuv1
    Xin3[0] = in3
    file_bridge = {} 

    for j in heads.keys():
        sYUVs[1] = heads[j]
        CsYUVs = sYUVs_to_CsYUVs(sYUVs)
                #--- Ximgs[bcount].shape = (12, 128, 256)
        outs = supercombo([np.vstack(CsYUVs[0:2])[None], Xin1, Xin2, Xin3])
        file_bridge.setdefault(j, outs)

    with open(os.path.join(i, 'file_bridge.pickle'), "wb") as f:
        pickle.dump(file_bridge, f)

In [None]:
# Generate fake labels (only out0, out1, out2, out3, out4, out5, out6) for the modelB6.
# PATH_IDX   = 0      # o0:  192*2+1 = 385
# LL_IDX     = 385    # o1:  192*2+2 = 386
# RL_IDX     = 771    # o2:  192*2+2 = 386
# LEAD_IDX   = 1157   # o3:  11*5+3 = 58
# LONG_X_IDX = 1215   # o4:  100*2 = 200
# LONG_V_IDX = 1415   # o5:  100*2 = 200
# LONG_A_IDX = 1615   # o6:  100*2 = 200

path_to_videos = os.path.join(os.path.expanduser('~'), 'dataOp')
all_videos = glob(os.path.join(path_to_videos, '*', 'video.hevc')) + \
             glob(os.path.join(path_to_videos, '*', '*', 'video.hevc'))

np.random.shuffle(all_videos)

for vid_idx, vid in enumerate(all_videos):
    Xin3_temp = np.zeros(512, dtype=presicion)
    frame_count = 0
    predictions = []

    cap = cv2.VideoCapture(vid)
    (ret, previous_frame) = cap.read()   # read 1st frame and set it to previous_frame
    if not ret:
        print("No input frames.")
        raise KeyboardInterrupt
    
    bYUV = cv2.cvtColor(previous_frame, cv2.COLOR_BGR2YUV_I420)   # from big BGR to big YUV
    sYUVs[0] = transform_img(bYUV, from_intr=eon_intrinsics, to_intr=medmodel_intrinsics, yuv=True,
                            output_size=(512, 256))
    
    if os.path.isfile(vid.replace('video.hevc', 'fake_cat_y.pickle')):
        print(f"{vid.replace('video.hevc', 'fake_cat_y.pickle')} exists. Skipping.")
        continue

    msg = " "
    try:
        while True:
            ret, current_frame = cap.read()
            if not ret:
                break
            else: 
                frame_count += 1

            frame = cv2.cvtColor(current_frame.copy(), cv2.COLOR_BGR2RGB)
            bYUV = cv2.cvtColor(current_frame, cv2.COLOR_BGR2YUV_I420)
            sYUVs[1] = transform_img(bYUV, from_intr=eon_intrinsics, to_intr=medmodel_intrinsics, yuv=True,
                                    output_size=(512, 256))
            
            CsYUVs = sYUVs_to_CsYUVs(sYUVs)
                #--- Ximgs[bcount].shape = (12, 128, 256)
            Xin3[0] = Xin3_temp
            outs = supercombo([np.vstack(CsYUVs[0:2])[None], Xin1, Xin2, Xin3])
                #--- len(outs) = 12
            Xin3_temp = outs[-1][0]
                #--- np.shape(outs[11][0]) = (512,)  np.shape(outs[11]) = (1, 512)
            predictions.append(outs[:7])

            sYUVs[0] = sYUVs[1]

            print(" "*len(msg), end='\r')
            msg = f"Processed {frame_count} frames.".ljust(int(np.log10(frame_count)) + 18)
            print(msg, end='\r')

        with open(vid.replace('video.hevc', 'fake_cat_y.pickle'), "wb") as f:
            pickle.dump(predictions, f)
            print(f"Saved {vid.replace('video.hevc', 'fake_cat_y.pickle')} ({len(predictions)} samples).")

    finally:
        cap.release()

# Model inspection

In [1]:
from train_modelB6 import input_data_generator
import os
import numpy as np

input_dirs = [os.path.join(os.path.expanduser('~'), 'dataB6/UHD--2018-08-02--08-34-47--37'),
            os.path.join(os.path.expanduser('~'), 'dataB6/UHD--2018-08-02--08-34-47--33'),
            os.path.join(os.path.expanduser('~'), 'dataB6/UHD--2018-08-02--08-34-47--32')]
train_data_size, train_xy = input_data_generator(input_dirs[:2], 1, out_seperate=True, no_rnn_state=True)



In [6]:
a = next(train_xy)

In [3]:
from modelB6 import get_model
test = get_model((12, 128, 256), 8, 2, 512, None, out_seperate=False, no_rnn_state=True)
test.load_weights("saved_model/B6BWgru.weights.h5")

In [11]:
for layer in test.layers: 
    print(layer.get_config(), layer.get_weights())

{'batch_input_shape': (None, 12, 128, 256), 'dtype': 'float32', 'sparse': False, 'ragged': False, 'name': 'imgs'} []
{'name': 'permute_2', 'trainable': True, 'dtype': 'float32', 'dims': (2, 3, 1)} []
{'name': 'stem_conv', 'trainable': True, 'dtype': 'float32', 'filters': 32, 'kernel_size': (3, 3), 'strides': (2, 2), 'padding': 'same', 'data_format': 'channels_last', 'dilation_rate': (1, 1), 'groups': 1, 'activation': 'linear', 'use_bias': True, 'kernel_initializer': {'module': 'keras.initializers', 'class_name': 'GlorotUniform', 'config': {'seed': None}, 'registered_name': None}, 'bias_initializer': {'module': 'keras.initializers', 'class_name': 'Zeros', 'config': {}, 'registered_name': None}, 'kernel_regularizer': None, 'bias_regularizer': None, 'activity_regularizer': None, 'kernel_constraint': None, 'bias_constraint': None} [array([[[[-8.88894498e-02, -1.10614590e-01,  4.58244085e-02, ...,
           1.73468813e-02, -1.76970530e-02, -3.76067981e-02],
         [-1.46814547e-02,  5.06

In [7]:
PATH_IDX   = 0      # o0:  192*2+1 = 385
LL_IDX     = 385    # o1:  192*2+2 = 386
RL_IDX     = 771    # o2:  192*2+2 = 386
LEAD_IDX   = 1157   # o3:  11*5+3 = 58
LONG_X_IDX = 1215   # o4:  100*2 = 200
LONG_V_IDX = 1415   # o5:  100*2 = 200
LONG_A_IDX = 1615   # o6:  100*2 = 200
DESIRE_IDX = 1815   # o7:  8
META_IDX   = 1823   # o8:  4
PRED_IDX   = 1827   # o9:  32
POSE_IDX   = 1859   # o10: 12
STATE_IDX  = 1871   # o11: 512
OUTPUT_IDX = 2383
outputs = test(a[0]).numpy()
o0  = outputs[:, PATH_IDX:   LL_IDX]   #--- o0.shape = (1, 385)
o1  = outputs[:, LL_IDX:     RL_IDX]
o2  = outputs[:, RL_IDX:     LEAD_IDX]
o3  = outputs[:, LEAD_IDX:   LONG_X_IDX]
o4  = outputs[:, LONG_X_IDX: LONG_V_IDX]
o5  = outputs[:, LONG_V_IDX: LONG_A_IDX]
o6  = outputs[:, LONG_A_IDX: DESIRE_IDX]
o7  = outputs[:, DESIRE_IDX: META_IDX]
o8  = outputs[:, META_IDX:   PRED_IDX]
o9  = outputs[:, PRED_IDX:   POSE_IDX]
o10 = outputs[:, POSE_IDX:   STATE_IDX]
# o11 = outputs[:, STATE_IDX:  OUTPUT_IDX]
# outputs = [o0, o1, o2, o3, o4, o5, o6, o7, o8, o9, o10, o11]
outputs = [o0, o1, o2, o3, o4, o5, o6, o7, o8, o9, o10]

In [8]:
from scipy.special import kl_div
for i in range(len(outputs)):
    print((np.mean(a[1][i]), np.std(a[1][i])), (np.mean(outputs[i]), np.std(outputs[i])), 
          sum(kl_div(np.abs(a[1][i].numpy().flatten()), np.abs(outputs[i].flatten()))))

(-3.4018908, 8.663398) (-1.3636819, 9.941333) 741.1236826212007
(-3.0205321, 7.866997) (-0.8062398, 9.913863) 677.2914708147991
(-3.1725454, 7.752247) (-0.6073906, 9.913296) 1047.8022703246106
(0.30532873, 1.2126476) (0.5391162, 3.285466) 43.97258758870885
(28.316422, 34.08671) (82.00766, 104.47468) 4856.342105698772
(5.584253, 5.8677983) (15.924607, 16.899767) 970.7285360395908
(-0.7581608, 0.67522424) (-0.53688437, 0.5167415) 13.744049896020442
(0.125, 0.31045377) (0.125, 0.32958585) 0.15231414772870266
(0.31436375, 0.29797775) (0.12793744, 0.22159387) 8.433611841872334
(0.125, 0.32811862) (0.125, 0.33009484) 1.1766912343007085
(2.489253, 8.903704) (2.1351862, 9.09891) 15.669963835796807


In [15]:
import tf_keras as keras
import numpy as np
d = keras.layers.Input(1)
t = keras.layers.Input(2)
x = keras.layers.Input(3)
x_concat = keras.layers.Concatenate(axis=-1)([d, t, x])
x_concat = keras.layers.Reshape((1, -1))(x_concat)
gru = keras.layers.GRU(512, return_sequences=False, return_state=False)(x_concat)
m = keras.Model(inputs=[d, t, x], outputs=gru)

In [16]:
a = m.predict(x=[np.array([0.1]).reshape(1, 1), 
             np.array([0.2, 0.3]).reshape(1, 2), 
             np.array([0.4, 0.5, 0.6]).reshape(1, 3)])
a.shape



(1, 512)

In [3]:
a = [1, 2, 3]

[1, 2]