# **Introduction to Deep Learning Lab 3**
"""In this project our team implements a U-Net to estimate the position of vehicles."""
+ **Reference**
1. Implementation Unet paper
https://arxiv.org/pdf/1505.04597.pdf 
2. Reference Notebook from Kaggle Competition 
Competition_Centernet_U_net_model
https://www.kaggle.com/khaledmgamal/pku-competition-centernet-u-net 
3. In this project we also implement Custom Unet backbone MobileNetV2 Base Style

## **1 Prepare & Processing Data**
"""Here is The Following Steps for Data Processing"""
1. **Reading data** -- extract Pose Information {Yaw, Pitch, Roll, X, Y, Z}
2. **Coordinate projection** of [3D positions onto the 2D image plane] and rotation of the {X, Y, Z} values by Euler angles
3. **Image reading-- Resize** [Image, mask_target, Scale_Coordinate]
4. **Image Visualization** Training data [Images, ground truth]
5. **Processing data with Data Augmentation Pipeline**


 ## 2 **Construct Unet-Like Model Architecture**
1. **Writing Custom Data-Generator** 
2. **Model construction: encoder based on an EfficientNet-B0 backbone**
3. **Model construction: encoder based on an EfficientNet-B1 backbone**

## **3 Model Evaluate** 
1. Evaluate Model on Testing Set
2. Making Prediction and Submit

### 1. **Reading data** -- extract Pose Information {Yaw, Pitch, Roll, X, Y, Z}

In [None]:
"""Importing DATA from Kaggle Dataset"""
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os

# Walk the Kaggle input directory. The joined path is built and immediately
# discarded (the print below is commented out), so this loop has no visible
# effect beyond traversing the directory tree.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        os.path.join(dirname, filename)
        #print(os.path.join(dirname, filename))
### Data Path
# Root folder of the competition data; every later file path is built from it.
PATH = '../input/pku-autonomous-driving/'
print(os.listdir(PATH))

## Loading the Dataset Images IDs
# `train` is a module-level DataFrame read by the cells below
# (columns used later: 'ImageId' and 'PredictionString').
train = pd.read_csv(PATH + 'train.csv')
train.head()

In [None]:
"""Helper Function """
# Convert a prediction string into a list of dicts, one dict per object.
def str2coords(s, names=('id', 'yaw', 'pitch', 'roll', 'x', 'y', 'z')):
    """Parse a whitespace-separated prediction string into per-object dicts.

    Args:
        s: String of numbers; every ``len(names)`` consecutive values
            describe one object.
        names: Field names, in the order the values appear in ``s``.

    Returns:
        A list with one dict per object mapping each name to a float.
        When 'id' is one of the names, its value is converted to ``int``.
        An empty string yields an empty list.

    Raises:
        ValueError: If the number of values is not a multiple of
            ``len(names)`` or a value is not numeric.
    """
    # Group by the actual number of fields instead of a hard-coded 7, so a
    # custom `names` sequence of a different length is parsed correctly.
    records = np.array(s.split()).reshape([-1, len(names)])
    coords = []
    for record in records:
        entry = dict(zip(names, record.astype('float')))
        if 'id' in entry:
            entry['id'] = int(entry['id'])
        coords.append(entry)
    return coords

# Will use this model later: optimize_xy() uses `xzy_slope` to penalise
# candidate positions whose y disagrees with the value predicted from (x, z).
# Each prediction string is parsed once and all six pose columns are filled
# together, instead of re-parsing every string once per column.
points_df = pd.DataFrame(
    [
        [obj[col] for col in ['x', 'y', 'z', 'yaw', 'pitch', 'roll']]
        for ps in train['PredictionString']
        for obj in str2coords(ps)
    ],
    columns=['x', 'y', 'z', 'yaw', 'pitch', 'roll'],
)
# Linear regression y ~ (x, z) fitted on every annotated object in `train`.
xzy_slope = LinearRegression()
X = points_df[['x', 'z']]
y = points_df['y']
xzy_slope.fit(X, y)

### 2. **Coordinate projection** of [3D positions onto the 2D image plane] and rotation of the {X, Y, Z} values by Euler angles

In [None]:
"""Function to Project 3D Position on 2D image"""
import cv2
# Camera intrinsics (values taken from camera.zip): focal lengths on the
# diagonal, principal point in the last column.
camera_matrix = np.array(
    [
        [2304.5479, 0, 1686.2379],
        [0, 2305.8757, 1354.9849],
        [0, 0, 1],
    ],
    dtype=np.float32,
)
# Inverse intrinsics, kept at module level alongside the matrix itself.
camera_matrix_inv = np.linalg.inv(camera_matrix)


def get_img_coords(s):
    """Project the centre of every object in a prediction string onto the image.

    Args:
        s: Prediction string as accepted by ``str2coords``.

    Returns:
        Two arrays ``(img_xs, img_ys)`` holding, for each object, the x and
        y image coordinates obtained by applying ``camera_matrix`` to the
        object's (x, y, z) position and dividing by the resulting depth.
    """
    objects = str2coords(s)
    # One column per object: rows are x, y, z.
    positions = np.array([(o['x'], o['y'], o['z']) for o in objects]).T
    projected = np.dot(camera_matrix, positions).T
    # Perspective divide; the third column (distance from the camera) is
    # left untouched.
    depth = projected[:, 2]
    projected[:, 0] /= depth
    projected[:, 1] /= depth
    return projected[:, 0], projected[:, 1]


"""3D visualization and Rotation matrix"""

from math import sin, cos
## Convert Euler angles to a rotation matrix
# https://phas.ubc.ca/~berciu/TEACHING/PHYS206/LECTURES/FILES/euler.pdf
def euler_to_Rot(yaw, pitch, roll):
    """Build the 3x3 rotation matrix ``Y(yaw) . P(pitch) . R(roll)``.

    ``Y`` rotates about the second axis, ``P`` about the first and ``R``
    about the third; all angles are in radians.
    """
    cy, sy = cos(yaw), sin(yaw)
    cp, sp = cos(pitch), sin(pitch)
    cr, sr = cos(roll), sin(roll)

    yaw_mat = np.array([[cy, 0, sy],
                        [0, 1, 0],
                        [-sy, 0, cy]])
    pitch_mat = np.array([[1, 0, 0],
                          [0, cp, -sp],
                          [0, sp, cp]])
    roll_mat = np.array([[cr, -sr, 0],
                         [sr, cr, 0],
                         [0, 0, 1]])
    return yaw_mat.dot(pitch_mat.dot(roll_mat))
## Read an image from disk
def imread(path, fast_mode=False):
    """Load an image with OpenCV, optionally reversing its channel order.

    Args:
        path: Path of the image file.
        fast_mode: When true, return exactly what ``cv2.imread`` produced.

    Returns:
        The loaded array. Unless ``fast_mode`` is set, a 3-dimensional
        result is copied with its last axis reversed. Whatever
        ``cv2.imread`` returned (including ``None``) is passed through
        unchanged in every other case.
    """
    img = cv2.imread(path)
    if fast_mode or img is None or len(img.shape) != 3:
        return img
    return np.array(img[:, :, ::-1])

## Annotation on the image
def draw_line(image, points):
    """Draw the closed quadrilateral through ``points[0..3]`` on ``image``.

    Only the first two components of each point are used. The image is
    drawn on in place and also returned.
    """
    color = (255, 0, 0)
    thickness = 16
    # Same four edges, in the same order, as the corner sequence 0-3, 0-1, 1-2, 2-3.
    for start, end in ((0, 3), (0, 1), (1, 2), (2, 3)):
        cv2.line(image, tuple(points[start][:2]), tuple(points[end][:2]), color, thickness)
    return image

## Draw dots on the image
def draw_points(image, points):
    """Draw one filled circle per ``(x, y, z)`` point and return the image.

    The circle radius is ``int(1000 / z)``, so points with a larger third
    component are drawn smaller. The image is modified in place.
    """
    fill_color = (0, 255, 0)
    for px, py, pz in points:
        radius = int(1000 / pz)
        cv2.circle(image, (px, py), radius, fill_color, -1)
    return image


## Visualization: project each 3D object onto the 2D image and annotate it
def visualize(img, coords):
    """Return a copy of ``img`` with a projected box and centre dot per object.

    Args:
        img: Image array; it is copied before anything is drawn.
        coords: Iterable of dicts providing 'x', 'y', 'z', 'yaw', 'pitch'
            and 'roll' (the format returned by ``str2coords``).

    Returns:
        The annotated copy of the image.
    """
    # You will also need functions from the previous cells
    # Half-extents of the rectangle drawn for every object
    # (presumably metres, matching the x/y/z labels -- TODO confirm).
    x_l = 1.02
    y_l = 0.80
    z_l = 2.31
    
    img = img.copy()
    for point in coords:
        # Get values
        x, y, z = point['x'], point['y'], point['z']
        # NOTE(review): the label's 'pitch' is passed as yaw and its 'yaw' as
        # pitch, all negated. This looks deliberate (label naming convention)
        # -- confirm before changing.
        yaw, pitch, roll = -point['pitch'], -point['yaw'], -point['roll']
        # Math
        center_point = np.array([x, y, z]).reshape([1,3])
        Rotation_matrix = euler_to_Rot(yaw, pitch, roll).T#Rotation matrix to transform from car coordinate frame to camera coordinate frame
        # Four corners of a rectangle at height -y_l, one corner per column
        # after the transpose.
        bounding_box = np.array([[x_l, -y_l, -z_l],
                      [x_l, -y_l, z_l],
                      [-x_l, -y_l, z_l],
                      [-x_l, -y_l, -z_l],
                     ]).T
        # Rotate the corners, translate them to the object centre, then apply
        # the camera intrinsics.
        img_cor_points = np.dot(camera_matrix, np.dot(Rotation_matrix,bounding_box)+center_point.T)
        img_cor_points = img_cor_points.T
        # Perspective divide to obtain pixel coordinates.
        img_cor_points[:, 0] /= img_cor_points[:, 2]
        img_cor_points[:, 1] /= img_cor_points[:, 2]
        img_cor_points = img_cor_points.astype(int)
        # Drawing
        img = draw_line(img, img_cor_points)
        # Project the object centre itself and mark it with a dot.
        img_point=np.dot(camera_matrix, center_point.T).T
        img_point[:, 0] /= img_point[:, 2]
        img_point[:, 1] /=img_point[:, 2]
        img = draw_points(img,img_point.astype(int))
    
    return img


# Number of training samples to display.
n_rows = 2
import matplotlib.pyplot as plt
# For each sample (rows 20, 21, ... of `train`) show the raw image next to the
# same image annotated with its ground-truth objects.
for idx in range(n_rows):
    fig, axes = plt.subplots(1, 2, figsize=(12,8))
    img = imread(PATH + 'train_images/' + train['ImageId'].iloc[20+idx] + '.jpg')
    axes[0].imshow(img)
    # visualize() works on a copy, so `img` itself stays unannotated.
    img_vis = visualize(img, str2coords(train['PredictionString'].iloc[20+idx]))
    axes[1].imshow(img_vis)
    plt.show()

### 3. **Image reading-- Resize** [Image, mask_target, Scale_Coordinate]
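1. `_regr_preprocess`: prepares x, y, z, yaw, pitch, roll for training (x, y, z are divided by 100, roll is shifted by π, pitch is encoded as sin/cos).
2. `_regr_back`: undoes the preprocessing (e.g. x = x * 100) to recover the pose from the network prediction.
3. `extract_coords`: extracts the pose information from the network output.
4. Row mapping: x = (x - img.shape[0] // 2) * IMG_HEIGHT / (img.shape[0] // 2) / MODEL_SCALE; the column y is mapped the same way using the image width (with 1/6 of the width added as padding on each side).
5. Creating the Dataset and DataLoader for the dataset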


In [None]:
# Width, in pixels, that preprocess_image() resizes every image to.
IMG_WIDTH = 1024
# Matching height at a 16:5 aspect ratio (320 when IMG_WIDTH is 1024).
IMG_HEIGHT = IMG_WIDTH // 16 * 5
# Downscale factor between the resized image and the mask / regression maps.
MODEL_SCALE = 8
# Add an offset to an angle and wrap the result
def rotate(x, angle):
    """Return ``x + angle`` wrapped into the interval [-pi, pi)."""
    shifted = x + angle
    # Number of whole turns to remove so the result lands in [-pi, pi).
    full_turns = (shifted + np.pi) // (2 * np.pi)
    return shifted - full_turns * 2 * np.pi

# Turn one object's label dict into regression targets
def _regr_preprocess(regr_dict, flip=False):
    """Convert a label dict into the values regressed by the network.

    The dict is modified in place and returned:
      * when ``flip`` is true, 'x', 'pitch' and 'roll' are negated first;
      * 'x', 'y' and 'z' are divided by 100;
      * 'roll' is shifted by pi and wrapped with ``rotate``;
      * 'pitch' is replaced by 'pitch_sin' and 'pitch_cos';
      * 'id' is removed.

    Raises:
        KeyError: If an expected key (including 'id') is missing.
    """
    if flip:
        for mirrored in ('x', 'pitch', 'roll'):
            regr_dict[mirrored] = -regr_dict[mirrored]

    for axis in ('x', 'y', 'z'):
        regr_dict[axis] = regr_dict[axis] / 100

    regr_dict['roll'] = rotate(regr_dict['roll'], np.pi)

    pitch = regr_dict['pitch']
    regr_dict['pitch_sin'] = sin(pitch)
    regr_dict['pitch_cos'] = cos(pitch)

    del regr_dict['pitch']
    del regr_dict['id']
    return regr_dict


def _regr_back(regr_dict):
    """Invert ``_regr_preprocess`` on a dict of regressed values.

    The dict is modified in place and returned: 'x', 'y' and 'z' are
    multiplied by 100, 'roll' is shifted back by pi, and a 'pitch' angle
    is rebuilt from 'pitch_sin' / 'pitch_cos' (both entries are kept).
    """
    for axis in ('x', 'y', 'z'):
        regr_dict[axis] = regr_dict[axis] * 100
    regr_dict['roll'] = rotate(regr_dict['roll'], -np.pi)

    # Normalise the (sin, cos) pair before recovering the angle, since the
    # raw values need not lie on the unit circle.
    norm = np.sqrt(regr_dict['pitch_sin']**2 + regr_dict['pitch_cos']**2)
    unit_sin = regr_dict['pitch_sin'] / norm
    unit_cos = regr_dict['pitch_cos'] / norm
    regr_dict['pitch'] = np.arccos(unit_cos) * np.sign(unit_sin)

    return regr_dict


## Crop, pad and rescale an image for the network
def preprocess_image(img, flip=False):
    """Prepare a raw image as network input.

    Steps: keep the lower half of the image, pad both sides with a strip
    one sixth of the width filled with each row's mean colour, resize to
    ``(IMG_WIDTH, IMG_HEIGHT)``, optionally mirror left-right, and scale
    the values to float32 by dividing by 255.
    """
    lower_half = img[img.shape[0] // 2:]
    row_mean = lower_half.mean(1, keepdims=True).astype(lower_half.dtype)
    side_pad = (np.ones_like(lower_half) * row_mean)[:, :lower_half.shape[1] // 6]
    padded = np.concatenate([side_pad, lower_half, side_pad], 1)
    resized = cv2.resize(padded, (IMG_WIDTH, IMG_HEIGHT))
    if flip:
        resized = resized[:, ::-1]
    return (resized / 255).astype('float32')


## Build the target mask and regression maps for one image
def get_mask_and_regr(img, labels, flip=False):
    """Create the object-centre mask and the regression target maps.

    Args:
        img: The raw image; only ``img.shape`` is read here, to map pixel
            coordinates onto the output grid.
        labels: Prediction string (see ``str2coords``).
        flip: When true, the targets are produced for the mirrored image.

    Returns:
        ``(mask, regr)``: ``mask`` has shape
        (IMG_HEIGHT // MODEL_SCALE, IMG_WIDTH // MODEL_SCALE) and is 1 at
        each object centre; ``regr`` has the same grid plus 7 channels that
        hold, at each object centre, the preprocessed pose values ordered
        by sorted key name.
    """
    mask = np.zeros([IMG_HEIGHT // MODEL_SCALE, IMG_WIDTH // MODEL_SCALE], dtype='float32')
    # NOTE(review): regr_names is never used below.
    regr_names = ['x', 'y', 'z', 'yaw', 'pitch', 'roll']
    regr = np.zeros([IMG_HEIGHT // MODEL_SCALE, IMG_WIDTH // MODEL_SCALE, 7], dtype='float32')
    coords = str2coords(labels)
    xs, ys = get_img_coords(labels)
    
    for x, y, regr_dict in zip(xs, ys, coords):
        # From here on x is the row (vertical) and y the column (horizontal).
        x, y = y, x
        #print(x,img.shape[0] // 2,y, img.shape[1] // 6)
        # Row: subtract the cropped-away upper half, then scale to the grid.
        x = (x - img.shape[0] // 2) * IMG_HEIGHT / (img.shape[0] // 2) / MODEL_SCALE
        #x=(x*1/2)*(IMG_HEIGHT / MODEL_SCALE)/(img.shape[0] // 2)
        x = np.round(x).astype('int')
        # Column: add the 1/6-width side padding, then scale to the grid
        # (the padded image is 4/3 of the original width).
        y = (y + img.shape[1] // 6) * IMG_WIDTH / (img.shape[1] * 4/3) / MODEL_SCALE
        #y=(y* 4/3)*(IMG_WIDTH / MODEL_SCALE)/((img.shape[1] * 3/4) )

        y = np.round(y).astype('int')
        #print(x,y)

        # Objects whose centre falls outside the grid are skipped.
        if x >= 0 and x < IMG_HEIGHT // MODEL_SCALE and y >= 0 and y < IMG_WIDTH // MODEL_SCALE:
            mask[x, y] = 1
            # _regr_preprocess modifies regr_dict in place.
            regr_dict = _regr_preprocess(regr_dict, flip)
            # Channel order is the alphabetical order of the keys.
            regr[x, y] = [regr_dict[n] for n in sorted(regr_dict)]
            
    if flip:
        # Mirror the maps left-right; np.array makes a copy of the reversed view.
        mask = np.array(mask[:,::-1])
        regr = np.array(regr[:,::-1])
        
    return mask, regr



#### **Helper Functions To Convert back 2D dimention to 3D dimension COORDINATE & ANGLE**
1. Take the mask and regression maps from the prediction result -> recover the predicted coordinates -> project the new coordinates and visualize the result

In [None]:
from scipy.optimize import minimize
from sklearn.linear_model import LinearRegression

# Reference from https://www.kaggle.com/theshockwaverider/eda-visualization-baseline
def convert_3d_to_2d(x, y, z, fx = 2304.5479, fy = 2305.8757, cx = 1686.2379, cy = 1354.9849):
    """Project a 3D point to image coordinates with a pinhole camera model.

    Args:
        x, y, z: Point coordinates; ``z`` is the divisor (depth).
        fx, fy: Focal lengths.
        cx, cy: Principal-point offsets.

    Returns:
        The tuple ``(u, v)`` of horizontal and vertical image coordinates.
    """
    u = x * fx / z + cx
    v = y * fy / z + cy
    return u, v

## Refine a predicted 3D position so it agrees with its grid cell
def optimize_xy(r,c,x0,y0, z0, flipped=False):
    """Refine ``(x0, y0, z0)`` with a Powell search.

    The objective combines two terms, each with a lower bound (0.2 and 0.4):
    the squared distance between the grid cell ``(r, c)`` and the cell the
    candidate position projects to, and the squared difference between the
    candidate ``y`` and the value ``xzy_slope`` predicts from ``(x, z)``.

    Args:
        r, c: Row and column of the detection on the output grid.
        x0, y0, z0: Initial position estimate.
        flipped: When true, ``x`` is negated before querying ``xzy_slope``.

    Returns:
        The refined ``(x, y, z)``.
    """
    def distance_fn(xyz):
        x, y, z = xyz
        slope_input_x = -x if flipped else x
        slope_err = (xzy_slope.predict([[slope_input_x, z]])[0] - y)**2
        # u is the horizontal and v the vertical pixel coordinate; they are
        # mapped to grid column / row with the same formula used to build
        # the training mask.
        u, v = convert_3d_to_2d(x, y, z)
        row = (v - IMG_SHAPE[0] // 2) * IMG_HEIGHT / (IMG_SHAPE[0] // 2) / MODEL_SCALE
        col = (u + IMG_SHAPE[1] // 6) * IMG_WIDTH / (IMG_SHAPE[1] * 4 / 3) / MODEL_SCALE
        return max(0.2, (row-r)**2 + (col-c)**2) + max(0.4, slope_err)

    result = minimize(distance_fn, [x0, y0, z0], method='Powell')
    x_new, y_new, z_new = result.x
    return x_new, y_new, z_new

## Suppress near-duplicate detections, keeping the more confident one
def clear_duplicates(coords):
    """Drop detections that lie close to a more confident detection.

    For every pair of detections closer than ``DISTANCE_THRESH_CLEAR`` (by
    Euclidean distance of their x, y, z), the one with the lower confidence
    has its 'confidence' set to -1. The dicts are modified in place.

    Returns:
        The detections whose confidence is still positive, in input order.
    """
    for candidate in coords:
        position = np.array([candidate['x'], candidate['y'], candidate['z']])
        for other in coords:
            other_position = np.array([other['x'], other['y'], other['z']])
            gap = np.sqrt(((position - other_position)**2).sum())
            if not gap < DISTANCE_THRESH_CLEAR:
                continue
            if candidate['confidence'] < other['confidence']:
                candidate['confidence'] = -1

    survivors = []
    for candidate in coords:
        if candidate['confidence'] > 0:
            survivors.append(candidate)
    return survivors


## Extract object poses from a network prediction
def extract_coords(prediction, flipped=False):
    """Decode a stacked prediction into a list of pose dicts.

    Args:
        prediction: Array whose first channel holds the detection logits
            and whose remaining channels hold the regression maps, ordered
            by sorted key name.
        flipped: Forwarded to ``optimize_xy``.

    Returns:
        One dict per cell with a positive logit, after undoing the
        regression preprocessing, adding a sigmoid 'confidence', refining
        x/y/z with ``optimize_xy`` and removing near-duplicates.
    """
    logits = prediction[0]
    regr_output = prediction[1:]
    col_names = sorted(['x', 'y', 'z', 'yaw', 'pitch_sin', 'pitch_cos', 'roll'])

    detections = []
    for r, c in np.argwhere(logits > 0):
        pose = _regr_back(dict(zip(col_names, regr_output[:, r, c])))
        pose['confidence'] = 1 / (1 + np.exp(-logits[r, c]))
        pose['x'], pose['y'], pose['z'] = optimize_xy(
            r, c, pose['x'], pose['y'], pose['z'], flipped)
        detections.append(pose)
    return clear_duplicates(detections)

## Serialise pose dicts into a prediction string
def coords2str(coords, names=['yaw', 'pitch', 'roll', 'x', 'y', 'z', 'confidence']):
    """Join the selected fields of every pose dict into one string.

    Values are written object by object, in the order given by ``names``
    and separated by single spaces; a missing field is written as ``0``.
    """
    return ' '.join(str(pose.get(field, 0)) for pose in coords for field in names)


### **4. Image Visualization Training data [Images, ground truth]**

In [None]:
"""1 Viuslaization data Processing as input input 
   2 Image coordinate project form 3D to 2D
   3. Create the Mask as the Output Base on 

"""

# Load one training sample (label 300) and build its network input and
# targets with the left-right flip enabled.
img0 = imread(PATH + 'train_images/' + train['ImageId'][300] + '.jpg')
img = preprocess_image(img0,flip=True)
mask, regr = get_mask_and_regr(img0, train['PredictionString'][300],flip=True)

print('img.shape', img.shape, 'std:', np.std(img))
print('mask.shape', mask.shape, 'std:', np.std(mask))
print('regr.shape', regr.shape, 'std:', np.std(regr))
print(img[:,::-1].shape)

fig=plt.figure( figsize=(25, 20), facecolor='w', edgecolor='k')
fig.subplots_adjust(hspace = .4, wspace=.0001)
row= 8
columns=1
i=1
# NOTE(review): `img` was produced with flip=True, so the first panel is the
# mirrored image and img[:,::-1] is the un-mirrored one -- the first two
# titles look swapped.
# regr[:,:,-2] is the 'yaw' channel: channels follow sorted key order.
all_data=[img,img[:,::-1], mask, regr[:,:,-2] ]
all_label=["rescale image", "rescale flip image","Grouth-truth Mask", "Yaw values" ]
# NOTE(review): this loop shadows the builtin `id` and rebinds the module-level
# `img`; after the loop `img` is the last panel (the yaw map), not the image.
for id in range(4): 
      #print(id1, id2)
    fig.add_subplot(row, columns, i)
    img=all_data[id]
    plt.imshow(img)
    plt.gca().set_title(all_label[id] )
    i+=1
plt.show()



4.1 Example of Given Prediction Mask -- Regression Mask Image then model output

In [None]:
# Two detections closer than this distance are treated as duplicates by
# clear_duplicates().
DISTANCE_THRESH_CLEAR = 2
idx=2
img0 = imread(PATH + 'train_images/' + train['ImageId'].iloc[20+idx] + '.jpg')
# optimize_xy() maps pixels to grid cells with the same formula that
# get_mask_and_regr() applies to the raw image, so IMG_SHAPE has to be the
# shape of the raw image. It was previously taken from `img`, which at this
# point holds whatever the preceding plotting loop left in it.
IMG_SHAPE = img0.shape

print(img0.shape)

img1 = img0[:,::-1]
print(img1.shape)
# Use .iloc for the label as well, so the image and its label always come from
# the same row.
mask, regr = get_mask_and_regr(img0, train['PredictionString'].iloc[20+idx])
print(regr.shape)
# Move the channel axis first: (H, W, 7) -> (7, H, W).
regr= np.rollaxis(regr, 2, 0)
##get back coordinate
# Feed the ground-truth maps through the decoder as if they were a prediction.
coords= extract_coords(np.concatenate([mask[None], regr], 0))
print('_____coordinate PREDICTION example______:')
print(coords)

fig=plt.figure( figsize=(15, 12), facecolor='w', edgecolor='k')
fig.subplots_adjust(hspace = .4, wspace=.0001)

all_data=[img1, mask,regr[4, :, :] ]
all_label=['image_input', 'Example Mask Prediction', 'Example Regr Mask Prediciton']
row=3
columns=1

# Plot each panel without shadowing the builtin `id` or rebinding `img`.
for i, (panel, title) in enumerate(zip(all_data, all_label), start=1):
    fig.add_subplot(row, columns, i)
    plt.imshow(panel)
    plt.gca().set_title(title)
plt.show()


### **5. Buidling Data Augmentation Pipeline**
+ '''The data augmentation pipeline includes'''
1. **Contrast Enhancement**
2. **Brightness Enhancement**
3. **Flipping left-right**
4. **Adding Noise to Images**


In [None]:
'''1. Contrast Enhancement Augmentation'''
def Contrast_enhance(img_number):
    """Create contrast-enhanced copies of randomly chosen training images.

    Each chosen image is enhanced by a random factor in [1.5, 2), saved in
    the current working directory as ``Contrast_<i>.jpg`` and paired with
    the unchanged label of its source image. The first original/enhanced
    pair is also displayed.

    Args:
        img_number: Number of images to draw (with replacement).

    Returns:
        DataFrame with columns 'ImageId' and 'PredictionString', one row
        per generated image.
    """
    seed = np.random.randint(1, 2019)
    np.random.seed(seed)
    fname_list = np.random.choice(glob('../input/pku-autonomous-driving/train_images/*'), img_number)
    # Read-only lookup table; the global `train` frame is left untouched, so
    # an error part-way through cannot leave it with a changed index.
    labels = train.set_index('ImageId')['PredictionString']
    rows = []
    for i, fname in enumerate(fname_list):
        img = Image.open(fname)
        ##add contrast here
        img_enh = ImageEnhance.Contrast(img).enhance(np.random.uniform(1.5, 2))#PIL.Image
        ##save image
        out_name = 'Contrast_' + str(i) + '.jpg'
        img_enh.save(out_name)
        ##reuse the source image's PredictionString for the enhanced image
        pstring = labels.loc[fname.split('/')[-1].split('.jpg')[0]]
        # NOTE(review): unlike the ids in train.csv this ImageId carries a
        # '.jpg' suffix and the file lives in the working directory --
        # confirm the data loader can resolve it.
        rows.append({'ImageId': out_name, 'PredictionString': pstring})
        #show example of augmentation after and before
        if i == 0:
            print('Original\n')
            plt.imshow(img)
            plt.show()
            print('Contrast_enhance\n')
            plt.imshow(img_enh)
            plt.show()

    # DataFrame.append was removed in pandas 2.0; build the frame in one go.
    return pd.DataFrame(rows, columns=['ImageId', 'PredictionString'])

'''2. Brightness Ehancement'''

def Brightness_enhance(img_number):
    """Create brightness-adjusted copies of randomly chosen training images.

    Each chosen image is adjusted by a random factor in [0.5, 1.0), saved in
    the current working directory as ``Brightness_<i>.jpg`` and paired with
    the unchanged label of its source image. The first original/adjusted
    pair is also displayed.

    Args:
        img_number: Number of images to draw (with replacement).

    Returns:
        DataFrame with columns 'ImageId' and 'PredictionString', one row
        per generated image.
    """
    seed = np.random.randint(1, 2019)
    np.random.seed(seed)
    fname_list = np.random.choice(glob('../input/pku-autonomous-driving/train_images/*'), img_number)
    # Read-only lookup table; the global `train` frame is left untouched, so
    # an error part-way through cannot leave it with a changed index.
    labels = train.set_index('ImageId')['PredictionString']
    rows = []
    for i, fname in enumerate(fname_list):
        img = Image.open(fname)
        ##add Brightness here
        img_enh = ImageEnhance.Brightness(img).enhance(np.random.uniform(0.5, 1.0))#PIL.Image
        ##save image
        out_name = 'Brightness_' + str(i) + '.jpg'
        img_enh.save(out_name)
        ##reuse the source image's PredictionString for the enhanced image
        pstring = labels.loc[fname.split('/')[-1].split('.jpg')[0]]
        # NOTE(review): unlike the ids in train.csv this ImageId carries a
        # '.jpg' suffix and the file lives in the working directory --
        # confirm the data loader can resolve it.
        rows.append({'ImageId': out_name, 'PredictionString': pstring})
        #show example of augmentation after and before
        if i == 0:
            print('Original\n')
            plt.imshow(img)
            plt.show()
            print('Brightness_enhance\n')
            plt.imshow(img_enh)
            plt.show()

    # DataFrame.append was removed in pandas 2.0; build the frame in one go.
    return pd.DataFrame(rows, columns=['ImageId', 'PredictionString'])

'''3. Fliping Image Position'''

def FlipLR_enhance(img_number):
    """Create left-right mirrored copies of randomly chosen training images.

    Each chosen image is mirrored, saved in the current working directory
    as ``FlipLR_<i>.jpg`` and paired with a label adjusted for the mirror.
    The first original/mirrored pair is also displayed.

    Label fields come in groups of seven (id, yaw, pitch, roll, x, y, z).
    For a mirrored image the 'pitch', 'roll' and 'x' fields are negated,
    which is the same convention ``_regr_preprocess(..., flip=True)`` uses.
    (The previous version negated yaw, roll and y instead.)

    Args:
        img_number: Number of images to draw (with replacement).

    Returns:
        DataFrame with columns 'ImageId' and 'PredictionString', one row
        per generated image.
    """
    seed = np.random.randint(1, 2019)
    np.random.seed(seed)
    fname_list = np.random.choice(glob('../input/pku-autonomous-driving/train_images/*'), img_number)
    # Read-only lookup table; the global `train` frame is left untouched, so
    # an error part-way through cannot leave it with a changed index.
    labels = train.set_index('ImageId')['PredictionString']
    # Positions, within each 7-field group, of pitch, roll and x.
    mirrored_fields = (2, 3, 4)
    rows = []
    for i, fname in enumerate(fname_list):
        img = Image.open(fname)
        ##mirror the image left-right
        img_enh = Image.fromarray(np.fliplr(img))
        ##save image
        out_name = 'FlipLR_' + str(i) + '.jpg'
        img_enh.save(out_name)
        ##derive the mirrored image's PredictionString from the original one
        pstring = labels.loc[fname.split('/')[-1].split('.jpg')[0]]
        fields = [
            str(-float(value)) if pos % 7 in mirrored_fields else value
            for pos, value in enumerate(pstring.split())
        ]
        # NOTE(review): unlike the ids in train.csv this ImageId carries a
        # '.jpg' suffix and the file lives in the working directory --
        # confirm the data loader can resolve it.
        rows.append({'ImageId': out_name, 'PredictionString': ' '.join(fields)})
        #show example of augmentation after and before
        if i == 0:
            print('Original\n')
            plt.imshow(img)
            plt.show()
            print('FlipLR_enhance\n')
            plt.imshow(img_enh)
            plt.show()

    # DataFrame.append was removed in pandas 2.0; build the frame in one go.
    return pd.DataFrame(rows, columns=['ImageId', 'PredictionString'])

def add_noise(image):
    """Add zero-mean Gaussian noise to a 3-dimensional image.

    The noise variance is drawn uniformly from [0, 0.01) on every call and
    the noisy result is clipped to the range [0, 1].

    Args:
        image: Array of shape (rows, cols, channels).

    Returns:
        A new array of the same shape holding the clipped noisy image.
    """
    n_rows, n_cols, n_channels = image.shape
    variance = np.random.random() * 0.01
    std_dev = variance ** 0.5
    noise = np.random.normal(0, std_dev, (n_rows, n_cols, n_channels))
    return np.clip(image + noise, 0, 1)

def Noise_enhance(img_number):
    """Create noisy copies of randomly chosen training images.

    Gaussian noise (see ``add_noise``) is added to each chosen image; the
    result is resized to 3384x2710, saved in the current working directory
    as ``Noise_<i>.jpg`` and paired with the unchanged label of its source
    image. The first original/noisy pair is also displayed.

    Args:
        img_number: Number of images to draw (with replacement).

    Returns:
        DataFrame with columns 'ImageId' and 'PredictionString', one row
        per generated image.
    """
    seed = np.random.randint(1, 2019)
    np.random.seed(seed)
    fname_list = np.random.choice(glob('../input/pku-autonomous-driving/train_images/*'),img_number)
    # Read-only lookup table; the global `train` frame is left untouched, so
    # an error part-way through cannot leave it with a changed index.
    labels = train.set_index('ImageId')['PredictionString']
    rows = []
    for i, fname in enumerate(fname_list):
        ori_img = plt.imread(fname)
        img = (ori_img/255).astype('float32')
        ##add noise here
        enh = add_noise(img)#gauss
        img_enh = Image.fromarray((enh * 255).astype(np.uint8)).resize((3384, 2710)).convert('RGB')
        #save image
        # The recorded ImageId must be the name the file is saved under (it
        # used to be recorded as 'Noisep_<i>.jpg', which matches no file).
        out_name = 'Noise_' + str(i) + '.jpg'
        img_enh.save(out_name)
        ##reuse the source image's PredictionString for the noisy image
        pstring = labels.loc[fname.split('/')[-1].split('.jpg')[0]]
        # NOTE(review): unlike the ids in train.csv this ImageId carries a
        # '.jpg' suffix and the file lives in the working directory --
        # confirm the data loader can resolve it.
        rows.append({'ImageId': out_name, 'PredictionString': pstring})
        #show example of augmentation after and before
        if i == 0:
            print('Original\n')
            plt.imshow(ori_img)
            plt.show()
            print('Noise_enhance\n')
            plt.imshow(img_enh)
            plt.show()

    # DataFrame.append was removed in pandas 2.0; build the frame in one go.
    return pd.DataFrame(rows, columns=['ImageId', 'PredictionString'])


**VISUALIZE THE APPLIED DATA AUGMENTATION TECHNIQUES**

In [None]:
# create images and dataframe
import glob
from PIL import Image, ImageEnhance
from matplotlib import pyplot as plt
from glob import glob
# Generate five augmented images per technique, together with their labels.
Contrast_df=Contrast_enhance(5)
Brightness_df=Brightness_enhance(5)
FlipLR_df=FlipLR_enhance(5)
Noise_df=Noise_enhance(5)


#merge data frame with train dataframe
# DataFrame.append was removed in pandas 2.0; pd.concat with
# ignore_index=True produces the same stacked frame with a fresh 0..n-1 index.
train = pd.read_csv(PATH + 'train.csv')
train = pd.concat(
    [train, Contrast_df, Brightness_df, FlipLR_df, Noise_df],
    ignore_index=True,
)
print(train)


### **6. Creating Dataset and Dataloader for the Dataset**

In [None]:
'''Data Generator Optional  for Keras Model'''

# from tensorflow import keras
# from tensorflow.keras.preprocessing.image import load_img
# import torch

# class CarDataset(keras.utils.Sequence):
#     """Helper to iterate over the data (as Numpy arrays)."""

#     def __init__(self,batch_size, df, root_dir, training= True ):
        
#         self.batch_size = batch_size
#         self.img_id = df
#         self.root_dir = root_dir
#         self.training = training
        

#     def __len__(self):
#         print(len(self.img_id) // self.batch_size)
#         return len(self.img_id) // self.batch_size
    

#     def __getitem__(self, idx):
        
#         """Returns tuple (input, target) correspond to batch #idx."""
#         i = idx * self.batch_size
        
#         print(i)
#          # Get index name
#         idx, labels = self.img_id.values[idx]
#         # Get image name
#         img_name = self.root_dir.format(idx)
        
#         # Augmentation
#         flip = False
#         if self.training:
#             flip = np.random.randint(2) == 1
#         ## reading image
#         img0 = imread(img_name, True)
#         img = preprocess_image(img0, flip=flip)
#         img = np.rollaxis(img, 2, 0)
        
#         # Get mask and regression maps
#         mask, regr = get_mask_and_regr(img0, labels,flip=flip )
#         regr = np.rollaxis(regr, 2, 0)
        
#         return [img, mask, regr]


'''Testing Data Loader'''
# from sklearn.model_selection import train_test_split

# train_images_dir = PATH + 'train_images/{}.jpg'
# train_masks_dir = PATH + 'train_masks/{}.jpg'
# test_images_dir = PATH + 'test_images/{}.jpg'
# test_masks_dir = PATH + 'test_masks/{}.jpg'

# df_train, df_dev = train_test_split(train, test_size=0.1, random_state=42)
# # class CarDataset(keras.utils.Sequence):
# #     """Helper to iterate over the data (as Numpy arrays)."""

# #     def __init__(self,batch_size, df, root_dir, training= True ):
# ds_data= CarDataset(batch_size=32, df=df_train,root_dir= train_images_dir)
# image, test, test1= ds_data.__getitem__(1)
        
       

###

In [None]:
'''____Pytorch Data Loader____ '''
import torch
from torch.utils.data import Dataset, DataLoader
class Custome_Generator(Dataset):
    """Car dataset.

    Yields, per sample, the preprocessed image together with its object
    mask and regression maps, all with the channel axis first.
    """

    def __init__(self, dataframe, root_dir, training=True, transform=None):
        """Store the sample table and the image path template.

        Args:
            dataframe: Table whose rows unpack to (image id, label string).
            root_dir: Path template; ``root_dir.format(image_id)`` must give
                the image file path.
            training: Enables the random flip draw in ``__getitem__``.
            transform: Accepted but never stored or used.
        """
        self.df = dataframe
        self.root_dir = root_dir
        
        self.training = training

    def __len__(self):
        """Return the number of rows in the sample table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``[img, mask, regr]`` for the row at position ``idx``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        # Get image name
        # From here on `idx` is the image id, not the row position.
        idx, labels = self.df.values[idx]
        img_name = self.root_dir.format(idx)
          
        # Augmentation
        # NOTE(review): `flip` is drawn but never used -- both calls below pass
        # flip=False, so `training` currently has no effect on the output.
        flip = False
        if self.training:
            flip = np.random.randint(2) == 1
        
        # Read image
        # fast_mode=True returns the array exactly as OpenCV loaded it.
        img0 = imread(img_name, fast_mode=True)

        img = preprocess_image(img0, flip=False)

        # (H, W, C) -> (C, H, W)
        img = np.rollaxis(img, 2, 0)
        #print(img.shape)
        
        # Get mask and regression maps
        mask, regr = get_mask_and_regr(img0, labels, flip=False)
        # (H, W, 7) -> (7, H, W)
        regr = np.rollaxis(regr, 2, 0)
        return [img, mask, regr]

    
    
    
    
'''_______Testing Data Generator Pytorch______'''

# Path templates; the dataset fills in the image id with str.format.
train_images_dir = PATH + 'train_images/{}.jpg'
test_images_dir = PATH + 'test_images/{}.jpg'
test= pd.read_csv(PATH+ 'sample_submission.csv')
# Hold out 1% of the training table for validation.
df_train, df_val = train_test_split(train, test_size=0.01, random_state=42)
df_test = test

# Create dataset objects
train_dataset = Custome_Generator(df_train, train_images_dir, training=True)
# Fetch one sample for the preview plot below.
image, mask, regr = train_dataset[0]

# Validation and test data must never go through training-time augmentation,
# so the training flag is switched off explicitly.
val_dataset = Custome_Generator(df_val, train_images_dir, training=False)
test_dataset = Custome_Generator(df_test, test_images_dir, training=False)


'''________Creating the Pytorch Data Loader_______'''
BATCH_SIZE=4
#train, val, test Data Generator 
train_datagen= DataLoader(dataset= train_dataset, batch_size= BATCH_SIZE, shuffle=True, num_workers=4)

val_datagen=DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
test_datagen = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)


'''________Visualize the Ouput of DataGenerator_______'''
fig=plt.figure( figsize=(12, 8), facecolor='w', edgecolor='k')
fig.subplots_adjust(hspace = .4, wspace=.0001)

# Panels: the image moved back to channels-last, the object mask, and
# regression channel 5.
all_data=[np.rollaxis(image, 0, 3), mask,regr[5, : , :] ]
all_label=['image', 'mask', 'regression map']
row=3
columns=1
# Plot each panel without shadowing the builtin `id` or rebinding the
# module-level `img`.
for i, (panel, title) in enumerate(zip(all_data, all_label), start=1):
    fig.add_subplot(row, columns, i)
    plt.imshow(panel)
    plt.gca().set_title(title)
plt.show()
#plt.imshow(image.reshape(320, 1024,3))

### **Building Unet-Like model** 
1. The encoder part of the model uses MobileNetV2 and EfficientNet backbones
2. Construct the model and train it

In [None]:
## Import EfficinetModel 
!pip install efficientnet-pytorch
!pip install torchsummary

In [None]:
import torch
## Import EfficientNet Base
from efficientnet_pytorch import EfficientNet 

# Load an EfficientNet-B0 with pretrained weights.
# NOTE(review): neither object created here is referenced again in the code
# visible in this file; the U-Net class loads its own backbone.
EfficientNet_base = EfficientNet.from_pretrained('efficientnet-b0')
#print(EfficientNet_base.eval())
# Load MobileNetV2 from the torchvision hub repository pinned at v0.9.0.
mobileNetV2_base = torch.hub.load('pytorch/vision:v0.9.0', 'mobilenet_v2', pretrained=True,)
# mobileNetV2_base.eval()

### Building -- Traning Custom Unet Model

In [None]:
'''In this section we will Construct Unet Architecture with Encoder 

1.Encoder EfficientB0
2.Encoder Efficienet B1

'''
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from tqdm import tqdm#_notebook as tqdm

from functools import reduce
import os
from scipy.optimize import minimize
import plotly.express as px
import torch.nn.functional as F

class Unet_EfficienetB0(nn.Module):
    """U-Net-like network: a small conv encoder plus EfficientNet-B0 features.

    The input image passes through four ``double_conv`` + max-pool stages
    for the skip connections, while the horizontally central part of the
    image is fed to a pretrained EfficientNet-B0. The backbone features are
    padded back to full width, combined with a coordinate mesh and
    upsampled twice with skip connections.

    Args:
        n_classes: Number of output channels of the final 1x1 convolution.
    """
    def __init__(self, n_classes):
        super().__init__()
        ## EfficientNet B0 backbone
        self.base_model = EfficientNet.from_pretrained('efficientnet-b0')

        # 5 input channels: the image channels plus the 2 mesh channels
        # (assumes a 3-channel image -- TODO confirm).
        self.conv0 = double_conv(5, 64)
        self.conv1 = double_conv(64, 128)
        self.conv2 = double_conv(128, 512)
        self.conv3 = double_conv(512, 1024)

        self.mp = nn.MaxPool2d(2)

        # 1282: presumably the backbone's feature channels plus the 2 mesh
        # channels -- TODO confirm against the backbone; 1024 is the skip
        # connection from conv3.
        self.up1 = up_sample(1282 + 1024, 512)
        self.up2 = up_sample(512 + 512, 256)
        self.outc = nn.Conv2d(256, n_classes, 1)

    def forward(self, x):
        """Return the ``n_classes``-channel prediction map for batch ``x``."""
        batch_size = x.shape[0]
        # Create helper tensors on the input's own device and dtype rather
        # than a hard-coded .cuda(), so the module also works on CPU and on
        # replicas that live on a non-default GPU.
        mesh1 = get_mesh(batch_size, x.shape[2], x.shape[3]).to(device=x.device, dtype=x.dtype)
        x0 = torch.cat([x, mesh1], 1)
        x1 = self.mp(self.conv0(x0))
        x2 = self.mp(self.conv1(x1))
        x3 = self.mp(self.conv2(x2))
        x4 = self.mp(self.conv3(x3))

        # Only the horizontally central part of the image goes through the
        # backbone; IMG_WIDTH // 8 columns are cut on each side.
        x_center = x[:, :, :, IMG_WIDTH // 8: -IMG_WIDTH // 8]
        feats = self.base_model.extract_features(x_center)
        # Zero padding on both sides to compensate for the cropped columns.
        bg = torch.zeros(
            [feats.shape[0], feats.shape[1], feats.shape[2], feats.shape[3] // 8],
            dtype=feats.dtype,
            device=feats.device,
        )
        feats = torch.cat([bg, feats, bg], 3)

        # Add positional info
        mesh2 = get_mesh(batch_size, feats.shape[2], feats.shape[3]).to(device=feats.device, dtype=feats.dtype)
        feats = torch.cat([feats, mesh2], 1)

        x = self.up1(feats, x4)
        x = self.up2(x, x3)
        x = self.outc(x)
        return x

In [None]:

class double_conv(nn.Module):
    '''(conv => BatchNorm => ReLU) * 2

    Two 3x3 convolutions with padding 1, each followed by batch
    normalisation and an in-place ReLU, so the spatial size is preserved.
    '''
    def __init__(self, in_ch, out_ch):
        super().__init__()
        stages = [
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        ]
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        return self.conv(x)

class up_sample(nn.Module):
    """Upsample a feature map by 2, merge an optional skip tensor, then convolve.

    Args:
        in_ch: Number of channels entering the final ``double_conv`` (the
            upsampled tensor's channels plus the skip tensor's, if any).
        out_ch: Number of output channels.
        bilinear: Use fixed bilinear upsampling when true, otherwise a
            learned transposed convolution over ``in_ch // 2`` channels.
    """
    def __init__(self, in_ch, out_ch, bilinear=True):
        super().__init__()

        #  would be a nice idea if the upsampling could be learned too,
        #  but my machine do not have enough memory to handle all those weights
        if bilinear:
            self.up_sample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        else:
            self.up_sample = nn.ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2)

        self.conv = double_conv(in_ch, out_ch)

    def forward(self, x1, x2=None):
        """Upsample ``x1`` and, when given, concatenate the skip tensor ``x2``.

        ``x1`` is padded so its height and width match ``x2`` before the two
        are concatenated along the channel axis. When ``x2`` is ``None`` the
        upsampled ``x1`` goes straight into the convolution. (Previously
        ``x2`` was dereferenced before the ``None`` check, so the
        advertised default always raised.)
        """
        x1 = self.up_sample(x1)

        if x2 is None:
            return self.conv(x1)

        # input is CHW
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

        x1 = F.pad(x1, (diffX // 2, diffX - diffX//2,
                        diffY // 2, diffY - diffY//2))

        # for padding issues, see
        # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
        # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd

        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)

def get_mesh(batch_size, shape_x, shape_y, device=None):
    """Build a 2-channel normalised coordinate grid for a batch.

    Args:
        batch_size: Number of copies of the grid.
        shape_x: Grid height (number of rows).
        shape_y: Grid width (number of columns).
        device: Device for the result. Defaults to the current CUDA device
            when CUDA is available (the previous behaviour) and to the CPU
            otherwise.

    Returns:
        float32 tensor of shape (batch_size, 2, shape_x, shape_y). Channel 0
        runs from 0 to 1 along the width, channel 1 from 0 to 1 along the
        height.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    mg_x, mg_y = np.meshgrid(np.linspace(0, 1, shape_y), np.linspace(0, 1, shape_x))
    grid = np.stack([mg_x, mg_y]).astype('float32')
    mesh = torch.from_numpy(grid).to(device)
    return mesh.unsqueeze(0).repeat(batch_size, 1, 1, 1)

In [None]:
## This code trains on a single GPU
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(device)
# Used below only to size the learning-rate schedule.
n_epochs = 50
torch.cuda.set_device('cuda:0')
# 8 output channels: the loss reads channel 0 as the mask logit and the
# remaining 7 as regression outputs.
model = Unet_EfficienetB0(8).cuda()

## Model Train on Multiple GPU 
# device_ids=[1, 2, 3]
# model= torch.nn.DataParallel(Unet_EfficienetB0(8), device_ids=device_ids)

optimizer = optim.Adam(model.parameters(), lr=0.001)
# The scheduler is stepped once per batch in train_model(), so step_size is
# counted in batches: the rate drops by 10x after every third of
# max(n_epochs, 10) epochs' worth of batches.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=max(n_epochs, 10) * len(train_datagen) // 3, gamma=0.1)


**Here Summary of Our model Parameter**

In [None]:
from torchsummary import summary
# Summarise the model that the optimizer was built for. Creating a new
# Unet_EfficienetB0 here would rebind `model` to a network whose parameters
# the existing optimizer does not hold, so training would never update it.
summary(model.cuda(), ( 3, 320, 1024))

In [None]:
'''###Define The LOSS Function '''
def criterion(prediction, mask, regr, size_average=True):
    """Combined detection and regression loss.

    Args:
        prediction: Tensor (batch, 1 + C, H, W); channel 0 holds the mask
            logits, the remaining channels the regression outputs.
        mask: Tensor (batch, H, W) with 1 at object centres, 0 elsewhere.
        regr: Tensor (batch, C, H, W) of regression targets.
        size_average: When false, the loss is multiplied by the batch size.

    Returns:
        Scalar tensor: binary cross-entropy of the mask (averaged over the
        batch, summed over pixels) plus the L1 regression error at object
        centres (averaged per object, then over the batch).
    """
    # Binary mask loss
    pred_mask = torch.sigmoid(prediction[:, 0])
    mask_loss = mask * torch.log(pred_mask + 1e-12) + (1 - mask) * torch.log(1 - pred_mask + 1e-12)
    mask_loss = -mask_loss.mean(0).sum()

    # Regression L1 loss
    pred_regr = prediction[:, 1:]
    # A sample without any object has a zero mask sum, which used to give
    # 0 / 0 = NaN. Clamping the count to 1 makes such a sample contribute 0
    # and leaves every sample with at least one object unchanged.
    n_objects = mask.sum(1).sum(1).clamp(min=1)
    regr_loss = (torch.abs(pred_regr - regr).sum(1) * mask).sum(1).sum(1) / n_objects
    regr_loss = regr_loss.mean(0)

    # Sum
    loss = mask_loss + regr_loss
    if not size_average:
        loss *= prediction.shape[0]
    return loss

In [None]:
def train_model(epoch, history=None):
    """Run one training epoch over ``train_datagen``.

    For every batch the tensors are moved to the GPU, the loss is computed
    and back-propagated, and both the optimizer and the learning-rate
    scheduler are stepped. The epoch number, current learning rate and the
    last batch's loss are printed at the end.

    Args:
        epoch: Index of the current epoch (used for logging only).
        history: Optional DataFrame; the loss of each batch is stored in
            column 'train_loss' at the fractional index
            ``epoch + batch / number_of_batches``.
    """
    model.train()

    for step, batch in enumerate(tqdm(train_datagen)):
        images, masks, targets = batch
        images = images.cuda()
        masks = masks.cuda()
        targets = targets.cuda()

        optimizer.zero_grad()
        loss = criterion(model(images), masks, targets)
        if history is not None:
            history.loc[epoch + step / len(train_datagen), 'train_loss'] = loss.data.cpu().numpy()

        loss.backward()
        optimizer.step()
        # The scheduler advances once per batch, not once per epoch.
        exp_lr_scheduler.step()

    current_lr = optimizer.state_dict()['param_groups'][0]['lr']
    print('Train Epoch: {} \tLR: {:.6f}\tLoss: {:.6f}'.format(
        epoch,
        current_lr,
        loss.data))

def evaluate_model(epoch, history=None):
    """Compute and print the mean validation loss over ``val_datagen``.

    Args:
        epoch: Index of the current epoch (used as the history index).
        history: Optional DataFrame; the loss is stored in column
            'dev_loss' at row ``epoch``.
    """
    model.eval()
    loss = 0.0

    with torch.no_grad():
        for img_batch, mask_batch, regr_batch in val_datagen:
            img_batch =  img_batch.cuda()
            mask_batch =  mask_batch.cuda()
            regr_batch =  regr_batch.cuda()

            output = model(img_batch)

            # size_average=False scales each batch loss by its batch size, so
            # the running total can be divided by the sample count below.
            loss += criterion(output, mask_batch, regr_batch, size_average=False).item()

    # Normalise by the size of the validation set. (The previous code
    # referenced an undefined `dev_loader` here and raised NameError.)
    loss /= len(val_datagen.dataset)

    if history is not None:
        history.loc[epoch, 'dev_loss'] = loss

    print('Dev loss: {:.4f}'.format(loss))

In [None]:
## The model Training in colab run more than 25 hours to complete at 60 epochs
## This model suitable run on Several with multiple GPUS 
import gc
# Number of epochs actually run here. The learning-rate schedule was sized
# with the earlier n_epochs value, not this one.
n_epochs=1
# Collects per-batch training loss and per-epoch validation loss.
history = pd.DataFrame()

for epoch in range(n_epochs):
    # Free cached GPU memory and run the garbage collector before each epoch.
    torch.cuda.empty_cache()
    gc.collect()
    train_model(epoch, history)
    evaluate_model(epoch, history)
    # Checkpoint holding everything needed to resume training.
    state = {
    'epoch': epoch,
    'state_dict': model.state_dict(),
    'optimizer': optimizer.state_dict(),
    'lr_scheduler':exp_lr_scheduler.state_dict()
    }
    # NOTE(review): this writes to the filesystem root and overwrites the same
    # file every epoch -- confirm the path is intended and writable.
    torch.save(state, '/Unetlike_model.pth')