In [2]:
!pip install -U albumentations>=0.3.0 --user 
!pip install -U --pre segmentation-models --user

Requirement already up-to-date: segmentation-models in /Users/pratiklikhar/anaconda3/envs/pratik/lib/python3.7/site-packages (1.0.1)


In [3]:
# Absolute, machine-specific project directory.
# NOTE(review): no other cell visible in this notebook section reads root_dir — confirm it is still needed.
root_dir = '/Users/pratiklikhar/MTech_Project'

In [4]:
import os

In [5]:
# Restrict CUDA to device 0. This is set before keras is imported in this cell,
# presumably so the backend picks the value up when it initialises — keep it first.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import cv2
import keras
import numpy as np
import matplotlib.pyplot as plt

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [6]:
# helper function for data visualization
def visualize(**images):
    """Show every keyword-supplied image side by side in a single row.

    Each keyword name becomes the panel title: underscores are replaced by
    spaces and the result is title-cased.
    """
    total = len(images)
    plt.figure(figsize=(16, 5))
    for position, (label, picture) in enumerate(images.items(), start=1):
        plt.subplot(1, total, position)
        # no tick marks on either axis
        plt.xticks([])
        plt.yticks([])
        plt.title(label.replace('_', ' ').title())
        plt.imshow(picture)
    plt.show()
    
# helper function for data visualization    
def denormalize(x):
    """Scale image to range 0..1 for correct plot.

    The 2nd and 98th percentiles of ``x`` are mapped to 0 and 1 respectively;
    values outside that band are clipped.

    Args:
        x: array-like of numeric pixel values.

    Returns:
        Array of the same shape as ``x`` with values in [0, 1]. When the two
        percentiles coincide (e.g. a constant image) an all-zero float array is
        returned instead of dividing by zero and producing NaNs.
    """
    x_max = np.percentile(x, 98)
    x_min = np.percentile(x, 2)
    span = x_max - x_min
    if span == 0:
        # Degenerate contrast range: nothing to stretch.
        return np.zeros_like(x, dtype=float)
    x = (x - x_min) / span
    x = x.clip(0, 1)
    return x
    

# classes for data loading and preprocessing
class Dataset:
    """Image/mask dataset for per-character semantic segmentation.

    Reads an image and its same-named mask from disk, expands the mask into one
    binary channel per requested class, then applies the optional augmentation
    and preprocessing callables.

    Args:
        images_dir (str): path to images folder
        masks_dir (str): path to segmentation masks folder; each mask is looked
            up under the same file name as its image
        classes (list): class names (entries of ``CLASSES``) to extract from the
            segmentation mask; ``None`` selects every entry of ``CLASSES``
        augmentation (albumentations.Compose): data transformation pipeline
            (e.g. flip, scale, etc.)
        preprocessing (albumentations.Compose): data preprocessing
            (e.g. normalization, shape manipulation, etc.)

    Raises:
        ValueError: if an entry of ``classes`` is not in ``CLASSES``.

    """

    CLASSES = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

    def __init__(
            self,
            images_dir,
            masks_dir,
            classes=None,
            augmentation=None,
            preprocessing=None,
    ):
        self.ids = os.listdir(images_dir)
        self.images_fps = [os.path.join(images_dir, image_id) for image_id in self.ids]
        self.masks_fps = [os.path.join(masks_dir, image_id) for image_id in self.ids]

        # The default used to crash with TypeError when iterating None;
        # fall back to the full class list instead.
        if classes is None:
            classes = self.CLASSES

        # convert str names to class indices
        self.class_values = [self.CLASSES.index(cls) for cls in classes]

        self.augmentation = augmentation
        self.preprocessing = preprocessing

    def __getitem__(self, i):
        """Return the ``(image, mask)`` pair at position ``i``.

        The image is returned in RGB channel order. The mask has one float
        channel per requested class, plus a trailing background channel when
        more than one class was requested.

        Raises:
            OSError: if the image or mask file cannot be read.
        """
        # read data; cv2.imread signals failure by returning None, not by raising
        image = cv2.imread(self.images_fps[i])
        if image is None:
            raise OSError('Could not read image: {}'.format(self.images_fps[i]))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        mask = cv2.imread(self.masks_fps[i], 0)  # flag 0: load as single-channel grayscale
        if mask is None:
            raise OSError('Could not read mask: {}'.format(self.masks_fps[i]))

        # one boolean plane per class: class index v is matched against gray value 2*v + 3
        masks = [(mask == (v*2)+3) for v in self.class_values]
        mask = np.stack(masks, axis=-1).astype('float')

        # add background if mask is not binary
        if mask.shape[-1] != 1:
            background = 1 - mask.sum(axis=-1, keepdims=True)
            mask = np.concatenate((mask, background), axis=-1)

        # apply augmentations
        if self.augmentation:
            sample = self.augmentation(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        # apply preprocessing
        if self.preprocessing:
            sample = self.preprocessing(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        return image, mask

    def __len__(self):
        """Number of samples (files listed in ``images_dir``)."""
        return len(self.ids)
    
    
class Dataloder(keras.utils.Sequence):
    """Load data from dataset and form batches

    Args:
        dataset: instance of Dataset class for image loading and preprocessing.
        batch_size: Integer number of images in batch.
        shuffle: Boolean, if `True` shuffle image indexes each epoch.
    """

    def __init__(self, dataset, batch_size=1, shuffle=False):
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indexes = np.arange(len(dataset))

        # establish the initial (possibly shuffled) order
        self.on_epoch_end()

    def __getitem__(self, i):
        """Return batch ``i`` as a list of arrays stacked along a new first axis."""
        # collect batch data
        start = i * self.batch_size
        stop = (i + 1) * self.batch_size
        data = []
        for j in range(start, stop):
            # Look the sample up through self.indexes; indexing the dataset with j
            # directly ignored the permutation, so shuffle=True had no effect.
            data.append(self.dataset[self.indexes[j]])

        # transpose list of per-sample tuples into one stacked array per field
        batch = [np.stack(samples, axis=0) for samples in zip(*data)]

        return batch

    def __len__(self):
        """Denotes the number of batches per epoch (a trailing partial batch is dropped)"""
        return len(self.indexes) // self.batch_size

    def on_epoch_end(self):
        """Callback function to shuffle indexes each epoch"""
        if self.shuffle:
            self.indexes = np.random.permutation(self.indexes)

In [7]:
import segmentation_models as sm

Segmentation Models: using `keras` framework.


In [8]:
# Backbone and training hyper-parameters.
BACKBONE = 'resnet101'
BATCH_SIZE = 2
LR = 1e-4
EPOCHS = 40

# One class per digit followed by one per upper-case letter (36 in total).
CLASSES = list('0123456789' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')

# Input preprocessing function matching the chosen backbone.
preprocess_input = sm.get_preprocessing(BACKBONE)

In [9]:
# Network head: a single channel for binary segmentation, otherwise one
# channel per class plus one extra channel for the background.
if len(CLASSES) == 1:
    n_classes = 1
else:
    n_classes = len(CLASSES) + 1

# Sigmoid for the single-channel case, softmax across channels otherwise.
activation = 'softmax' if n_classes != 1 else 'sigmoid'

# Build the U-Net on top of the selected backbone.
model = sm.Unet(BACKBONE, classes=n_classes, activation=activation)




In [10]:
# Optimizer: Adam with the configured learning rate.
optim = keras.optimizers.Adam(LR)

# Focal loss variant matching the output head (binary vs. categorical).
# segmentation_models losses can be combined with '+' and scaled by an int or
# float factor (e.g. to add a class-weighted DiceLoss); only the focal term is
# used here.
if n_classes == 1:
    focal_loss = sm.losses.BinaryFocalLoss()
else:
    focal_loss = sm.losses.CategoricalFocalLoss()

# IoU and F-score, both thresholded at 0.5.
metrics = [
    sm.metrics.IOUScore(threshold=0.5),
    sm.metrics.FScore(threshold=0.5),
]

# Compile the Keras model with the optimizer, loss and metrics defined above.
model.compile(optimizer=optim, loss=focal_loss, metrics=metrics)

In [11]:
# Load weights from an HDF5 file into the compiled model; the path is relative,
# so it is resolved against the kernel's current working directory.
model.load_weights('best_model1.h5') 

In [12]:
import numpy as np
import cv2
import datetime
from IPython import display
import matplotlib.pyplot as py
%matplotlib inline

def _runner_up_channel(pr_mask):
    """Return the index of the channel holding the second-largest summed activation.

    Every channel (last axis) of ``pr_mask`` is summed over all other axes and
    the running largest / second-largest sums are tracked.

    NOTE(review): this keeps the selection rule of the original inline loop
    unchanged. It presumably relies on the background channel having the largest
    sum so that the runner-up is the dominant character — confirm. Also, when
    channel 0 has the largest (positive) sum, channel 0 itself is returned.
    """
    highind = 0
    secondhighind = 0
    high = 0
    secondhigh = -1
    for i in range(pr_mask.shape[-1]):
        addn = np.sum(pr_mask[..., i])
        if i == 0:
            if addn > high:
                high = addn
                highind = i
                secondhigh = addn
                secondhighind = i
        else:
            if addn > high:
                secondhigh = high
                secondhighind = highind
                high = addn
                highind = i
            elif addn > secondhigh and addn < high:
                secondhigh = addn
                secondhighind = i
    return secondhighind


def _channel_label(index):
    """Map a channel index to its character ('0'-'9', 'A'-'Z'); anything else is 'NA'."""
    labels = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    return labels[index] if 0 <= index < len(labels) else 'NA'


def showVideo():
    """Run the model on webcam frames, show them inline and record them to 'output.avi'.

    Each frame is converted to RGB, resized to 640x480, passed through the global
    ``model``, annotated with the selected character, written to the video file
    and displayed with matplotlib. The loop ends when a frame cannot be read or
    the kernel is interrupted; the camera and the video writer are released in
    both cases (and on any other error).
    """
    frame_size = (640, 480)  # (width, height) used for the model input and the recording

    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FPS, 60)
    print(cap.isOpened())
    # Initial read; print only the success flag rather than the whole frame array.
    print(cap.read()[0])
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 720)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    fourcc = cv2.VideoWriter_fourcc('M','J','P','G')
    # The writer must be opened with the size of the frames actually written
    # (the resized ones), not the camera's native capture size.
    out = cv2.VideoWriter('output.avi', fourcc, 5.0, frame_size)
    font = cv2.FONT_HERSHEY_SIMPLEX

    try:
        while cap.isOpened():
            # Capture frame-by-frame
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = cv2.resize(frame, frame_size)
            pr_mask = model.predict(np.expand_dims(frame, axis=0))

            secondhighind = _runner_up_channel(pr_mask)
            print(secondhighind)
            text = _channel_label(secondhighind)

            frame = cv2.putText(frame, text, (10,50), font, 1, (0,255,255), 2, cv2.LINE_AA)

            # OpenCV's writer expects BGR, while `frame` is RGB for the model/matplotlib.
            out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))

            # Display the frame until a new one is available
            py.axis('off')
            py.title("Input Stream")
            py.imshow(frame)
            py.show()
            display.clear_output(wait=True)
    except KeyboardInterrupt:
        # Kernel interrupt is the normal way to stop; cleanup happens below.
        pass
    finally:
        # Release on every exit path so the camera is freed and the AVI is finalised.
        cap.release()
        out.release()
        print("Released Video Resource")

In [13]:
# Start the webcam recognition loop; interrupt the kernel to stop it.
showVideo()

Released Video Resource


In [None]:
# Live recognition in a native OpenCV window; press 'q' in the window to quit.
vid = cv2.VideoCapture(0)

labels = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'  # channel index -> character
font = cv2.FONT_HERSHEY_SIMPLEX

try:
    while True:
        # Capture the video frame by frame
        ret, frame = vid.read()
        if not ret:
            # No frame delivered (camera unavailable or stream ended).
            break

        # The model is fed an RGB, 640x480 frame with a leading batch axis.
        rgb = cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), (640, 480))
        pr_mask = model.predict(np.expand_dims(rgb, axis=0))

        # Track the channels with the largest and second-largest summed activation.
        # NOTE(review): the runner-up is what gets displayed, presumably because the
        # background channel is expected to have the largest sum — confirm. If
        # channel 0 has the largest (positive) sum, channel 0 itself is selected.
        highind = 0
        secondhighind = 0
        high = 0
        secondhigh = -1
        for i in range(pr_mask.shape[-1]):
            addn = np.sum(pr_mask[..., i])
            if i == 0:
                if addn > high:
                    high = addn
                    highind = i
                    secondhigh = addn
                    secondhighind = i
            else:
                if addn > high:
                    secondhigh = high
                    secondhighind = highind
                    high = addn
                    highind = i
                elif addn > secondhigh and addn < high:
                    secondhigh = addn
                    secondhighind = i
        print(secondhighind)

        # Indices past the last character (e.g. the extra background channel) show 'NA'.
        text = labels[secondhighind] if secondhighind < len(labels) else 'NA'

        # cv2.imshow expects BGR, so convert back before annotating and displaying.
        shown = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
        shown = cv2.putText(shown, text, (10,50), font, 1, (0,255,255), 2, cv2.LINE_AA)
        cv2.imshow('frame', shown)

        # 'q' is the quit key
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window on every exit path, including errors.
    vid.release()
    cv2.destroyAllWindows()

33
7
7
33
7
4
7
4
7
4
5
5
4
4
4
4
4
4
4
31
2
2
2
2
2
2
2
2
2
32
32
18
20
31
31
31
2
2
2
4
4
