# Setup

In [None]:
### Mount Google Drive so the files under /content/drive (used by the paths below) are reachable.
# Colab-only: `google.colab` is not importable outside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
### Importing required libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from tqdm import tqdm_notebook as tqdm
from sklearn.preprocessing import LabelEncoder
from PIL import Image
import matplotlib.pyplot as plt
import torch
# Neural networks can be constructed using the torch.nn package.
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import Subset
from torch.utils.data.sampler import SequentialSampler
from torch.utils.data import Dataset
import torchvision
import torchvision.transforms as transforms

In [None]:
## Switch to the YOLOv5 checkout: the `utils.*` imports and the relative paths used
## below ('weights/best.pt', '../../data_files/...') are resolved from this directory.
%cd '/content/drive/My Drive/Research/SAnet/yolov5/yolov5'
!pwd

/content/drive/My Drive/Research/SAnet/yolov5/yolov5
/content/drive/My Drive/Research/SAnet/yolov5/yolov5


# Yolo code

In [None]:
import argparse

import torch.backends.cudnn as cudnn
from google.colab.patches import cv2_imshow
from utils import google_utils
from utils.datasets import *
from utils.utils import *

import glob
import math
import os
import random
import shutil
import time
from pathlib import Path
from threading import Thread

import cv2
import numpy as np
import torch
from PIL import Image, ExifTags
from torch.utils.data import Dataset
from tqdm import tqdm

from utils.utils import xyxy2xywh, xywh2xyxy

# Link to the upstream YOLOv5 custom-data guide (not referenced by the code visible in this notebook).
help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
# Lower-cased file extensions that LoadImages1 treats as still images.
img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.dng']
# Lower-cased file extensions that LoadImages1 treats as videos.
vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv']



class LoadImages1:  # for inference
    """Iterate over the images and/or videos found at ``path`` for inference.

    ``path`` may be a single file or a directory; files are kept when their
    lower-cased extension is listed in ``img_formats`` or ``vid_formats``.
    Images are yielded first (sorted by name), followed by the frames of each
    video.

    Each iteration step returns ``(path, img, img0, cap)`` where ``img0`` is
    the frame as read by OpenCV (BGR), ``img`` is the letterboxed, RGB,
    channel-first contiguous copy, and ``cap`` is the current
    ``cv2.VideoCapture`` (``None`` when no video was found).

    Note: ``__iter__`` only resets the file counter; it does not rewind or
    reopen videos, so video sources support a single pass.

    Args:
        path: File or directory to read from.
        session: Accepted for call-site compatibility; not used by this class.
        img_size: Target size handed to ``letterbox``.

    Raises:
        AssertionError: If no supported image or video is found at ``path``.
    """

    def __init__(self, path, session, img_size=416):
        path = str(Path(path))
        if os.path.isdir(path):
            files = sorted(glob.glob(os.path.join(path, '*.*')))
        elif os.path.isfile(path):
            files = [path]
        else:
            files = []

        images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
        videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
        nI, nV = len(images), len(videos)

        # Validate before touching any video so that a bad path cannot leave an
        # opened VideoCapture behind.
        assert nI + nV > 0, 'No images or videos found in %s. Supported formats are:\nimages: %s\nvideos: %s' % \
                            (path, img_formats, vid_formats)

        self.img_size = img_size
        self.files = images + videos
        self.nF = nI + nV  # number of files
        self.video_flag = [False] * nI + [True] * nV
        self.mode = 'images'
        self.count = 0  # defined up front so next() also works without a prior iter()
        if videos:
            self.new_video(videos[0])  # new video
        else:
            self.cap = None

    def __iter__(self):
        self.count = 0
        return self

    def __next__(self):
        if self.count == self.nF:
            raise StopIteration
        path = self.files[self.count]

        if self.video_flag[self.count]:
            # Read video
            self.mode = 'video'
            ret_val, img0 = self.cap.read()
            # Move on past exhausted (or unreadable) videos until a frame is
            # obtained; previously an unreadable next video passed None on to
            # letterbox().
            while not ret_val:
                self.count += 1
                self.cap.release()
                if self.count == self.nF:  # last video
                    raise StopIteration
                path = self.files[self.count]
                self.new_video(path)
                ret_val, img0 = self.cap.read()

            self.frame += 1
            print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='')

        else:
            # Read image
            self.count += 1
            img0 = cv2.imread(path)  # BGR
            assert img0 is not None, 'Image Not Found ' + path
            print('image %g/%g %s: ' % (self.count, self.nF, path), end='')

        # Padded resize
        img = letterbox(img0, new_shape=self.img_size)[0]

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img)

        return path, img, img0, self.cap

    def new_video(self, path):
        """Open ``path`` as the current video and reset the frame counter."""
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def __len__(self):
        return self.nF  # number of files





def detect(source_path,session,save_img=False):
    """Run the YOLOv5 detector on ``source_path`` and return the boxes found per frame.

    Settings (weights, output folder, thresholds, device, ...) are read from
    the module-level ``opt`` object. Every processed frame gets its detections
    drawn on it, is cropped to the union of all detected boxes and is then
    shown and/or written below ``opt.output``. A frame without detections is
    left uncropped and contributes an empty dict to the result.

    WARNING: the ``opt.output`` folder is deleted and recreated on every call.

    Args:
        source_path: Image/video file or folder, or a stream specifier
            ('0', an rtsp/http URL, or a ``.txt`` file).
        session: Forwarded to ``LoadImages1``; not otherwise used here.
        save_img: Forced to True for non-stream sources.

    Returns:
        list[dict]: one dict per processed frame, mapping a class name to a
        list of ``[x, y, width, height]`` integer boxes (after rescaling the
        detections to the original frame size).
    """
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, source_path, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    device = torch_utils.select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    google_utils.attempt_download(weights)
    model = torch.load(weights, map_location=device)['model'].float()  # load to FP32

    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled)
    classify = False
    if classify:
        modelc = torch_utils.load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages1(source, session, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    bounding_boxes_all_images = []
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = torch_utils.time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = torch_utils.time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  #  normalization gain whwh

            # Per-frame state. It must be reset for every frame: previously it
            # was only assigned when detections existed, so a frame without
            # detections either raised NameError (first frame) or silently
            # reused the previous frame's boxes and crop window.
            bounding_boxes = {}
            minx = miny = float("inf")
            maxx = maxy = 0

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    tlx, tly, brx, bry = int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3])

                    ### Saving bounding box as [x, y, width, height], grouped by class name
                    bounding_boxes.setdefault(names[int(cls)], []).append(
                        [tlx, tly, abs(brx - tlx), abs(bry - tly)])

                    # Grow the union of all boxes; it becomes the crop window below
                    minx, miny = min(minx, tlx), min(miny, tly)
                    maxx, maxy = max(maxx, brx), max(maxy, bry)

                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        with open(save_path[:save_path.rfind('.')] + '.txt', 'a') as file:
                            file.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
            bounding_boxes_all_images.append(bounding_boxes)
            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Crop to the union of all detections (crop_img = img[y:y+h, x:x+w]).
            # Skipped when nothing was detected or the union is degenerate, so
            # an empty array is never shown or written.
            if maxx > minx and maxy > miny:
                im0 = im0[miny:maxy, minx:maxx]

            # Stream results
            if view_img:
                cv2_imshow( im0)
                #cv2.imshow(p, im0)

                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    print(save_path)
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    # NOTE(review): the writer is opened at the full frame size
                    # while im0 may have been cropped above; confirm that the
                    # cropped frames are actually encoded for video sources.
                    vid_writer.write(im0)

    # Release the last writer so the final output video is flushed to disk
    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()

    if save_txt or save_img:
        # abspath handles both relative and absolute opt.output values
        print('Results saved to %s' % os.path.abspath(out))
        # `platform` is expected to come from the star-imported YOLOv5 utils — TODO confirm
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
    return bounding_boxes_all_images

######################################################################################

class Options:
    """Plain attribute container that stands in for the argparse namespace of the
    original command-line script; ``detect`` reads its settings from an instance."""

    def __init__(self):
        self.weights = 'weights/best.pt'        # model checkpoint to load
        self.source = 'inference/images'        # default input location
        self.output = '/content/drive/My Drive/cropped_images' #'inference/output'
        self.img_size = 640                     # inference size (pixels)
        self.conf_thres = 0.4                   # object confidence threshold
        self.iou_thres = 0.5                    # IOU threshold for NMS
        self.fourcc = 'mp4v'                    # output video codec
        self.device = ''                        # cuda device, i.e. 0 or 0,1,2,3 or cpu
        self.view_img = False                   # display results
        self.agnostic_nms = False               # class-agnostic NMS
        self.augment = False                    # augmented inference
        self.save_txt = False                   # save results to *.txt
        self.classes = None                     # filter by class

    def __repr__(self):
        # Show every setting so that print(opt) is informative, like an argparse Namespace
        settings = ', '.join('%s=%r' % (key, value) for key, value in vars(self).items())
        return 'Options(%s)' % settings

# Global inference settings read by detect(); replaces the argparse namespace of the original script.
opt = Options()
# Original command-line interface, kept for reference only
# (note that its default --weights and --output differ from the Options values):
# # if __name__ == '__main__':
# parser = argparse.ArgumentParser()
# parser.add_argument('--weights', type=str, default='weights/yolov5s.pt', help='model.pt path')
# parser.add_argument('--source', type=str, default='inference/images', help='source')  # file/folder, 0 for webcam
# parser.add_argument('--output', type=str, default='inference/output', help='output folder')  # output folder
# parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
# parser.add_argument('--conf-thres', type=float, default=0.4, help='object confidence threshold')
# parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
# parser.add_argument('--fourcc', type=str, default='mp4v', help='output video codec (verify ffmpeg support)')
# parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
# parser.add_argument('--view-img', action='store_true', help='display results')
# parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
# parser.add_argument('--classes', nargs='+', type=int, help='filter by class')
# parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
# parser.add_argument('--augment', action='store_true', help='augmented inference')
# opt = parser.parse_args()
# check_img_size is not defined in this notebook (expected from the star-imported YOLOv5 utils);
# presumably it validates/adjusts the inference size for the model — TODO confirm.
opt.img_size = check_img_size(opt.img_size)
print(opt)

# Explicit overrides for this run (both currently equal the values set in Options.__init__).
opt.weights='weights/best.pt'
opt.view_img=False


<__main__.Options object at 0x7f1ee74ee320>


In [None]:
# # BASE_PATH = './'
# Folder with the session-7 frames, relative to the YOLOv5 working directory.
DATA_PATH = '../../data_files/session7'

# NOTE: detect() deletes and recreates the opt.output folder before writing its results.
# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    source_path = DATA_PATH #os.path.join(DATA_PATH,'session1 ('+str(i+1)+').jpg')
    # The second argument is detect()'s `session` parameter, which is only forwarded to the loader.
    det_boxes_rgb_images5 = detect(source_path, 'rgb_images2')


Using CUDA device0 _CudaDeviceProperties(name='Tesla T4', total_memory=15079MB)





image 1/283 ../../data_files/session7/session7-1.jpg: 576x640 5 blemisheds, 6 unblemisheds, 3 gloves, 1 belts, 4 bins, 1 heads, Done. (0.052s)
../../data_files/cc7/session7-1.jpg
image 2/283 ../../data_files/session7/session7-10.jpg: 576x640 5 blemisheds, 6 unblemisheds, 2 gloves, 1 belts, 2 bins, 1 heads, Done. (0.050s)
../../data_files/cc7/session7-10.jpg
image 3/283 ../../data_files/session7/session7-100.jpg: 576x640 5 blemisheds, 5 unblemisheds, 2 gloves, 2 belts, 1 bins, 2 heads, Done. (0.050s)
../../data_files/cc7/session7-100.jpg
image 4/283 ../../data_files/session7/session7-101.jpg: 576x640 5 blemisheds, 5 unblemisheds, 2 gloves, 2 belts, 1 bins, 2 heads, Done. (0.050s)
../../data_files/cc7/session7-101.jpg
image 5/283 ../../data_files/session7/session7-102.jpg: 576x640 5 blemisheds, 5 unblemisheds, 2 gloves, 2 belts, 1 bins, 2 heads, Done. (0.050s)
../../data_files/cc7/session7-102.jpg
image 6/283 ../../data_files/session7/session7-103.jpg: 576x640 5 blemisheds, 6 unblemished

# Data loader

In [None]:
## Some paths used for loading annotation and data
## NOTE: DATA_PATH is re-bound here and replaces the relative session-7 path used in the YOLO section.
ANNOS_PATH = '/content/drive/My Drive/Research/SAnet/data_files/annos/'
DATA_PATH = '/content/drive/My Drive/Research/SAnet/data_files/'


## Images will be normalised using this:
## converted to a tensor, then (x - 0.5) / 0.5 per channel (three channels are expected).
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

In [None]:

## Read annotation file and return dataframe of annotations with 'Image','O_loc','EE_loc' and 'session'
def read_anno_file(file_path):
    """Load one session's annotation CSV.

    The session number is parsed from the digits directly in front of
    ``_annos.csv`` in ``file_path``, so multi-digit sessions such as
    ``session12_annos.csv`` are handled (the previous implementation only
    looked at the last digit).

    Args:
        file_path: Path to a ``<name><N>_annos.csv`` file containing at least
            the columns 'Image', 'O_loc' and 'EE_loc'.

    Returns:
        pandas.DataFrame with the columns 'Image', 'O_loc', 'EE_loc' and an
        integer 'session' column holding N on every row.

    Raises:
        ValueError: If no session number can be parsed from ``file_path``.
    """
    import re  # local import keeps this cell self-contained

    match = re.search(r'(\d+)_annos\.csv', str(file_path))
    if match is None:
        raise ValueError("Cannot parse a session number from %r" % (file_path,))

    # .copy() makes the column subset an independent frame before adding a column
    session_annos = pd.read_csv(file_path)[['Image','O_loc','EE_loc']].copy()
    session_annos['session'] = int(match.group(1))
    return session_annos

## Load the per-session annotation tables (sessions 1 and 2 are currently excluded)
session3_annos, session4_annos, session5_annos, session6_annos, session7_annos = [
    read_anno_file(ANNOS_PATH + 'session' + str(session_number) + '_annos.csv')
    for session_number in (3, 4, 5, 6, 7)
]

## Concatenate annotation frames into one table with a fresh 0..N-1 index
data = pd.concat(
    [
        session3_annos,
        session4_annos,
        session5_annos,
        session6_annos,
        session7_annos,
    ]
).reset_index(drop=True)

## Rows annotated with O_loc == 0 while EE_loc == 2 are relabelled to O_loc == 2
data.loc[(data['O_loc'] == 0) & (data['EE_loc'] == 2), 'O_loc'] = 2
#data.groupby(['O_loc','EE_loc']).size().reset_index(name='counts')

print('annotations', data.head())



## Pre-load every annotated image into memory: images_lib maps file path -> image.
## The key format must stay in sync with Arthopod_Dataset.__getitem__.
images_lib = {}
for index in tqdm(range(data.shape[0]), desc='loading images'):
    session_name = 'session'+str(data.loc[index, 'session'])
    img_name = session_name+'-'+str(data.loc[index, 'Image'])+'.jpg'
    img_name = os.path.join(DATA_PATH+session_name, img_name)

    # The context manager closes the file handle; convert()/resize() return
    # in-memory copies. convert('RGB') guarantees the three channels that the
    # Normalize step of `transform` expects.
    with Image.open(img_name) as raw_image:
        image = raw_image.convert('RGB').resize((640,480))

    if transform is not None:
        image = transform(image)
    # Always store the image (previously nothing was cached when transform was None)
    images_lib[img_name] = image

In [None]:


## Split the annotation rows into train / validation / test index lists.
## The test set consists of two fixed row ranges; all remaining rows are
## shuffled with a fixed seed and cut into train and validation parts.
batch_size = 32
shuffle_dataset = False
random_seed= 42
dataset_size = len(data)
indices = list(range(dataset_size))
# Fractional split points; kept for reference only — they are NOT used by the split below
split1 = int(np.floor(0.7 * dataset_size))
split2 = int(np.floor(0.15 * dataset_size))
if shuffle_dataset :
    np.random.seed(random_seed)
    np.random.shuffle(indices)

# Fixed row boundaries of the held-out data
TEST_BLOCK_START, TEST_BLOCK_END = 576, 700   # first held-out row range
TEST_TAIL_START = 2065                        # every row from here on is held out
TRAIN_SIZE = 1358                             # rows of the shuffled remainder used for training

# Fail loudly instead of silently producing empty/short splits on a smaller dataset
assert dataset_size > TEST_TAIL_START, \
    'expected more than %d annotated rows, found %d' % (TEST_TAIL_START, dataset_size)

test_indices = indices[TEST_BLOCK_START:TEST_BLOCK_END]+indices[TEST_TAIL_START:]
test1_indices = indices[TEST_TAIL_START:] # test1_indices is not required
train_val_indices = indices[:TEST_BLOCK_START]+indices[TEST_BLOCK_END:TEST_TAIL_START]

# shuffling only train and val indices
np.random.seed(random_seed)
np.random.shuffle(train_val_indices)
train_indices = train_val_indices[:TRAIN_SIZE]
val_indices = train_val_indices[TRAIN_SIZE:]

# Re-seed so that later cells start from the same global NumPy RNG state as before
np.random.seed(random_seed)

# Sanity checks: the three splits must not share any row
assert not set(train_indices) & set(val_indices)
assert not set(train_val_indices) & set(test_indices)
assert len(train_indices) + len(val_indices) + len(test_indices) == dataset_size

# Report sizes plus a short preview instead of dumping thousands of indices
for split_name, split_indices in (('train_indices', train_indices),
                                  ('val_indices', val_indices),
                                  ('test_indices', test_indices),
                                  ('test1_indices', test1_indices)):
    print('len ' + split_name, len(split_indices), '| first 10:', split_indices[:10])


## Dataset Class
class Arthopod_Dataset(Dataset):
    """Serve (image, label) pairs for a subset of the annotation table.

    Images are not read from disk here: they are looked up in a cache of
    already transformed images keyed by file path.

    Args:
        data: Annotation frame with the columns 'session', 'Image', 'EE_loc'
            and 'O_loc'.
        transform: Stored for reference only; it is not applied in
            ``__getitem__`` because the cached images are taken as-is.
        indexes: Row labels of ``data`` that belong to this subset.
        image_cache: Optional mapping file path -> image. Defaults to the
            module-level ``images_lib`` (looked up at access time).
        data_path: Optional root folder used to build the cache keys.
            Defaults to the module-level ``DATA_PATH`` (looked up at access
            time).
    """

    def __init__(self, data, transform, indexes, image_cache=None, data_path=None):
        self.data = data
        self.transform = transform
        self.indices = indexes
        self.image_cache = image_cache
        self.data_path = data_path

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, index):
        """Return ``(image, label)``; ``label`` is a long tensor ``[EE_loc, O_loc]``."""
        index = self.indices[index]
        # Fall back to the notebook globals only when nothing was injected
        cache = images_lib if self.image_cache is None else self.image_cache
        root = DATA_PATH if self.data_path is None else self.data_path

        session_name = 'session'+str(self.data.loc[index, 'session'])
        img_name = session_name+'-'+str(self.data.loc[index, 'Image'])+'.jpg'
        img_name = os.path.join(root+session_name, img_name)
        image = cache[img_name]

        # Go through NumPy with an explicit integer dtype: building a tensor
        # straight from a label-indexed Series relies on positional Series
        # indexing, and class-index targets have to be int64 anyway.
        label_values = self.data.loc[index, ['EE_loc', 'O_loc']].to_numpy(dtype='int64')
        label = torch.as_tensor(label_values, dtype=torch.long)
        return image, label

  

## One dataset view per split; all of them share the annotation frame and the transform
train_dataset, val_dataset, test_dataset, test1_dataset = [
    Arthopod_Dataset(data, transform, split)
    for split in (train_indices, val_indices, test_indices, test1_indices)
]

## Creating PT data samplers: each one walks its split front to back in stored order
train_sampler, valid_sampler, test_sampler, test1_sampler = [
    SequentialSampler(split)
    for split in (train_indices, val_indices, test_indices, test1_indices)
]

## ... and the loaders that batch each dataset through its sampler
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, sampler=train_sampler, batch_size=batch_size)
validation_loader = torch.utils.data.DataLoader(
    dataset=val_dataset, sampler=valid_sampler, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, sampler=test_sampler, batch_size=batch_size)
test1_loader = torch.utils.data.DataLoader(
    dataset=test1_dataset, sampler=test1_sampler, batch_size=batch_size)

train_indices [1729, 1626, 70, 1100, 1176, 1326, 1985, 867, 543, 1979, 1417, 429, 1523, 818, 708, 1098, 1245, 2007, 548, 1133, 2044, 212, 307, 1262, 69, 432, 1957, 1932, 128, 1818, 1475, 731, 1027, 1359, 1490, 1208, 247, 1865, 1171, 1270, 1426, 56, 1726, 196, 494, 962, 1612, 2024, 798, 239, 1798, 1616, 1513, 275, 1054, 1837, 717, 383, 1121, 529, 1012, 1750, 898, 538, 905, 1371, 744, 351, 1731, 868, 1311, 342, 1618, 1917, 1551, 303, 993, 1890, 2000, 483, 722, 1131, 1881, 324, 1648, 1283, 439, 111, 1839, 65, 188, 802, 1853, 331, 29, 297, 986, 99, 1743, 931, 1826, 1914, 256, 1124, 1357, 2031, 734, 353, 1861, 251, 879, 344, 1429, 1996, 120, 233, 1600, 479, 1594, 414, 1382, 519, 1181, 1109, 1444, 1103, 231, 1998, 1789, 109, 411, 135, 754, 943, 1642, 289, 983, 761, 1871, 1952, 1820, 1073, 1804, 1690, 237, 426, 1428, 374, 2055, 1752, 1801, 1332, 350, 1281, 801, 210, 775, 1538, 1198, 1634, 480, 1564, 1373, 741, 2060, 298, 23, 1892, 63, 270, 901, 1177, 1226, 316, 1889, 1325, 1841, 382, 1074, 13

# Neural Net


In [None]:
class Net(nn.Module):
    """Two-headed CNN classifier for 3-channel 480x640 (H x W) images.

    Five convolutions with (3, 9) kernels and three max-pooling stages reduce
    the input to a 32 x 24 x 24 feature map, which is flattened and passed
    through three shared linear layers. Two parallel 4-way heads follow, one
    for ``ee_loc`` and one for ``o_loc``.

    ``forward`` returns softmax *probabilities* (not logits) for both heads.
    Losses that expect raw logits (for example ``nn.CrossEntropyLoss``) must
    therefore not be applied to these outputs directly.

    NOTE(review): there is no non-linearity between the conv/linear layers,
    only max-pooling; confirm that this is intentional.
    """

    # Flattened feature size: for input [N, 3, 480, 640] the conv stack yields [N, 32, 24, 24]
    FLAT_FEATURES = 32 * 24 * 24

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=(3,9), stride=1)
        self.pool1 = nn.MaxPool2d(4, 3)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=(3,9), stride=1)
        self.conv3 = nn.Conv2d(32, 32, kernel_size=(3,9), stride=1)
        self.pool2 = nn.MaxPool2d(2, 3)
        self.conv4 = nn.Conv2d(32, 32, kernel_size=(3,9), stride=1)
        self.conv5 = nn.Conv2d(32, 32, kernel_size=(3,9), stride=1)
        self.pool3 = nn.MaxPool2d(2, 2)

        self.fc1_1 = nn.Linear(self.FLAT_FEATURES, 128)
        self.fc1_2 = nn.Linear(128, 64)
        self.fc1_3 = nn.Linear(64, 32)
        self.fc1_4 = nn.Linear(32, 4) # for ee_loc
        self.fc1_5 = nn.Linear(32, 4) # for o_loc

    def forward(self, x):
        """Return ``(ee_loc_probs, o_loc_probs)``, each of shape [N, 4].

        Raises:
            RuntimeError: If the input size does not lead to the expected
                32 x 24 x 24 feature map.
        """
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.pool2(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.pool3(x)

        # Flatten per sample. view(-1, C) could silently regroup samples when
        # the spatial size is unexpected, so keep the batch axis fixed and
        # check the feature count explicitly.
        x = x.reshape(x.size(0), -1)
        if x.size(1) != self.FLAT_FEATURES:
            raise RuntimeError('Net expects %d flattened features per sample but got %d; '
                               'check the input image size' % (self.FLAT_FEATURES, x.size(1)))
        x = self.fc1_1(x)
        x = self.fc1_2(x)
        x = self.fc1_3(x)
        x1 = self.fc1_4(x) ## for ee_loc
        x2 = self.fc1_5(x) ## for o_loc
        x1 = F.softmax(x1,dim = 1)
        x2 = F.softmax(x2,dim = 1)
        return x1, x2

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Prints the CUDA device when one is available, otherwise "cpu":
print(device)

# model = Net() # On CPU
model = Net().to(device)  # on the selected device
print(model)


class ProbabilityCrossEntropyLoss(nn.Module):
    """Class-weighted cross-entropy for inputs that are already probabilities.

    ``Net.forward`` ends with a softmax. ``nn.CrossEntropyLoss`` applies its
    own log-softmax internally, so feeding it probabilities squashes them a
    second time: with 4 classes the loss can then never fall below
    -log(e / (e + 3)) ~= 0.7437 per head and the gradients become tiny. This
    loss takes the logarithm of the probabilities and applies the (weighted)
    negative log-likelihood instead.

    Args:
        weight: Optional per-class weight tensor (same meaning as the
            ``weight`` argument of ``nn.CrossEntropyLoss``).
        eps: Lower clamp for the probabilities, so that log(0) cannot occur.
    """

    def __init__(self, weight=None, eps=1e-12):
        super().__init__()
        self.register_buffer('weight', weight)
        self.eps = eps

    def forward(self, probs, target):
        log_probs = torch.log(probs.clamp_min(self.eps))
        return F.nll_loss(log_probs, target, weight=self.weight)


def _inverse_frequency_weights(class_counts):
    """Return normalised 1/count class weights on ``device``."""
    inverse = 1.0 / torch.tensor(class_counts, dtype=torch.float32)
    # .to(device) instead of .cuda(): also works on the CPU fallback
    return (inverse / inverse.sum()).to(device)


## Weights based on data imbalance for ee_loc
w1 = _inverse_frequency_weights([443, 525, 211, 762])
criterion1 = ProbabilityCrossEntropyLoss(weight=w1)


## Weights based on data imbalance for o_loc
w2 = _inverse_frequency_weights([719, 525, 211, 486])
criterion2 = ProbabilityCrossEntropyLoss(weight=w2)

optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

def accuracy(out, labels):
    """Count the rows of ``out`` whose highest score sits at the index given by ``labels``.

    Returns the number of correct predictions as a Python int (a count, not a ratio).
    """
    predicted = out.max(dim=1).indices
    n_correct = (predicted == labels).sum()
    return n_correct.item()


cuda:0
Net(
  (conv1): Conv2d(3, 32, kernel_size=(3, 9), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=4, stride=3, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 32, kernel_size=(3, 9), stride=(1, 1))
  (conv3): Conv2d(32, 32, kernel_size=(3, 9), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=3, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv2d(32, 32, kernel_size=(3, 9), stride=(1, 1))
  (conv5): Conv2d(32, 32, kernel_size=(3, 9), stride=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1_1): Linear(in_features=18432, out_features=128, bias=True)
  (fc1_2): Linear(in_features=128, out_features=64, bias=True)
  (fc1_3): Linear(in_features=64, out_features=32, bias=True)
  (fc1_4): Linear(in_features=32, out_features=4, bias=True)
  (fc1_5): Linear(in_features=32, out_features=4, bias=True)
)


In [None]:
# Display the normalised class weights used by the ee_loc loss
w1

tensor([0.17094, 0.37821, 0.34579, 0.10506], device='cuda:0')

In [None]:
# Display the normalised class weights used by the o_loc loss
w2

tensor([0.10678, 0.37908, 0.34659, 0.16755], device='cuda:0')

# Training


In [None]:
n_epochs = 1000
print_every = 10  # NOTE: not used below; batch progress is printed every 20 steps
valid_loss_min = np.inf  # np.Inf was removed in NumPy 2.0
val_loss = []
val_acc1 = []
val_acc2 = []
train_loss = []
train_acc1 = []
train_acc2 = []
total_step = len(train_loader)


### In case we want to use pretrained model for further training
# ## load model state
# checkpoint = torch.load("model_classification_tutorial10.pt")
# model.load_state_dict(checkpoint['state_dict'])
# optimizer.load_state_dict(checkpoint['optimizer'])
# # epoch = checkpoint['epoch']
# valid_loss_min = checkpoint['loss']


for epoch in range(1, n_epochs+1):
    running_loss = 0.0

    # For accuracy estimation
    correct1 = 0
    correct2 = 0
    total1=0
    total2=0

    print(f'Epoch {epoch}\n')

    ## Training
    model.train()  # explicit: the previous epoch's evaluation left the model in eval mode
    for batch_idx, (data_,  target_) in enumerate(train_loader):
        data_, target_ = data_.to(device),  target_.to(device)# on GPU
        # zero the parameter gradients
        optimizer.zero_grad()
        outputs1, outputs2 = model(data_)
        loss1 = criterion1(outputs1, target_[:,0]) # loss for ee_loc
        loss2 = criterion2(outputs2, target_[:,1]) # loss for o_loc
        loss = loss1 + loss2 # total loss
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        _,pred1 = torch.max(outputs1, dim=1) # Predictions for ee_loc
        _,pred2 = torch.max(outputs2, dim=1) # Predictions for o_loc
        correct1 += torch.sum(pred1==target_[:,0]).item()
        correct2 += torch.sum(pred2==target_[:,1]).item()
        total1 += target_[:,0].size(0)
        total2 += target_[:,1].size(0)
        if (batch_idx) % 20 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch, n_epochs, batch_idx, total_step, loss.item()))
    train_acc1.append(100 * correct1 / total1) # train accuracy for ee_loc
    train_acc2.append(100 * correct2 / total2) # train accuracy for o_loc
    train_loss.append(running_loss/total_step)
    # Report this epoch's mean loss (train_loss[-1]); the mean over all epochs
    # so far lags far behind and hides what the current epoch is doing.
    print(f'\ntrain loss: {train_loss[-1]:.4f}, train acc1: {(100 * correct1 / total1):.4f}, train acc2: {(100 * correct2 / total2):.4f}')
    batch_loss = 0
    total_t1=0
    total_t2=0
    correct_t1=0
    correct_t2=0

    ## Evaluation
    model.eval()
    with torch.no_grad():
        for data_t,  target_t in (validation_loader):
            data_t,  target_t = data_t.to(device), target_t.to(device)# on GPU
            outputs_t1, outputs_t2 = model(data_t)
            loss_t1 = criterion1(outputs_t1, target_t[:,0])
            loss_t2 = criterion2(outputs_t2, target_t[:,1])
            loss_t = loss_t1 + loss_t2
            batch_loss += loss_t.item()
            _,pred_t1 = torch.max(outputs_t1, dim=1)
            _,pred_t2 = torch.max(outputs_t2, dim=1)
            correct_t1 += torch.sum(pred_t1==target_t[:,0]).item()
            correct_t2 += torch.sum(pred_t2==target_t[:,1]).item()
            total_t1 += target_t[:,0].size(0)
            total_t2 += target_t[:,1].size(0)
    val_acc1.append(100 * correct_t1 / total_t1)
    val_acc2.append(100 * correct_t2 / total_t2)
    val_loss.append(batch_loss/len(validation_loader))
    # Model selection compares the summed validation loss of this epoch with the best so far
    network_learned = batch_loss < valid_loss_min
    print(f'validation loss: {val_loss[-1]:.4f}, validation acc1: {(100 * correct_t1 / total_t1):.4f}, validation acc2: {(100 * correct_t2 / total_t2):.4f}\n')
    # Saving the best weight
    if network_learned:
        valid_loss_min = batch_loss
        ## Save model
        state = {'epoch': epoch + 1, 'state_dict': model.state_dict(),'optimizer': optimizer.state_dict(), 'loss': valid_loss_min,  }
        torch.save(state, 'model_classification_tutorial23.pt')

        print('Detected network improvement, saving current model')

# Leave the model in training mode, as the original loop did after every epoch
model.train()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

train loss: 1.6166, train acc1: 98.0854, train acc2: 96.3181
validation loss: 1.6706, validation acc1: 93.8250, validation acc2: 93.4820

Epoch 446

Epoch [446/1000], Step [0/43], Loss: 1.4874
Epoch [446/1000], Step [20/43], Loss: 1.4873
Epoch [446/1000], Step [40/43], Loss: 1.6445

train loss: 1.6164, train acc1: 98.0854, train acc2: 96.3181
validation loss: 1.6704, validation acc1: 93.4820, validation acc2: 93.1389

Epoch 447

Epoch [447/1000], Step [0/43], Loss: 1.4874
Epoch [447/1000], Step [20/43], Loss: 1.4873
Epoch [447/1000], Step [40/43], Loss: 1.6444

train loss: 1.6162, train acc1: 98.0854, train acc2: 96.3181
validation loss: 1.6703, validation acc1: 93.8250, validation acc2: 93.4820

Epoch 448

Epoch [448/1000], Step [0/43], Loss: 1.4874
Epoch [448/1000], Step [20/43], Loss: 1.4873
Epoch [448/1000], Step [40/43], Loss: 1.6437

train loss: 1.6160, train acc1: 98.0854, train acc2: 96.3181
validation loss: 1.67

# Testing


In [None]:
test_acc1 = []
test_acc2 = []

total_t1=0
total_t2=0

correct_t1=0
correct_t2=0

# Empty float accumulators on the evaluation device; every batch is concatenated onto them.
allpreds1 = torch.empty(0, device=device)      # predicted ee_loc class per sample
allpreds2 = torch.empty(0, device=device)      # predicted o_loc class per sample
val_allpreds1 = torch.empty(0, device=device)  # score of the predicted ee_loc class
val_allpreds2 = torch.empty(0, device=device)  # score of the predicted o_loc class

alltgs1 = torch.empty(0, device=device)        # ground-truth ee_loc labels
alltgs2 = torch.empty(0, device=device)        # ground-truth o_loc labels


# ## load model state
# NOTE: this reads "...tutorial22.pt", whereas the training cell writes "...tutorial23.pt".
# map_location lets a checkpoint that was saved on a GPU also load on a CPU-only runtime.
checkpoint = torch.load("model_classification_tutorial22.pt", map_location=device) #model_classification_tutorial23.pt
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
epoch = checkpoint['epoch']
valid_loss_min = checkpoint['loss']



with torch.no_grad():
    model.eval()
    for data_t, target_t in (test_loader):
        data_t, target_t = data_t.to(device), target_t.to(device)# on GPU
        outputs_t1, outputs_t2 = model(data_t)

        # torch.max returns (largest score, index of largest score) per row
        val_pred_t1,pred_t1 = torch.max(outputs_t1, dim=1)
        val_pred_t2,pred_t2 = torch.max(outputs_t2, dim=1)

        alltgs1 = torch.cat((alltgs1,target_t[:,0]),dim=0)
        alltgs2 = torch.cat((alltgs2,target_t[:,1]),dim=0)

        allpreds1 = torch.cat((allpreds1,pred_t1),dim=0)
        allpreds2 = torch.cat((allpreds2,pred_t2),dim=0)
        val_allpreds1 = torch.cat((val_allpreds1,val_pred_t1),dim=0)
        val_allpreds2 = torch.cat((val_allpreds2,val_pred_t2),dim=0)


        correct_t1 += torch.sum(pred_t1==target_t[:,0]).item()
        correct_t2 += torch.sum(pred_t2==target_t[:,1]).item()
        total_t1 += target_t[:,0].size(0)
        total_t2 += target_t[:,1].size(0)
    test_acc1.append(100 * correct_t1 / total_t1)
    test_acc2.append(100 * correct_t2 / total_t2)

    print(f'test acc1: {(100 * correct_t1 / total_t1):.4f}, test acc2: {(100 * correct_t2 / total_t2):.4f}\n')


test acc1: 80.0983, test acc2: 84.7666



In [None]:
### Confusion matrix and per-class metrics for the end-effector location
from sklearn import metrics

y_act = alltgs1.detach().cpu().numpy()     # ground-truth labels
y_pred = allpreds1.detach().cpu().numpy()  # predicted class indices

# First the confusion matrix, then precision / recall / f1 per class
for report in (metrics.confusion_matrix, metrics.classification_report):
    print(report(y_act, y_pred, labels=[0, 1, 2, 3]))

[[126   0   3   1]
 [  0  50   0   1]
 [  8   0  52   0]
 [ 59   1   8  98]]
              precision    recall  f1-score   support

           0       0.65      0.97      0.78       130
           1       0.98      0.98      0.98        51
           2       0.83      0.87      0.85        60
           3       0.98      0.59      0.74       166

    accuracy                           0.80       407
   macro avg       0.86      0.85      0.84       407
weighted avg       0.85      0.80      0.80       407



In [None]:
### Confusion matrix and per-class metrics for the onion location
from sklearn import metrics

y_act = alltgs2.detach().cpu().numpy()     # ground-truth labels
y_pred = allpreds2.detach().cpu().numpy()  # predicted class indices

# First the confusion matrix, then precision / recall / f1 per class
for report in (metrics.confusion_matrix, metrics.classification_report):
    print(report(y_act, y_pred, labels=[0, 1, 2, 3]))

[[170   0   9   5]
 [  0  51   0   0]
 [  8   0  51   1]
 [ 30   6   3  73]]
              precision    recall  f1-score   support

           0       0.82      0.92      0.87       184
           1       0.89      1.00      0.94        51
           2       0.81      0.85      0.83        60
           3       0.92      0.65      0.76       112

    accuracy                           0.85       407
   macro avg       0.86      0.86      0.85       407
weighted avg       0.86      0.85      0.84       407



In [None]:
# For saving the model
# state = {'epoch': 500, 'state_dict': model.state_dict(),'optimizer': optimizer.state_dict(), 'loss': valid_loss_min,  }
# torch.save(state, 'model_classification_tutorial22.pt') 

# Saving video

In [None]:
import cv2
from google.colab.patches import cv2_imshow


# Settings shared by the cv2.putText calls that annotate each video frame.
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 0.5
# Red when the tuple is read in OpenCV's BGR channel order.
fontColor = (0, 0, 255)
# NOTE: this value is handed to cv2.putText as the 7th positional argument,
# which OpenCV interprets as the stroke *thickness*, not the line type.
lineType = 2
# (x, y) pixel position of the bottom-left corner of the first text line.
bottomLeftCornerOfText = (50, 100)


def img_display(img):
    """Turn a normalized channel-first image tensor into a channel-last array.

    Args:
        img: torch tensor with three axes, indexed (channel, row, column).
            NOTE(review): the `/ 2 + 0.5` step assumes the data was normalized
            with mean 0.5 and std 0.5 -- confirm against the dataset transforms.

    Returns:
        NumPy array holding `img / 2 + 0.5` with its axes reordered to
        (row, column, channel).
    """
    img = img / 2 + 0.5     # unnormalize
    # detach()/cpu() let the conversion succeed for tensors that track
    # gradients or live on the GPU; for plain CPU tensors they change nothing.
    npimg = img.detach().cpu().numpy()
    return np.transpose(npimg, (1, 2, 0))
# Class id -> human readable location name, one table per prediction head.
ee_loc_vals = {
    0.0: 'conveyor',
    1.0: 'eye',
    2.0: 'bin',
    3.0: 'hover',
}
o_loc_vals = {
    0.0: 'conveyor',
    1.0: 'eye',
    2.0: 'bin',
    3.0: 'hover',
}

# Ground-truth labels gathered during the test pass, as NumPy arrays.
eevals = alltgs1.detach().cpu().numpy()
ovals = alltgs2.detach().cpu().numpy()

# Predicted class ids (argmax of each head's output).
eepreds = allpreds1.detach().cpu().numpy()
opreds = allpreds2.detach().cpu().numpy()

# Maximum output value of each head, printed next to the predicted label.
conf_eepreds = val_allpreds1.detach().cpu().numpy()
conf_opreds = val_allpreds2.detach().cpu().numpy()

# Write every test image to an AVI file with predicted and true labels drawn on it.
# NOTE(review): frame k is paired with eepreds[k] / opreds[k]; this assumes
# test_loader yields samples in the same order as when the predictions were
# collected (i.e. no shuffling) -- confirm.
frame_size = (640, 480)  # (width, height) declared to the VideoWriter
out = cv2.VideoWriter('project52.avi',cv2.VideoWriter_fourcc(*'DIVX'), 3, frame_size)
if not out.isOpened():
    # Fail loudly instead of silently producing no video.
    raise RuntimeError("cv2.VideoWriter could not open 'project52.avi' for writing")

k = 0
try:
    for data_t, target_t in (test_loader):
        for i in range(len(data_t)):
            print('k',k)
            # Un-normalize, reverse the channel axis (presumably RGB -> BGR for
            # OpenCV) and scale to the 0-255 range expected for uint8 frames.
            img = img_display(data_t[i,:,:,:])[:, :, ::-1]*255
            img = img.astype(np.uint8)
            is_ee = (eepreds[k] == eevals[k])
            is_o = (opreds[k] == ovals[k])
            # Two separate lines of text: cv2.putText cannot render '\n', so the
            # prediction line and the ground-truth line are drawn individually.
            pred_tag = 'Frame: '+str(k)+' EE loc: '+ee_loc_vals[eepreds[k]]+' '+str(conf_eepreds[k])+' Onion loc: '+o_loc_vals[opreds[k]]+' '+str(conf_opreds[k])
            tag1 = 'True EE loc: '+ee_loc_vals[eevals[k]]+' True O loc: '+o_loc_vals[ovals[k]]
            # Combined form is only used for the console log.
            tag = pred_tag+'\n '+tag1
            print(tag)
            print('ee:', is_ee, ' o:',is_o)
            # copy() yields a C-contiguous array that OpenCV can draw on.
            img1 = img.copy()
            # A frame whose size differs from the one declared to the writer may
            # be dropped without any error, so bring it to the declared size first.
            if (img1.shape[1], img1.shape[0]) != frame_size:
                img1 = cv2.resize(img1, frame_size)
            cv2.putText(img1, pred_tag,
                bottomLeftCornerOfText,
                font,
                fontScale,
                fontColor,
                lineType)
            cv2.putText(img1, tag1,
                (50,150),
                font,
                fontScale,
                fontColor,
                lineType)
            out.write(img1)
            k+=1
finally:
    # Always finalize the file, even if a frame fails part-way through.
    out.release()


k 0
Frame: 0 EE loc: conveyor 1.0 Onion loc: conveyor 1.0
 True EE loc: conveyor True O loc: conveyor
ee: True  o: True
k 1
Frame: 1 EE loc: conveyor 1.0 Onion loc: conveyor 1.0
 True EE loc: conveyor True O loc: conveyor
ee: True  o: True
k 2
Frame: 2 EE loc: conveyor 1.0 Onion loc: conveyor 1.0
 True EE loc: conveyor True O loc: conveyor
ee: True  o: True
k 3
Frame: 3 EE loc: conveyor 1.0 Onion loc: conveyor 1.0
 True EE loc: conveyor True O loc: conveyor
ee: True  o: True
k 4
Frame: 4 EE loc: conveyor 1.0 Onion loc: conveyor 1.0
 True EE loc: conveyor True O loc: conveyor
ee: True  o: True
k 5
Frame: 5 EE loc: conveyor 1.0 Onion loc: conveyor 1.0
 True EE loc: conveyor True O loc: conveyor
ee: True  o: True
k 6
Frame: 6 EE loc: conveyor 0.93349725 Onion loc: conveyor 1.0
 True EE loc: conveyor True O loc: conveyor
ee: True  o: True
k 7
Frame: 7 EE loc: conveyor 0.99182194 Onion loc: conveyor 1.0
 True EE loc: conveyor True O loc: conveyor
ee: True  o: True
k 8
Frame: 8 EE loc: conve

In [None]:
# Persist the predicted class ids to disk.
# NOTE: despite the .txt extension these files are binary pickles, not text;
# read them back with pickle.load on a file opened in "rb" mode.

import pickle

for fname, preds in (("eepreds.txt", eepreds), ("opreds.txt", opreds)):
    with open(fname, "wb") as fp:   # binary mode is required by pickle
        pickle.dump(preds, fp)
