In [1]:
!pip install wget

import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

import datetime as dt
import shutil
import matplotlib.pyplot as plt
import copy
from PIL import Image
import wget
from IPython.display import clear_output


import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import zipfile

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler
from torchvision import models, transforms as T

import cv2



In [2]:
"""
- change collection of images from videos to collect only one/two frames per second in order to produce a dataset that is less
likely to have validation/test samples that are almost identical to the train set, such that it would create an ilusion of 
accuracy over those sets while the real world generalization would be much wors that it appears to be.

- expand dataset with new videos and randomly drop images to produce a more balanced dataset so as to avoid seamingly higher 
accuracy than actually exists due to data bias towards count numbers that are better represented.

"""

'\n- change collection of images from videos to collect only one/two frames per second in order to produce a dataset that is less\nlikely to have validation/test samples that are almost identical to the train set, such that it would create an ilusion of \naccuracy over those sets while the real world generalization would be much wors that it appears to be.\n\n- expand dataset with new videos and randomly drop images to produce a more balanced dataset so as to avoid seamingly higher \naccuracy than actually exists due to data bias towards count numbers that are better represented.\n\n'

In [3]:
LarvaData = pd.read_csv('larva_count.csv')
LarvaData.dropna(inplace=True)
len(LarvaData)
LarvaData.dropna(inplace=True)

In [4]:
!mkdir videos

if 'videos' not in os.listdir():
    min_count = min(LarvaData['Manual count'])
    max_count = max(LarvaData['Manual count'])
    count_counts = np.zeros(int(max_count - min_count + 1))
    N_data = len(LarvaData)
    for i, (link, count) in enumerate(zip(LarvaData.Link, LarvaData['Manual count'])):
        clear_output(wait=True)
        print('Current progress:', np.round(i/ N_data * 100, 2), '%')
        print(link)
        v_name = link.split('/')[-1]
        wget.download(link, out='videos')
        cap = cv2.VideoCapture('videos/' + v_name)
        frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        count_counts[int(count-min_count)] += frameCount
    mod_counts = np.around(count_counts / 900)

mkdir: cannot create directory ‘videos’: File exists


In [None]:
!mkdir images

if 'images' not in os.listdir():
    LarvaData = LarvaData.dropna()
    num_images = 0
    im_data = pd.DataFrame(columns=['names', 'counts'])
    num_videos = 0
    for link, count in zip(LarvaData.Link, LarvaData['Manual count']):
        #wget.download(link, out='videos')
        num_videos += 1
        clear_output(wait=True)
        print('Current progress:', np.round(num_videos/ N_data * 100, 2), '%')
        v_name = link.split('/')[-1]
        cap = cv2.VideoCapture('videos/' + v_name)
        fc = 0
        ret = True
        mod_count = int(mod_counts[int(count - min_count)])
        frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if mod_count == 0:
            mod_count = 1
        while (fc < frameCount and ret):
            ret, im = cap.read()
            fc += 1
            if fc%mod_count == 0 and ret:
                im_name = v_name.split('.')[0] + '_' + str(fc) + '.jpg'
                im_path = 'images/' + im_name
                cv2.imwrite(im_path, im)
                im_data = im_data.append({'names': im_name, 'counts': count}, ignore_index=True)
                num_images += 1
                if num_images%500 == 0:
                    print(num_images)
    print('Images dataset size is', num_images)
    print(dt.datetime.now())
    im_data.to_csv('im_data.csv', index=False)
    os.listdir()

mkdir: cannot create directory ‘images’: File exists


In [None]:
im_data = pd.read_csv('im_data.csv', usecols=['names', 'counts'])
im_data = im_data.dropna()
im_data.index = list(range(len(im_data.index)))

In [None]:
device = torch.device('cpu')
if torch.cuda.is_available():
    device = torch.device('cuda')
print(device)
  
class LarvaDataset(Dataset):
  
    def __init__(self, dataset, transform=None):
        self.names = dataset.names
        self.counts = dataset.counts
        self.transform = transform
        self.len = len(dataset)
  
    def __getitem__(self, index):
        count = torch.tensor(self.counts[index], device=device)
        im = Image.open('images/' + self.names[index])
        if self.transform is not None:
            im = self.transform(im)
            im.to(device)
        return im, count
  
    def __len__(self):
        return self.len
  
  
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
transform_train = T.Compose([T.Resize(255, interpolation=Image.BICUBIC), 
                             T.ColorJitter(.1, .1, .1, .1), 
                             T.RandomAffine(0, translate=(0.1, 0.1)), 
                             T.ToTensor(), 
                             T.Normalize(mean, std)])
transform_eval = T.Compose([T.Resize(255, interpolation=Image.BICUBIC), 
                            T.ToTensor(), 
                            T.Normalize(mean, std)])


train_names, test_names, train_counts, test_counts = train_test_split(im_data.names, im_data.counts, test_size=0.2)
train_names, val_names, train_counts, val_counts = train_test_split(train_names, train_counts, test_size=0.25)
train_df = pd.DataFrame(data=np.array([train_names.values, train_counts.values]).T, columns=['names', 'counts'], 
                        index=list(range(len(train_counts))))
val_df = pd.DataFrame(data=np.array([val_names.values, val_counts.values]).T, columns=['names', 'counts'], 
                      index=list(range(len(val_counts))))
test_df = pd.DataFrame(data=np.array([test_names.values, test_counts.values]).T, columns=['names', 'counts'], 
                       index=list(range(len(test_counts))))

train_set = LarvaDataset(train_df, transform=transform_train)
val_set = LarvaDataset(val_df, transform=transform_eval)
test_set = LarvaDataset(test_df, transform=transform_eval)

train_loader = DataLoader(train_set, batch_size = 64, sampler=SubsetRandomSampler(range(len(train_set))))
val_loader = DataLoader(val_set, batch_size = 128, sampler=SubsetRandomSampler(range(len(val_set))))
test_loader = DataLoader(test_set, batch_size = 128, sampler=SubsetRandomSampler(range(len(test_set))))

cuda


In [None]:
# defining the training procedure

def train(model, optimizer, loader, val_loader, PATH, epochs=50, schedualer=None, best_v_loss=None):
    train_loss = []
    val_loss = []
    best_model_wts = copy.deepcopy(model.state_dict())
    if best_v_loss is None:
        best_v_loss = (im_data['counts'].max())**2
    for e in range(epochs):
        print()
        print('current learning rate is lr = {}'.format(optimizer.state_dict()['param_groups'][0]['lr']))
        print(dt.datetime.now())
        print()
        torch.cuda.empty_cache()
        for t, (d, l) in enumerate(loader):
            model.train()
            model.to(device)
            data = d.to(device)
            labels = l.to(device)
            scores, _ = model(data)
            loss = F.mse_loss(scores.view(-1), labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            torch.cuda.empty_cache()
            print('ephoch {0} iteration {1} train loss {2}'.format(e, t, loss))
            torch.cuda.empty_cache()
            if t%250 == 0:
                print('ephoch {0} iteration {1} train loss {2}'.format(e, t, loss))
                print(dt.datetime.now())
                val_loss.append(accuracy(model, val_loader))
                print(dt.datetime.now())
                train_loss.append(loss)
                print('GPU memory allocated {}'.format(torch.cuda.memory_allocated()))
                print()
                if val_loss[-1] < best_v_loss:
                    best_v_loss = val_loss[-1]
                    best_model_wts = copy.deepcopy(model.state_dict())
                    torch.save({
                            'epoch': e,
                            'model_state_dict': model.state_dict(),
                            'optimizer_state_dict': optimizer.state_dict(),
                            'loss': loss,
                            'val_loss':val_loss[-1],
                            }, PATH)
        if schedualer is not None:
            schedualer.step()
    model.load_state_dict(best_model_wts)
    torch.save(model.state_dict(), 'best_model.pth')
    return train_loss, val_loss


# deting the accuracy of model preductions of a dataset

def accuracy(model, loader):
    loss_list = []
    model.eval()
    num_correct = 0
    num_samples = 0
    running_mean = 0
    with torch.no_grad():
        for data, labels in loader:
            model.to(device)
            data = data.to(device)
            labels = labels.to(device)
            scores, _ = model(data)
            preds = scores.view(-1)
            loss_list.append(F.mse_loss(preds, labels))
            num_correct += ((preds - labels).abs()/labels <= 0.05).sum()
            num_samples += len(labels)
            current_mean = ((preds - labels).abs()/labels).mean()
            running_mean += ((preds - labels).abs()/labels).sum()
            torch.cuda.empty_cache()
        running_mean /= num_samples
        acc = float(num_correct) / num_samples
        print('got {0} / {1} correct at {2}%'.format(num_correct, num_samples, acc*100))
        print('the average accuracy is {}%'.format(running_mean*100))
        torch.cuda.empty_cache()
        return np.mean(torch.tensor(loss_list).numpy())

In [None]:
class CSRNet(nn.Module):
    
    def __init__(self, load_weights=False):
        super(CSRNet, self).__init__()
        self.seen = 0
        self.frontend_feat = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512]
        self.backend_feat  = [512, 512, 512, 256, 128, 64]
        self.frontend = make_layers(self.frontend_feat)
        self.backend = make_layers(self.backend_feat, in_channels=512, dilation=True)
        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)
        if not load_weights:
            mod = models.vgg16(pretrained=True)
            self._initialize_weights()
            for i in xrange(len(self.frontend.state_dict().items())):
                self.frontend.state_dict().items()[i][1].data[:] = mod.state_dict().items()[i][1].data[:]
    
    def forward(self, x):
        x = self.frontend(x)
        x = self.backend(x)
        x = self.output_layer(x)
        s = x.sum((1, 2, 3))
        return s, x
      
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            
                
def make_layers(cfg, in_channels=3, batch_norm=False, dilation=False):
    if dilation:
        d_rate = 2
    else:
        d_rate = 1
    layers = []
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=d_rate, dilation=d_rate)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)

In [None]:
csrnet = CSRNet(load_weights=True)
optimizer = optim.Adam(csrnet.parameters(), lr=1e-6)

CP_PATH = 'checkpoint.pth.tar'
try:
    checkpoint = torch.load(CP_PATH)
    best_v_loss = checkpoint['val_loss']
    csrnet.load_state_dict(checkpoint['model_state_dict'])
except:
    checkpoint = torch.load('0model_best.pth.tar')
    csrnet.load_state_dict(checkpoint['state_dict'])
    best_v_loss = None



In [None]:
optimizer = optim.Adam(csrnet.parameters(), lr=1e-7)

In [None]:
csrnet.to(device)
schedualer = optim.lr_scheduler.StepLR(optimizer, step_size=1)

In [None]:
train_loss, val_loss = train(csrnet, optimizer, train_loader, val_loader, CP_PATH, epochs=10, best_v_loss=best_v_loss)


current learning rate is lr = 1e-07
2019-08-07 18:43:17.583741

ephoch 0 iteration 0 train loss 105.72002410888672
ephoch 0 iteration 0 train loss 105.72002410888672
2019-08-07 18:43:25.903778
got 13399 / 15881 correct at 84.37126125558844%
the average accuracy is 2.790194272994995%
2019-08-07 19:05:12.057875
GPU memory allocated 623521792

ephoch 0 iteration 1 train loss 98.63580322265625
ephoch 0 iteration 2 train loss 73.87606811523438
ephoch 0 iteration 3 train loss 119.40758514404297
ephoch 0 iteration 4 train loss 94.2315673828125
ephoch 0 iteration 5 train loss 104.3741455078125
ephoch 0 iteration 6 train loss 131.30043029785156
ephoch 0 iteration 7 train loss 83.12932586669922
ephoch 0 iteration 8 train loss 76.46619415283203
ephoch 0 iteration 9 train loss 77.45465087890625
ephoch 0 iteration 10 train loss 103.29241943359375
ephoch 0 iteration 11 train loss 66.63739776611328
ephoch 0 iteration 12 train loss 98.79231262207031
ephoch 0 iteration 13 train loss 73.57967376708984


ephoch 0 iteration 168 train loss 150.60076904296875
ephoch 0 iteration 169 train loss 65.7871322631836
ephoch 0 iteration 170 train loss 110.51423645019531
ephoch 0 iteration 171 train loss 82.80037689208984
ephoch 0 iteration 172 train loss 101.88916015625
ephoch 0 iteration 173 train loss 118.10134887695312
ephoch 0 iteration 174 train loss 133.06784057617188
ephoch 0 iteration 175 train loss 128.94210815429688
ephoch 0 iteration 176 train loss 106.85215759277344
ephoch 0 iteration 177 train loss 102.62232971191406
ephoch 0 iteration 178 train loss 81.36483764648438
ephoch 0 iteration 179 train loss 120.67059326171875
ephoch 0 iteration 180 train loss 76.14460754394531
ephoch 0 iteration 181 train loss 85.79192352294922
ephoch 0 iteration 182 train loss 85.15878295898438
ephoch 0 iteration 183 train loss 77.25363159179688
ephoch 0 iteration 184 train loss 80.76475524902344
ephoch 0 iteration 185 train loss 106.25869750976562
ephoch 0 iteration 186 train loss 100.01734924316406
ephoc

In [0]:
def count(path, model):
    """
    evaluates the number of larva present in input.
    input is either an image of a video. if input is an image, the evaluation is done once over the image, if input is a
    video, the evaluation is done over every caption in the video seperately and then averaged over all captions to
    produce the result
    :param path: a path to an image or a video
    :return: count
    """


    # Define the device(processor) type
    device = torch.device('cpu')
    if torch.cuda.is_available():
        device = torch.device('cuda')
    
    # Load the image or video
    im_list = []
    try:
        im_list.append(Image.open(path))
    except OSError:
        cap = cv2.VideoCapture(path)
        frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fc = 0
        ret = True
        im_list = []
        while (fc < frameCount and ret):
            ret, im = cap.read()
            if fc%10 == 0:
                #new_im = np.zeros_like(im)
                #new_im[:,:,0] = im[:,:,2]
                #new_im[:,:,1] = im[:,:,1]
                #new_im[:,:,2] = im[:,:,0]
                im_list.append(Image.fromarray(im.astype('uint8')))
            fc += 1

    # Disable gradients
    with torch.no_grad():
        # Prepare data for model
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        transform_eval = T.Compose([T.Resize(255, interpolation=Image.BICUBIC), T.ToTensor(), T.Normalize(mean, std)])
        model_input = torch.stack([torch.tensor(transform_eval(im), device=device) for im in im_list])
        model_input.to(device)
        results, densities = model(model_input)
        if len(results) > 1:
            results_mean = results.mean()
        return results_mean, results

In [0]:
video_names = os.listdir('videos')
results = []
labels = []
errors = []
for i, v_name in enumerate(video_names):
    r = count('videos/' + v_name, csrnet)
    v_idx = np.where(im_data.names == (v_name.split('.')[0]+'_1.jpg'))[0][0]
    l = im_data.counts[v_idx]
    results.append(r)
    labels.append(l)
    errors.append((r-l)/l)
    print(i)

In [0]:
results = torch.tensor(results)
labels = torch.tensor(labels)
errors = torch.tensor(errors)

In [0]:
plt.scatter(err.numpy(), labels.numpy())

In [0]:
for n in im_data.names:
    print(n.split('_')[1].split('.')[0])