# DC Speed-Up Tutorial

In this notebook, we use an image embedding task as an example to compare several of the speed-up strategies that Towhee's DataCollection provides.

## Preparation

In [None]:
! curl -L https://github.com/towhee-io/examples/releases/download/data/reverse_image_search.zip -O
! unzip -q -o reverse_image_search.zip

In [None]:
import towhee
import sys
import torch
from torchvision import transforms
import torchvision.transforms.functional as TF
from torchvision import models

@towhee.register
class image_normalize:
    """Channel-wise normalization operator, registered with towhee.

    Moves the channel axis of the input to the front, converts it to
    float32 and applies ``torchvision.transforms.functional.normalize``,
    i.e. ``(x - mean) / std`` per channel.

    Args:
        mean: Per-channel means passed to ``TF.normalize``.
        std: Per-channel standard deviations passed to ``TF.normalize``.
    """

    # The defaults are tuples rather than lists so that no mutable object is
    # shared between every instance created with default arguments.
    # NOTE(review): these values look like the ImageNet statistics for pixels
    # scaled to [0, 1]; nothing here rescales the input, so confirm the value
    # range of the images produced upstream.
    def __init__(self, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        self._mean = mean
        self._std = std

    def __call__(self, x):
        """Normalize one image.

        Args:
            x: Array-like with three axes; the last axis is moved to the
                front (channels-last -> channels-first).

        Returns:
            numpy.ndarray: The normalized float32 array, channels first.
        """
        x = torch.tensor(x, dtype=torch.float32).permute(2, 0, 1)
        return TF.normalize(x, mean=self._mean, std=self._std).detach().numpy()

    def __vcall__(self, x):
        """Normalize a batch of images in one call.

        Args:
            x: Array-like with four axes; the last axis is moved to
                position 1 (batch, ..., channel -> batch, channel, ...).

        Returns:
            numpy.ndarray: The normalized float32 batch, channels at axis 1.
        """
        x = torch.tensor(x, dtype=torch.float32).permute(0, 3, 1, 2)
        return TF.normalize(x, mean=self._mean, std=self._std).detach().numpy()

@towhee.register
class image_embedding:
    """ResNet-18 based embedding operator, registered with towhee.

    Loads torchvision's pretrained ``resnet18``, keeps every child module
    except the last one, and uses the remaining stack to produce a
    512-element vector per image. The model runs on CUDA when available,
    otherwise on the CPU.
    """

    def __init__(self):
        self._device = 'cuda' if torch.cuda.is_available() else 'cpu'
        backbone = models.resnet18(pretrained=True)
        # Keep all child modules but the last one, so the network outputs
        # features instead of class scores.
        backbone = torch.nn.Sequential(*(list(backbone.children())[:-1]))
        backbone.to(self._device)
        # Inference mode for layers such as batch norm / dropout.
        backbone.eval()

        self._model = backbone

    def __call__(self, imgs):
        """Embed a single image.

        Args:
            imgs: One image as an array-like the model accepts once a
                leading batch axis is added.

        Returns:
            numpy.ndarray: Embedding of shape ``(512,)``.
        """
        imgs = torch.tensor(imgs).to(self._device)
        imgs = torch.unsqueeze(imgs, 0)
        # No gradients are needed for inference; skipping autograd avoids
        # building a graph that would be thrown away immediately.
        with torch.no_grad():
            embedding = self._model(imgs).cpu().numpy()
        return embedding.reshape([512])

    def __vcall__(self, imgs):
        """Embed a batch of images in one forward pass.

        Args:
            imgs: Batch of images as an array-like the model accepts as is.

        Returns:
            numpy.ndarray: Embeddings of shape ``(batch, 512)``.
        """
        imgs = torch.tensor(imgs).to(self._device)
        # Same reasoning as in __call__: inference only, no autograd graph.
        with torch.no_grad():
            embedding = self._model(imgs).cpu().numpy()
        return embedding.reshape([-1, 512])

## Default

In [None]:
%%time
import towhee

dc = (
    towhee.read_csv('reverse_image_search.csv').unstream()
        .runas_op['id', 'id'](func=lambda x: int(x))
        .image_decode['path', 'img']()
        .image_resize['img', 'img'](dsize=[224, 224])
)

In [None]:
%%time
(
    dc.image_normalize['img', 'nimg']()
        .image_embedding['nimg', 'embedding']()
        .show()
)

## Chunk

In [None]:
%%time
import towhee

dc = (
    towhee.read_csv('reverse_image_search.csv').unstream()
        .runas_op['id', 'id'](func=lambda x: int(x))
        .image_decode['path', 'img']()
        .image_resize['img', 'img'](dsize=[224, 224])
        .set_chunksize(20)
)

In [None]:
%%time
(
    dc.image_normalize['img', 'nimg']()
        .image_embedding['nimg', 'embedding']()
        .show()
)

## Parallel

In [None]:
%%time
import towhee

dc = (
    towhee.read_csv('reverse_image_search.csv').unstream()
        .runas_op['id', 'id'](func=lambda x: int(x))
        .image_decode['path', 'img']()
        .image_resize['img', 'img'](dsize=[224, 224])
        .set_parallel(5)
)

In [None]:
%%time
(
    dc.image_normalize['img', 'nimg']()
        .image_embedding['nimg', 'embedding']()
        .show()
)

## JIT

In [None]:
%%time
import towhee

dc = (
    towhee.read_csv('reverse_image_search.csv').unstream()
        .runas_op['id', 'id'](func=lambda x: int(x))
        .image_decode['path', 'img']()
        .image_resize['img', 'img'](dsize=[224, 224])
)

In [None]:
%%time
(
    dc.image_normalize['img', 'nimg']()
        .set_jit('towhee')
        .image_embedding['nimg', 'embedding']()
        .show()
)

## Parallel in GPU

In [None]:
%%time
import towhee
from torch.profiler import profile, ProfilerActivity

with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prof:
	dc = (
		towhee.read_csv('reverse_image_search.csv').unstream()
			.runas_op['id', 'id'](func=lambda x: int(x))
			.set_parallel(5)
			.image_decode['path', 'img']()
			.image_resize['img', 'img'](dsize=[224, 224])
			.image_normalize['img', 'nimg']()
        	.image_embedding['nimg', 'embedding']()
			.show()
	)

prof.export_chrome_trace("para.json")

## Parallel in GPU with JIT

In [None]:
%%time
import towhee
from torch.profiler import profile, ProfilerActivity

with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prof:
	dc = (
		towhee.read_csv('reverse_image_search.csv')
			.runas_op['id', 'id'](func=lambda x: int(x))
			.set_parallel(5)
			.image_decode['path', 'img']()
			.image_resize['img', 'img'](dsize=[224, 224])
			.image_normalize['img', 'nimg']()
			.set_jit('towhee')
        	.image_embedding['nimg', 'embedding']()
			.show()
	)

prof.export_chrome_trace("para_jit.json")

## Parallel & Chunk

In [None]:
%%time
import towhee

dc = (
    towhee.read_csv('reverse_image_search.csv').unstream()
        .runas_op['id', 'id'](func=lambda x: int(x))
        .image_decode['path', 'img']()
        .image_resize['img', 'img'](dsize=[224, 224])
        .set_chunksize(20)
        .set_parallel(5)
)

In [None]:
%%time
(
    dc.image_normalize['img', 'nimg']()
        .image_embedding['nimg', 'embedding']()
        .show()
)

## Parallel & Chunk & JIT

In [None]:
%%time
import towhee

dc = (
    towhee.read_csv('reverse_image_search.csv')
        .runas_op['id', 'id'](func=lambda x: int(x))
		.set_parallel(5)
        .set_chunksize(10)
        .image_decode['path', 'img']()
        .image_embedding.timm['img', 'vec'](model_name='resnet50')
        .to_list()
)