In [1]:
from multiprocessing import Process, Manager, Barrier, Queue, SimpleQueue, Array
import time, logging, os, copy, pathlib, io, random, sys
import statistics as stats
import numpy as np
from PIL import Image
import torchvision.transforms as transforms
import torch

torch.set_num_threads(1)

import psutil
p = psutil.Process()
p.cpu_affinity([11])

seed = np.random.randint(2147483647) # make a seed with numpy generator 
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed) # apply this seed to img transforms

logging.basicConfig(level=logging.DEBUG, format='%(asctime)s.%(msecs)03d %(levelname)s:\t%(message)s', datefmt='%Y-%m-%d %H:%M:%S')
log = logging.getLogger(__name__)
from shared_memory_dict import SharedMemoryDict
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                               std=[0.229, 0.224, 0.225])

num_p = 8
rep = 1
batch_size = 256
augmentations=[
            transforms.Resize(256),
            transforms.CenterCrop(224),
#             transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(p=1.0),
            transforms.ToTensor(),
            normalize
        ]
synthetic = False
decoding = False
memcached = False
fromdisk = False
augmentations_composed=transforms.Compose(augmentations)
    
def pil_loader(path):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')
    
def memloader(byte_data):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    img = Image.open(byte_data)
    return img.convert('RGB')
    
def aug(imgd,result,barrier,num,preloading,num_p,rep, memcached, batch_size, fromdisk):
    import psutil
    p = psutil.Process()
    p.cpu_affinity([num])
    arr = []
    for j in range(rep):
        barrier.wait()
        log.debug(f"Start Process{num}")
        start=time.perf_counter()
        
        for i in range(batch_size):
            if fromdisk:
                 img = pil_loader(imgd[num+num_p*i])
            elif preloading:
                img = imgd.get_nowait() if memcached else imgd[num+num_p*i]
#                 log.debug(imgd.qsize())
            else:
                img = memloader(imgd.get_nowait()) if memcached else memloader(imgd[num+num_p*i])
#                 log.debug(imgd.qsize())
            # image colocation
            arr.append(augmentations_composed(img))
    
        end=time.perf_counter()                            
    
        elapsed_time=end-start
        log.debug(f"End Process{num} Access time: {elapsed_time}")
#         result.append(elapsed_time)
        
    return arr

# manager1 = Manager(); manager2 = Manager(); manager3 = Manager(); manager4 = Manager()
# client = base.Client(('localhost', 11211));

d= []
results = []
for i in range(num_p):
    d.append({})
    results.append(i)
# d = [manager1.dict(),manager1.dict(),manager1.dict(),manager1.dict()]
# d = [client,client,client,client]
# d = [manager1.dict(),manager2.dict(),manager3.dict(),manager4.dict()]
# d = [Array(, lock=False), SimpleQueue(), SimpleQueue(), SimpleQueue()]
# results = [manager1.list(),manager2.list(),manager3.list(),manager4.list()]


for i in range(num_p*256):
    if synthetic:
        if memcached:
            d[i % num_p].put(torch.rand((3, 224, 224)))
        else:
            d[i % num_p][i] = torch.rand((3, 224, 224))
    elif decoding:
        path =pathlib.Path(f'/home/chanho/ssd2/1024batch10/train/n02364673/{i}.JPEG')
        if memcached:
            d[i % num_p].put(pil_loader(path))
        else:
            d[i % num_p][i] = pil_loader(path)
    else:
        path =pathlib.Path(f'/home/chanho/ssd2/1024batch10/train/n02364673/{i}.JPEG')
        
        if fromdisk:
            d[i % num_p][i] = path
        elif memcached:
            d[i % num_p].put(pil_loader(path))
        else:
            with open(path, 'rb') as f:
                img_byte = f.read()
                img_byte_mem = io.BytesIO(img_byte)
                d[i % num_p][i] = img_byte_mem
    
barrier=Barrier(num_p)
ps = []

for i in range(num_p):
    p=Process(target=aug, 
              args=(d[i % num_p],results[i % num_p],barrier,
                    i,decoding, num_p, rep, memcached, batch_size, fromdisk))
    ps.append(p)

for p in ps: p.start()

for p in ps: p.join()

2021-04-26 15:46:13.804 DEBUG:	Start Process0
2021-04-26 15:46:13.804 DEBUG:	Start Process2
2021-04-26 15:46:13.804 DEBUG:	Start Process6
2021-04-26 15:46:13.804 DEBUG:	Start Process3
2021-04-26 15:46:13.804 DEBUG:	Start Process1
2021-04-26 15:46:13.804 DEBUG:	Start Process7
2021-04-26 15:46:13.804 DEBUG:	Start Process4
2021-04-26 15:46:13.804 DEBUG:	Start Process5
2021-04-26 15:46:14.886 DEBUG:	End Process0 Access time: 1.076746859587729
2021-04-26 15:46:14.893 DEBUG:	End Process4 Access time: 1.0433466518297791
2021-04-26 15:46:14.900 DEBUG:	End Process5 Access time: 1.0492710480466485
2021-04-26 15:46:14.903 DEBUG:	End Process3 Access time: 1.0936345281079412
2021-04-26 15:46:14.907 DEBUG:	End Process6 Access time: 1.0973726212978363
2021-04-26 15:46:14.908 DEBUG:	End Process7 Access time: 1.0975945191457868
2021-04-26 15:46:14.914 DEBUG:	End Process2 Access time: 1.1044503282755613
2021-04-26 15:46:14.917 DEBUG:	End Process1 Access time: 1.1072922265157104


In [2]:
def stat(data):
#     print("max: ", stats.fmean(data))  
#     print("min: ",stats.min())  
    print("mean: ",stats.fmean(data))  
    print("median: ",stats.median(data))  
    print("std: ",stats.stdev(data))  
#     print("10large: ",data.nlargest(10))
#     print("10small: ",data.nsmallest(10))
    print("quantile",stats.quantiles(data, n=6))
    
for i in range(num_p) :
    print(f"\nProcess{i}")
    stat(results[i])
    


Process0


TypeError: 'int' object is not iterable

In [1]:
from PIL import features
features.check_feature("libjpeg_turbo")

True

In [1]:
from PIL import Image
import io
print(type(Image.Image()))
print(type(io.BytesIO()))

<class 'PIL.Image.Image'>
<class '_io.BytesIO'>


In [4]:
import torch

In [15]:
torch.rand(1)

tensor([0.9444])

In [16]:
torch.rand(1)
    

tensor([0.7841])

In [2]:
# from multiprocessing import Process, Manager, Barrier
# import time, logging, os, copy, pathlib
# from PIL import Image
# import torchvision.transforms as transforms
# logging.basicConfig(level=logging.DEBUG, format='%(asctime)s.%(msecs)03d %(levelname)s:\t%(message)s', datefmt='%Y-%m-%d %H:%M:%S')
# log = logging.getLogger(__name__)
# from shared_memory_dict import SharedMemoryDict

# def pil_loader(path):
#     # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
#     with open(path, 'rb') as f:
#         img = Image.open(f)
#         return img.convert('RGB')

# def f(d,barrier,num):
#     barrier.wait()
#     log.debug(f"Start Process{num}")
#     start=time.perf_counter()
#     for i in range(256):
#         item=d[num+4*i]
#     end=time.perf_counter()
#     log.debug(f"End Process{num} Access time: {end-start}")
#     return

# num_p = 4

# manager1 = Manager()
# manager2 = Manager()
# manager3 = Manager()
# manager4 = Manager()

# d = [manager1.dict(),manager2.dict(),manager3.dict(),manager4.dict()]
# for i in range(num_p*256):
#     path =pathlib.Path(f'/home/chanho/ssd2/1024batch/train/n02364673/{i}.JPEG')
#     d[i % 4][i] = pil_loader(path)
# barrier=Barrier(num_p)
# ps = []
# for i in range(num_p):
#     p = Process(target=f, args=(d[i % 4],barrier,i))
#     ps.append(p)

# for p in ps:
#     p.start()

# for p in ps:
#     p.join()

In [3]:
# def f(d,barrier,num):
#     barrier.wait()
#     log.debug(f"Start Process{num}")
#     start=time.perf_counter()
#     for i in range(256):
#         item=d[num+4*i]
#     end=time.perf_counter()
#     log.debug(f"End Process{num} Access time: {end-start}")
#     return

# def pil_loader(path):
#     # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
#     with open(path, 'rb') as f:
#         img = Image.open(f)
#         return img.convert('RGB')

# num_p = 4
# manager1 = Manager()
# manage_dict=manager1.dict()
# d = [manage_dict,manage_dict,manage_dict,manage_dict]
# for i in range(num_p*256):
#     path =pathlib.Path(f'/home/chanho/ssd2/1024batch/train/n02364673/{i}.JPEG')
#     d[i % 4][i] = pil_loader(path)
# barrier=Barrier(num_p)
# ps = []
# for i in range(num_p):
#     p=Process(target=f, args=(d[i % 4],barrier,i))
#     ps.append(p)

# for p in ps:
#     p.start()

# for p in ps:
#     p.join()

In [4]:
# num_p = 4

# d = [{},{},{},{}]
# for i in range(num_p*256):
#     path =pathlib.Path(f'/home/chanho/ssd2/1024batch/train/n02364673/{i}.JPEG')
#     d[i % 4][i] = pil_loader(path)
# barrier=Barrier(num_p)
# ps = []
# for i in range(num_p):
#     p=Process(target=f, args=(d[i % 4],barrier,i))
#     ps.append(p)

# for p in ps:
#     p.start()

# for p in ps:
#     p.join()

In [None]:

# def shdf(d,barrier,num):
#     barrier.wait()
#     log.debug(f"Start Process{num}")
#     start=time.perf_counter()
#     item=d[num]
#     end=time.perf_counter()
#     item_used = item+"2222"
#     log.debug(f"End Process{num} Access time: {end-start}")
#     return
#     smd = [SharedMemoryDict(name='dataset', size=size),
#          SharedMemoryDict(name='dataset', size=size),
#          SharedMemoryDict(name='dataset', size=size),
#          SharedMemoryDict(name='dataset', size=size)]


# dataset_size = 30 # MB;
# size = dataset_size * 1024 * 1024
