In [1]:
import torch
import torchvision.transforms as transforms
import torch.utils.data as data
import os
import pickle
import numpy as np
from PIL import Image
import json

In [29]:
class CocoDataset(data.Dataset):
    """COCO Custom Dataset compatible with torch.utils.data.DataLoader."""

    def __init__(self, root, origin_file, img_tags, vocab):
        """Set the path for images, captions and vocabulary wrapper.

        Args:
            root: image directory.
            json: coco annotation file path.
            vocab: vocabulary wrapper.
            transform: image transformer.
        """
        self.root = root
        with open(origin_file, 'r') as j:
            self.origin_file = json.load(j)
        with open(img_tags, 'r') as j:
            self.img_tags = json.load(j)
        with open(vocab, 'r') as j:
            self.vocab = json.load(j)
        self.transform = transforms.Compose([
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])

    def __getitem__(self, index):
        """Returns one data pair (image and caption)."""

        word2id = self.vocab['word_map']

        img_id = self.origin_file['images'][index]['imgid']
        path = self.origin_file['images'][index]['filepath'] + \
            '/'+self.origin_file['images'][index]['filename']

        image = Image.open(os.path.join(self.root, path)).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)

        # Convert caption (string) to word ids.
        tags = []
        # t = list(map(str.lower, img_tags[index]))
        tags = [word2id[token] for token in self.img_tags[str(index)]]
        target = torch.Tensor(tags)
        return image, target

    def __len__(self):
        return len(self.ids)

In [30]:
root = '/home/lkk/datasets/coco2014'
origin_file = root+'/'+'dataset_coco.json'
img_tags='./img_tags.json'
voc = './vocab.json'
coco=CocoDataset(root,origin_file,img_tags,voc)

In [31]:
coco[0]

.
[110, 19, 32, 12, 30, 28, 972, 234, 139, 544, 207, 50, 67, 160, 208, 514, 1, 525, 376, 84, 213, 144, 499, 892]


(tensor([[[ 2.2147,  2.2147,  2.2147,  ...,  1.5468,  1.7352,  2.1462],
          [ 2.2147,  2.2489,  2.1804,  ...,  2.1462,  1.3927,  1.7009],
          [ 2.0777,  2.2489,  2.2489,  ...,  1.9749,  1.7180,  1.3927],
          ...,
          [-1.0904, -1.1760, -0.5938,  ...,  0.2796,  0.1597,  0.4508],
          [-1.1247, -1.4672, -0.8678,  ...,  0.4337,  0.3652,  0.5022],
          [-0.4739, -1.0390, -1.6555,  ...,  0.5364,  0.5022,  0.5707]],
 
         [[ 2.3936,  2.3761,  2.4111,  ..., -0.7402, -0.5126, -0.1975],
          [ 2.3936,  2.3410,  2.3235,  ..., -0.1800, -0.7927, -0.4251],
          [ 2.4111,  2.4111,  2.3761,  ..., -0.4076, -0.4951, -0.9328],
          ...,
          [-0.8803, -0.8978, -0.3200,  ...,  0.0126, -0.1099,  0.1176],
          [-1.0028, -1.1604, -0.7402,  ...,  0.1527,  0.0651,  0.1527],
          [-0.0049, -0.6176, -1.6155,  ...,  0.2227,  0.1702,  0.3277]],
 
         [[ 2.5703,  2.6400,  2.6400,  ..., -0.4973,  0.1128,  0.1999],
          [ 2.4308,  2.5529,

In [13]:
a.next

AttributeError: 'iterator' object has no attribute 'next'