# Deep Learning Bootcamp November 2017, GPU Computing for Data Scientists

<img src="../images/bcamp.png" align="center">

## 09 PyTorch Image Datasets

Web: https://www.meetup.com/Tel-Aviv-Deep-Learning-Bootcamp/events/241762893/

Notebooks: <a href="https://github.com/QuantScientist/Data-Science-PyCUDA-GPU"> On GitHub</a>

*Shlomo Kashani*

<img src="../images/pt.jpg" width="35%" align="center">


# PyTorch Imports


In [3]:
# !pip install pycuda
%reset -f

import numpy as np
import numpy
import os.path
import shutil
import pandas
import sklearn.preprocessing
import sklearn.metrics
import PIL.Image
import random
import matplotlib.pyplot
import pylab
import time
import glob

import torch
import torch.utils.data
import torchvision.transforms
import torch.nn
import torch.nn.functional
import torch.optim
import torch.autograd
%matplotlib inline
import sys
print('__Python VERSION:', sys.version)
print('__pyTorch VERSION:', torch.__version__)
print('__CUDA VERSION')
from subprocess import call
# call(["nvcc", "--version"]) does not work
! nvcc --version
print('__CUDNN VERSION:', torch.backends.cudnn.version())
print('__Number CUDA Devices:', torch.cuda.device_count())
print('__Devices')
call(["nvidia-smi", "--format=csv", "--query-gpu=index,name,driver_version,memory.total,memory.used,memory.free"])
print('Active CUDA Device: GPU', torch.cuda.current_device())

print ('Available devices ', torch.cuda.device_count())
print ('Current cuda device ', torch.cuda.current_device())

gpu_index = 0
torch.cuda.set_device(gpu_index)
    
from __future__ import print_function
import torch
x=torch.Tensor(3,2)
import logging
handler=logging.basicConfig(level=logging.INFO)
lgr = logging.getLogger(__name__)

lgr.info (type(x))
lgr.info(x)
torch.from_numpy (np.zeros((3,4))).cuda()



('__Python VERSION:', '2.7.12 (default, Nov 19 2016, 06:48:10) \n[GCC 5.4.0 20160609]')
('__pyTorch VERSION:', '0.1.12+4eb448a')
__CUDA VERSION
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2016 NVIDIA Corporation
Built on Tue_Jan_10_13:22:03_CST_2017
Cuda compilation tools, release 8.0, V8.0.61
('__CUDNN VERSION:', 5110)

INFO:__main__:<class 'torch.FloatTensor'>
INFO:__main__:
1.00000e-36 *
 -7.0698  0.0000
 -7.0698  0.0000
  0.8719  0.0000
[torch.FloatTensor of size 3x2]




('__Number CUDA Devices:', 1L)
__Devices
('Active CUDA Device: GPU', 0L)
('Available devices ', 1L)
('Current cuda device ', 0L)



 0  0  0  0
 0  0  0  0
 0  0  0  0
[torch.cuda.DoubleTensor of size 3x4 (GPU 0)]

# PyTorch image Datasets


In [4]:
DATA_ROOT ='/root/data/amazon/'
IMG_PATH = DATA_ROOT + '/train-jpg/'
IMG_EXT = '.jpg'
IMG_LEBELS_PATH = DATA_ROOT + '/train_v2.csv'

In [5]:
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms

class GenericImageDataset(torch.utils.data.dataset.Dataset):
    
    def __init__(self, 
                 csv_training_labels_path, 
                 image_directory, 
                 image_extension,                  
                 transform = None):
        lgr.info("GenericImageDataset CTOR ...")
        self.csv_training_labels_path = csv_training_labels_path
        self.image_directory = image_directory
        self.image_extension = image_extension
        self.transform = transform
        
        lgr.info ("CSV path:" + csv_training_labels_path)
        lgr.info ("IMG path:" + image_directory)
        
        training_examples = pandas.read_csv(self.csv_training_labels_path)
#         tmp_df = pd.read_csv(csv_path, header=None)
        self.training_image_names = training_examples['image_name']
        self.training_labels = training_examples['tags']
        
        self.training_image_names = self.training_image_names.head()
        self.training_labels = self.training_labels.head()

        is_file = lambda training_image_name: os.path.isfile(
            self.image_directory + training_image_name + self.image_extension)
        assert self.training_image_names.apply(
            is_file).all(), "Some training images in " + self.csv_training_labels_path + " are not found."

        self.multi_label_binarizer = sklearn.preprocessing.MultiLabelBinarizer()
        self.training_labels = self.multi_label_binarizer.fit_transform(self.training_labels.str.split())
        self.training_labels = self.training_labels.astype(numpy.float32)
        

    def __getitem__(self, index):
        lgr.info ("Get item:" + str(index))
        training_image_path = self.image_directory + self.training_image_names[index] + self.image_extension
        training_image = PIL.Image.open(training_image_path)
        training_image = training_image.convert('RGB')
        if self.transform is not None:
            training_image = self.transform(training_image)
        training_label = torch.from_numpy(self.training_labels[index])

        return (training_image, training_label)

    def __len__(self):
        L=len(self.training_image_names.index)
        lgr.info ("Lenght:" +str(L))
        return L

In [8]:
transformations = transforms.Compose([transforms.ToTensor()])

train_set = GenericImageDataset(IMG_LEBELS_PATH, 
                                IMG_PATH, 
                                IMG_EXT,                                 
                                transform = transformations)

train_loader = DataLoader(train_set,
                          batch_size=5,
                          shuffle=True,
                          num_workers=1)

INFO:__main__:GenericImageDataset CTOR ...
INFO:__main__:CSV path:/root/data/amazon//train_v2.csv
INFO:__main__:IMG path:/root/data/amazon//train-jpg/


In [12]:
import matplotlib.pyplot as plt
%matplotlib inline

for batch_idx, (data, target) in enumerate(train_loader):
    print (type(data)) # <class 'torch.FloatTensor'>   
#     trans = transforms.ToPILImage()
#     plt.imshow(trans(data))

INFO:__main__:Lenght:5
INFO:__main__:Lenght:5
INFO:__main__:Get item:0
INFO:__main__:Get item:1
INFO:__main__:Get item:4
INFO:__main__:Get item:2
INFO:__main__:Get item:3


<class 'torch.FloatTensor'>
