In [1]:
## Import required PyTorch modules
import os

import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset
from torchvision import transforms

In [2]:
# Custom Class to load data from local system to Pytorch model
class LoadTerrainDataSet(Dataset):
  """Sliding-window "image" dataset built from ``trainData.csv``.

  The CSV is read without a header row. Column 0 is taken as the target
  variable and columns 1.. as the features. Every run of ``image_height``
  consecutive rows becomes one 2D sample of shape (image_height, n_features);
  consecutive samples are shifted by one row:

    rows 0 .. image_height-1   -> 1st image
    rows 1 .. image_height     -> 2nd image, and so on.

  The label of a sample is the target value of the LAST row of its window.

  Args:
    path: Directory that contains ``trainData.csv``. A trailing path
      separator is optional.
    transform: Optional callable applied to each (1, H, W) tensor in
      ``__getitem__``. ``None`` returns the tensor untransformed.
    image_height: Number of consecutive rows per image (default 48).

  Raises:
    ValueError: If ``image_height`` is smaller than 1.
  """

  def __init__(self, path, transform=None, image_height=48):
    if image_height < 1:
      raise ValueError("image_height must be >= 1, got %r" % (image_height,))

    self.path = path
    self.sample = []
    self.label = []
    self.transform = transform
    self.image_height = image_height

    # read data from sources
    # os.path.join works whether or not `path` ends with a separator.
    train_data = pd.read_csv(os.path.join(self.path, 'trainData.csv'), header=None) # replace data path and format

    # Convert to numpy once; slicing an ndarray per window is far cheaper than
    # slicing the DataFrame on every iteration.
    train_X = train_data.iloc[:, 1:].values # assuming 2nd column onwards represents features of the dataset
    train_y = train_data.iloc[:, 0].values # assuming 1st column represents target variable in the training dataset
    noOfRows = train_X.shape[0]

    # Each image is stored as a 2D numpy array in the list (sample).
    # Each label is stored as a numpy scalar in the list (label).
    # There are noOfRows - image_height + 1 complete windows, the first one
    # starting at row 0. With fewer rows than image_height the range is empty
    # and the dataset simply has length 0.
    # Note: the windows are slices of one shared array, not independent copies.
    for start_index in range(noOfRows - image_height + 1):
      end_index = start_index + image_height  # exclusive
      self.sample.append(train_X[start_index:end_index])
      self.label.append(train_y[end_index - 1])

  def __len__(self):
    """Return the number of windows (images) in the dataset."""
    return len(self.sample)

  def __getitem__(self, index):
    """Return the ``(image, label)`` pair at ``index`` as torch tensors.

    Each image is reshaped to 3 dimensions (C, H, W) = (channel, rows,
    columns) with C = 1, i.e. it is passed as a gray-scale image. To feed
    RGB images (C = 3), the numpy arrays built in ``__init__`` and the
    reshape below must be changed accordingly.
    """
    X, y = self.sample[index], self.label[index]
    r, c = X.shape
    X = torch.from_numpy(X.reshape((1, r, c)))
    # Integer-valued data is promoted to float32 because transforms such as
    # torchvision's Normalize reject non-floating tensors. Floating-point data
    # keeps its original dtype.
    if not X.is_floating_point():
      X = X.float()
    if self.transform is not None:
      X = self.transform(X)
    y = torch.from_numpy(np.asarray(y))
    return X, y

In [3]:
## Code after defining class
# Normalize each single-channel image tensor as (x - 0.5) / 0.5.
# Normalize does not change the dtype; it expects a floating-point tensor.
# mean and std are passed as one-element tuples (one entry per channel);
# a bare `(0.5)` is just a parenthesised float, not a sequence.
transform = transforms.Compose([
    transforms.Normalize((0.5,), (0.5,))
    ])

# Creates 2-dimensional data as an image
# data = LoadTerrainDataSet(toPath, transform)

## `data` can be used as the data source for a DataLoader (requires `from torch.utils.data import DataLoader`).
## Example code
# Split total data into training and validation datasets
# train_set, validation_set = torch.utils.data.random_split(data, [train_set_len, valid_set_len])

# train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
# validation_loader = DataLoader(validation_set, batch_size=batch_size, shuffle=True)