In [None]:
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

In [None]:
# Create a synthetic data matrix: one row per observation, one column per feature
nObservations = 100
nFeatures = 20

# Draw from a locally seeded generator so that "Restart & Run All" reproduces the
# same values, without touching NumPy's global random state.
rng = np.random.default_rng(42)
data = rng.standard_normal((nObservations, nFeatures))

In [None]:
# Build a PyTorch tensor from the NumPy array
dataT = torch.tensor(data)

# Report container type, shape and dtype for both versions of the data
for heading, container in (('Numpy data:', data), ('Tensor Data: ', dataT)):
  print(heading)
  print(type(container))
  print(container.shape)
  print(container.dtype)
  print(' ')

Numpy data:
<class 'numpy.ndarray'>
(100, 20)
float64
 
Tensor Data: 
<class 'torch.Tensor'>
torch.Size([100, 20])
torch.float64
 


In [None]:
# Cast to a different dtype when a downstream API expects a specific one

# 32-bit floating point
dataT2 = torch.tensor(data).to(torch.float32)
print(dataT2.dtype)

# 64-bit integers ("long")
dataT3 = torch.tensor(data).to(torch.int64)
print(dataT3.dtype)

torch.float32
torch.int64


In [None]:
# Inspect the integer tensor: the cast to long kept only the whole-number part
# of each value, so most entries show up as 0.
dataT3

tensor([[ 0, -1,  0,  ...,  0,  0,  0],
        [ 1,  0,  0,  ...,  1,  0,  0],
        [ 0,  0,  0,  ...,  0,  1,  0],
        ...,
        [ 0,  0,  0,  ..., -2,  0,  0],
        [ 0,  0,  0,  ..., -1,  0,  0],
        [ 0,  0,  0,  ...,  1,  0,  0]])

In [None]:
# Convert tensor into Pytorch Dataset
# (only the data tensor is wrapped here; labels are not included yet)

dataset = TensorDataset(dataT)


# .tensors exposes the wrapped tensors as a tuple -- a single element in this case
dataset.tensors

(tensor([[-0.5895, -1.0376,  0.6888,  ...,  0.3762,  0.2968,  0.7870],
         [ 1.9422, -0.8792,  0.5011,  ...,  1.0541, -0.9977,  0.4822],
         [-0.2771,  0.1149, -0.3878,  ...,  0.3357,  1.4418,  0.0241],
         ...,
         [-0.5652,  0.5079,  0.0244,  ..., -2.2764,  0.1732, -0.2790],
         [-0.6040, -0.5452,  0.1206,  ..., -1.3549,  0.4835, -0.1626],
         [ 0.4007, -0.9913, -0.6598,  ...,  1.1734,  0.3183,  0.5105]],
        dtype=torch.float64),)

In [None]:
# Class labels 1. through 4. in consecutive runs, stored as a column
# (one row per observation) rather than a flat vector
labels = torch.ceil(torch.linspace(.01, 4, nObservations)).reshape(nObservations, 1)

# Rebuild the dataset, this time pairing the data tensor with the labels
dataset = TensorDataset(dataT, labels)
for wrapped in dataset.tensors:
  print(wrapped.size())

# For comparison: a plain 1-D NumPy vector of the same length has shape (n,)
print(np.random.randint(5, size=nObservations).shape)

torch.Size([100, 20])
torch.Size([100, 1])
(100,)


In [None]:
# Serve the dataset in mini-batches of `batchsize` samples
batchsize = 25
dataloader = DataLoader(dataset, batch_size = batchsize)

# The loader still references the complete dataset: this is the size of the
# full data tensor, not of a single batch.
dataloader.dataset.tensors[0].size()


torch.Size([100, 20])

In [None]:
# sizes of each batch
# NOTE: the loop target is `labs`, not `labels`, so the full label tensor
# defined in an earlier cell is not overwritten by the last batch.
for dat, labs in dataloader:
  print('Batch info: ')
  print(dat.size())
  print(labs.size())
  print(' ')

Batch info: 
torch.Size([25, 20])
torch.Size([25, 1])
 
Batch info: 
torch.Size([25, 20])
torch.Size([25, 1])
 
Batch info: 
torch.Size([25, 20])
torch.Size([25, 1])
 
Batch info: 
torch.Size([25, 20])
torch.Size([25, 1])
 


In [None]:
# Show each batch's labels as a row vector, followed by a spacer line
for dat, labs in dataloader:
  print(labs.T, ' ', sep='\n')

tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1.]])
 
tensor([[2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
         2., 2., 2., 2., 2., 2., 2.]])
 
tensor([[3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3.,
         3., 3., 3., 3., 3., 3., 3.]])
 
tensor([[4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
         4., 4., 4., 4., 4., 4., 4.]])
 


In [None]:
# Grab only the first batch without looping over the whole loader.
# The features are bound to `dat` (not `data`) so the original NumPy array
# keeps its name and earlier cells still behave the same when re-run.
dat, labs = next(iter(dataloader))

labs

tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]])