# Data
> Classes and functions used to create PyTorch `Dataset`s and `DataLoader`s.

In [None]:
# default_exp data

In [None]:
# export
import torch
from torch.utils.data import Dataset, DataLoader
from fastai.data_block import DataBunch, DatasetType

from sklearn.model_selection import train_test_split

# import warnings

# torch.Tensor.ndim = property(lambda x: x.dim())
# tt = torch.Tensor

In [None]:
# export
class Data(Dataset):
    """
    Dataset over any number of parallel, equally indexed sequences.

    Every positional argument is one sequence (for example `x` and `y`).
    Sample `i` gathers element `i` of each sequence.
    """
    def __init__(self, *args):
        super().__init__()
        # Tuple of the raw sequences, in the order they were passed.
        self.data = args

    def __len__(self):
        # Length is taken from the first sequence; the others are not checked.
        return len(self.data[0])

    def __getitem__(self, i):
        """
        Return a dict mapping `arg_<k>` to a tensor wrapping element `i`
        of the k-th sequence.
        """
        # The loop variables must not be named `i`: inside the comprehension
        # that name would shadow the requested index, and each sequence would
        # be indexed by its own position instead of by the sample index.
        return {f'arg_{pos}': torch.Tensor([seq[i]])
                for pos, seq in enumerate(self.data)}

In [None]:
# Sanity check: the substring test `'int' in name` also matches unsigned dtype names.
'int' in 'uint8'

True

In [None]:
# export
class TestData(Dataset):
    """
    Dataset wrapping an unlabeled feature array `x`.

    Indexing returns element `i` of `x` converted to a float32 tensor.
    """
    def __init__(self, x):
        super().__init__()
        self.x = x
        # Torch dtype derived from the array's dtype name (None if unrecognised).
        self.x_type = self.__get_type__(x.dtype.name)

    def __get_type__(self, type):
        """
        Map a dtype name to a torch dtype.

        Names containing 'float' give `torch.float32`, names containing 'int'
        give `torch.long`, anything else gives None.
        """
        # Order matters: 'float' is tested before 'int'.
        dtype_by_marker = (('float', torch.float32), ('int', torch.long))
        for marker, torch_dtype in dtype_by_marker:
            if marker in type:
                return torch_dtype
        return None

    def __len__(self):
        return len(self.x)

    def __getitem__(self, i):
        # Features are always returned as float32, whatever `x_type` is.
        sample = self.x[i]
        return torch.tensor(sample, dtype=torch.float32)

class TrainData(TestData):
    """
    Dataset pairing a feature array `x` with a target array `y`.

    Indexing returns `(x_i, y_i)`: the features as a float32 tensor and the
    target as a tensor whose dtype is derived from `y`'s dtype name.
    """
    def __init__(self, x, y):
        super().__init__(x)
        self.y = y
        # Torch dtype for the targets, looked up from the array's dtype name.
        self.y_type = self.__get_type__(y.dtype.name)

    def __getitem__(self, i):
        # The parent class already builds the float32 feature tensor.
        features = super().__getitem__(i)
        target = torch.tensor(self.y[i], dtype=self.y_type)
        return features, target

In [None]:
import numpy as np

# Smoke test with float targets: both tensors are expected to be float32.
x = np.random.randn(100, 3)
y = np.random.randn(100, 1)
train_ds = TrainData(x, y)

# Fetch the last sample and show each part together with its dtype.
x_elem, y_elem = train_ds[99]
print(x_elem, x_elem.dtype)
print(y_elem, y_elem.dtype)

tensor([ 0.5909, -1.1880,  0.0718]) torch.float32
tensor([-1.3023]) torch.float32


In [None]:
# Same features as above, but integer labels: the target is expected to be int64.
y = np.random.randint(0, 3, (100, 1))
train_ds = TrainData(x, y)

# Fetch the last sample and show each part together with its dtype.
x_elem, y_elem = train_ds[99]
print(x_elem, x_elem.dtype)
print(y_elem, y_elem.dtype)

tensor([ 0.5909, -1.1880,  0.0718]) torch.float32
tensor([2]) torch.int64


In [None]:
# export
def create_db(x, y, train_size=0.8, bs=96, random_state=42):
    """
    Split `x`/`y` into train and validation sets and wrap them in a fastai `DataBunch`.

    x, y: feature and target arrays; both must expose `.dtype.name`
        (required by `TrainData`), e.g. numpy arrays.
    train_size: forwarded to `train_test_split`.
    bs: requested batch size; clamped so it never exceeds the size of the
        dataset it is used for.
    random_state: seed forwarded to `train_test_split` so that the split is
        reproducible between calls.

    Returns a `DataBunch` built from the train and validation `DataLoader`s.
    """
    # `random_state` has to be passed on explicitly; otherwise every call
    # produces a different split and the parameter has no effect.
    X_train, X_val, y_train, y_val = train_test_split(
        x, y, train_size=train_size, random_state=random_state)

    train_ds = TrainData(X_train, y_train)
    val_ds = TrainData(X_val, y_val)

    # Never ask for batches larger than the dataset they are drawn from.
    bs = min(bs, len(train_ds))
    val_bs = min(bs, len(val_ds))

    train_dl = DataLoader(train_ds, bs)
    val_dl = DataLoader(val_ds, val_bs)

    return DataBunch(train_dl, val_dl)

In [None]:
# Build a DataBunch from random float data using a small batch size.
x = np.random.randn(100, 3)
y = np.random.randn(100, 1)
db = create_db(x,y, bs=10)

In [None]:
# Inspect the shape of every tensor in a single batch.
[x.shape for x in db.one_batch()]

[torch.Size([10, 3]), torch.Size([10, 1])]

In [None]:
# hide
from nbdev.export import *
notebook2script()

Converted Activations.ipynb.
Converted Layers.ipynb.
Converted Model.ipynb.
Converted cifar.ipynb.
Converted data.ipynb.
Converted index.ipynb.
Converted losses.ipynb.
