In [5]:
%matplotlib inline
import os
from pathlib import Path
import numpy as np
import pandas as pd
from pandas.api.types import CategoricalDtype
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils import data
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler

import numpy as np


In [6]:
# Load the raw CSV into a DataFrame for ad-hoc inspection in the notebook.
p = Path('dataset/iris.data')
df = pd.read_csv(p)

# Column names expected in the CSV header.
# NOTE(review): the spelling 'petal_witdh' presumably mirrors the header of the
# file on disk -- confirm before "fixing" it.
feature_cols = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_witdh',
]
# NOTE(review): another cell in this notebook sets target_cols to ['class'];
# confirm which name the CSV header actually uses.
target_cols = ['species']

In [4]:
class IrisDataset(data.Dataset):
    def __init__(
            self, path:str, feature_cols:list, 
            target_cols:list, clazz:list, 
            transforms_feature=None, transforms_target=None):
        
        self.path = Path(path)
        self.dframe = pd.read_csv(self.path)
        self.feature_cols = feature_cols
        self.target_cols = target_cols
        self.clazz = clazz
        self.transforms_feature = transforms_feature
        self.transforms_target = transforms_target
        
        self.__normalize_target()
        self.class_to_idx = self.__class_to_label()
        self.idx_to_class = self.__idx_to_class()
    
    def __len__(self):
        return len(self.dframe)
    
    def __class_to_label(self):
        mapz = [(val, idx) for idx, val in enumerate(self.clazz)]
        return dict(mapz)
    
    def __idx_to_class(self):
        mapz = [(idx, val) for idx, val in enumerate(self.clazz)]
        return dict(mapz)
    
    def __normalize_target(self):
        cat_type = CategoricalDtype(categories=self.clazz, ordered=True)
        self.dframe[self.target_cols[0]] = self.dframe[self.target_cols[0]].astype(cat_type).cat.codes
    
    def __getitem__(self, idx):
        feature = self.dframe[self.feature_cols].iloc[idx].values
        target = self.dframe[self.target_cols].iloc[idx].values
        target = np.squeeze(target)
        
        if self.transforms_feature:
            feature = self.transforms_feature(feature)
        if self.transforms_target:
            target = self.transforms_target(target)
            
        return feature, target


def indice_splitter(dataset, valid_size, shuflle=True, seed=1):
    """Split the positions ``0 .. len(dataset)-1`` into train and validation lists.

    Args:
        dataset: any object supporting ``len()``.
        valid_size: fraction of the data assigned to validation; the count is
            ``floor(valid_size * len(dataset))``.
        shuflle: when true, the positions are shuffled before splitting
            (parameter name kept as-is for backward compatibility).
        seed: seed for the shuffle; the default of 1 reproduces the split this
            function has always produced.

    Returns:
        ``(train_idx, valid_idx)`` -- two lists of integer positions.
    """
    num_data = len(dataset)
    indices = list(range(num_data))
    split = int(np.floor(valid_size * num_data))
    if shuflle:
        # A private RandomState yields the same permutation as
        # np.random.seed(seed) + np.random.shuffle, but does not reset the
        # process-wide NumPy RNG behind the caller's back.
        np.random.RandomState(seed).shuffle(indices)
    train_idx, valid_idx = indices[split:], indices[:split]
    return train_idx, valid_idx

class NumpyToFloatTensor(object):
    """Callable transform turning a NumPy array into a float32 torch tensor."""

    def __call__(self, param):
        """Cast ``param`` to float32 and wrap the result in a tensor."""
        as_float32 = param.astype(np.float32)
        tensor = torch.from_numpy(as_float32)
        return tensor.float()

class NumpyToLongTensor(object):
    """Callable transform turning a NumPy array into an int64 torch tensor."""

    def __call__(self, param):
        """Cast ``param`` to 64-bit integers and wrap the result in a tensor."""
        # np.int64 instead of np.long: the np.long alias was removed in
        # NumPy 1.24, and where it exists again it is the platform C long,
        # which is not 64-bit everywhere. int64 is what torch's "long" means.
        return torch.from_numpy(param.astype(np.int64))

        
        
# --- Dataset configuration -------------------------------------------------
path = 'dataset/iris.data'
feature_cols = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_witdh',
]
target_cols = ['class']
# List order fixes the integer code assigned to each label.
clazz = [
    "Iris-setosa",
    "Iris-versicolor",
    "Iris-virginica",
]

# --- Dataset: features become float tensors, targets become long tensors ----
iris_dataset = IrisDataset(
    path=path,
    feature_cols=feature_cols,
    target_cols=target_cols,
    clazz=clazz,
    transforms_feature=NumpyToFloatTensor(),
    transforms_target=NumpyToLongTensor(),
)

# --- Hold out 20% of the row positions for validation ------------------------
train_idx, valid_idx = indice_splitter(iris_dataset, valid_size=0.2)

# --- Loaders: both draw from the same dataset, restricted by index subset ----
train_loader = data.DataLoader(
    iris_dataset,
    batch_size=32,
    sampler=SubsetRandomSampler(train_idx),
    num_workers=0,
)
valid_loader = data.DataLoader(
    iris_dataset,
    batch_size=32,
    sampler=SubsetRandomSampler(valid_idx),
    num_workers=0,
)