# Tensor Dataset and DataLoader

# Import Libraries

In [31]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader,TensorDataset
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
style.use('dark_background')
import seaborn as sns
import pandas as pd
import numpy as np

# Load the data

In [32]:
iris = sns.load_dataset('iris')
iris.head(20)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
5,5.4,3.9,1.7,0.4,setosa
6,4.6,3.4,1.4,0.3,setosa
7,5.0,3.4,1.5,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
9,4.9,3.1,1.5,0.1,setosa


In [33]:
type(iris)

pandas.core.frame.DataFrame

In [34]:
iris.species.unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

# Extracting Features and Labels and Transform them to Pytorch Tensors.

In [35]:
X  = iris.iloc[:,:-1].values

In [36]:
X

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [37]:
X = torch.tensor( X, dtype = torch.float32)

In [38]:
X

tensor([[5.1000, 3.5000, 1.4000, 0.2000],
        [4.9000, 3.0000, 1.4000, 0.2000],
        [4.7000, 3.2000, 1.3000, 0.2000],
        [4.6000, 3.1000, 1.5000, 0.2000],
        [5.0000, 3.6000, 1.4000, 0.2000],
        [5.4000, 3.9000, 1.7000, 0.4000],
        [4.6000, 3.4000, 1.4000, 0.3000],
        [5.0000, 3.4000, 1.5000, 0.2000],
        [4.4000, 2.9000, 1.4000, 0.2000],
        [4.9000, 3.1000, 1.5000, 0.1000],
        [5.4000, 3.7000, 1.5000, 0.2000],
        [4.8000, 3.4000, 1.6000, 0.2000],
        [4.8000, 3.0000, 1.4000, 0.1000],
        [4.3000, 3.0000, 1.1000, 0.1000],
        [5.8000, 4.0000, 1.2000, 0.2000],
        [5.7000, 4.4000, 1.5000, 0.4000],
        [5.4000, 3.9000, 1.3000, 0.4000],
        [5.1000, 3.5000, 1.4000, 0.3000],
        [5.7000, 3.8000, 1.7000, 0.3000],
        [5.1000, 3.8000, 1.5000, 0.3000],
        [5.4000, 3.4000, 1.7000, 0.2000],
        [5.1000, 3.7000, 1.5000, 0.4000],
        [4.6000, 3.6000, 1.0000, 0.2000],
        [5.1000, 3.3000, 1.7000, 0

# Extracting Labels And Transform them into Integers.

In [39]:
y = torch.zeros(len(X), dtype = torch.long)
y[iris.species == 'setosa'] = 0
y[iris.species == 'versicolor'] = 1
y[iris.species == 'virginica'] = 2

In [40]:
print(y)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2])


# Four Important Steps to reach to DataLoaders

### First Step : Perform train test Split

In [41]:
train_data, test_data, train_labels, test_labels = train_test_split(X, y, test_size = 0.1)

### Second Step : Data Normalization / Standardization

In [42]:
train_data = train_data/torch.max(train_data)
test_data  = test_data/torch.max(test_data)

### Third Step : Convert the training and testing components of dataset into Pytorch Dataset (Cannot Cross-Validate)

In [43]:
X_train = TensorDataset(train_data,train_labels)
X_test = TensorDataset(test_data,test_labels)

In [44]:
print(X_test.tensors)

(tensor([[0.9610, 0.3636, 0.7922, 0.2468],
        [0.6623, 0.4416, 0.1948, 0.0260],
        [0.8701, 0.3896, 0.6494, 0.2208],
        [0.8312, 0.4156, 0.5844, 0.1948],
        [0.5714, 0.3766, 0.1818, 0.0260],
        [1.0000, 0.4935, 0.8701, 0.2857],
        [0.7532, 0.3506, 0.6623, 0.2468],
        [0.6364, 0.4026, 0.1948, 0.0130],
        [0.7792, 0.3896, 0.6234, 0.2338],
        [0.7403, 0.3636, 0.5844, 0.1688],
        [0.8182, 0.4286, 0.7792, 0.3247],
        [0.6234, 0.4026, 0.2078, 0.0260],
        [0.7922, 0.3896, 0.6364, 0.2338],
        [0.8701, 0.3896, 0.6753, 0.2987],
        [0.7532, 0.3636, 0.6623, 0.3117]]), tensor([2, 0, 1, 1, 0, 2, 2, 0, 2, 1, 2, 0, 2, 2, 2]))


### Fourth Step : Push the Tensor Datasets into Dataloader Objects ( creating batches and shuffling the data)

In [45]:
batchsize    = 4
train_loader = DataLoader(X_train, batch_size = batchsize, shuffle = True)
test_loader  = DataLoader(X_test, batch_size = 1)

In [48]:
for X,y in train_loader:
  break
print(X)
print(y)
print(" ")

tensor([[0.6329, 0.2532, 0.4430, 0.1266],
        [0.6962, 0.2911, 0.5063, 0.1646],
        [0.6582, 0.3418, 0.4937, 0.1772],
        [0.7975, 0.3418, 0.6203, 0.2278]])
tensor([1, 1, 1, 2])
 


In [50]:
for X,y in train_loader:
    print(X)
    print(y)
    print(" ")

tensor([[0.8354, 0.3671, 0.5823, 0.1646],
        [0.7722, 0.3291, 0.7089, 0.1772],
        [0.8734, 0.3924, 0.6456, 0.2911],
        [0.7089, 0.3797, 0.5696, 0.1899]])
tensor([1, 2, 2, 1])
 
tensor([[0.7975, 0.3165, 0.6203, 0.1899],
        [0.9747, 0.3544, 0.8481, 0.2532],
        [0.5949, 0.4051, 0.2025, 0.0253],
        [0.7848, 0.4304, 0.6835, 0.2911]])
tensor([1, 2, 0, 2])
 
tensor([[0.7342, 0.3418, 0.4937, 0.1519],
        [0.6203, 0.3797, 0.1772, 0.0253],
        [0.8101, 0.3544, 0.7089, 0.2658],
        [0.6329, 0.4051, 0.1519, 0.0253]])
tensor([1, 0, 2, 0])
 
tensor([[0.8101, 0.4051, 0.6709, 0.2911],
        [0.6835, 0.4937, 0.2152, 0.0506],
        [0.8608, 0.3544, 0.6076, 0.1772],
        [0.6329, 0.4304, 0.1899, 0.0253]])
tensor([2, 0, 1, 0])
 
tensor([[0.7595, 0.4304, 0.5696, 0.2025],
        [0.6076, 0.3797, 0.1772, 0.0127],
        [0.6962, 0.3291, 0.5570, 0.1519],
        [0.6582, 0.4430, 0.1899, 0.0253]])
tensor([1, 0, 1, 0])
 
tensor([[0.7468, 0.3797, 0.5316, 0.1899]

In [51]:
for X,y in test_loader:
    print(X)
    print(y)
    print(" ")

tensor([[0.9610, 0.3636, 0.7922, 0.2468]])
tensor([2])
 
tensor([[0.6623, 0.4416, 0.1948, 0.0260]])
tensor([0])
 
tensor([[0.8701, 0.3896, 0.6494, 0.2208]])
tensor([1])
 
tensor([[0.8312, 0.4156, 0.5844, 0.1948]])
tensor([1])
 
tensor([[0.5714, 0.3766, 0.1818, 0.0260]])
tensor([0])
 
tensor([[1.0000, 0.4935, 0.8701, 0.2857]])
tensor([2])
 
tensor([[0.7532, 0.3506, 0.6623, 0.2468]])
tensor([2])
 
tensor([[0.6364, 0.4026, 0.1948, 0.0130]])
tensor([0])
 
tensor([[0.7792, 0.3896, 0.6234, 0.2338]])
tensor([2])
 
tensor([[0.7403, 0.3636, 0.5844, 0.1688]])
tensor([1])
 
tensor([[0.8182, 0.4286, 0.7792, 0.3247]])
tensor([2])
 
tensor([[0.6234, 0.4026, 0.2078, 0.0260]])
tensor([0])
 
tensor([[0.7922, 0.3896, 0.6364, 0.2338]])
tensor([2])
 
tensor([[0.8701, 0.3896, 0.6753, 0.2987]])
tensor([2])
 
tensor([[0.7532, 0.3636, 0.6623, 0.3117]])
tensor([2])
 
