In [34]:
# Imports
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

# PyTorch device configuration: use CUDA when PyTorch can see a GPU,
# otherwise fall back to the CPU, and make that device the default for
# tensors created without an explicit `device=` argument.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
torch.set_default_device(device)

import matplotlib.pyplot as plt

# Ask the inline Matplotlib backend to render figures as SVG so that plots
# shown in the notebook stay sharp when scaled.
import matplotlib_inline.backend_inline

matplotlib_inline.backend_inline.set_matplotlib_formats("svg")

import seaborn as sns
import pandas as pd
from torchsummary import summary

# Sklearn
from sklearn.model_selection import train_test_split

In [35]:
# Import dataset
iris = pd.read_csv(
    "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv"
)

# Convert the first four columns (used as the features) from a pandas
# DataFrame to a float32 tensor
data = torch.tensor(iris.iloc[:, :4].to_numpy()).float()

# Transform species names to integer class labels through an explicit mapping
species_to_label = {"setosa": 0, "versicolor": 1, "virginica": 2}
species_codes = iris["species"].map(species_to_label)

# Names missing from the mapping come back as NaN. Fail loudly instead of
# silently labelling such rows as class 0 (which a zero-initialised label
# tensor would do).
unknown_species = iris.loc[species_codes.isna(), "species"].unique()
if len(unknown_species) > 0:
    raise ValueError(f"Unexpected species values: {list(unknown_species)}")

labels = torch.tensor(species_codes.astype("int64").to_numpy(), dtype=torch.long)

In [36]:
# Separate data into train and test sets

# Fraction of the samples used for training (a proportion, not a percent)
propTrain = 0.8
nTraining = int(len(labels) * propTrain)

# Seeded generator so the split is the same on every Restart & Run All;
# change the seed to draw a different split.
splitSeed = 42
splitRng = np.random.default_rng(splitSeed)

# Boolean vector used to select data and labels:
# True -> training sample, False -> test sample
trainTestBool = np.zeros(len(labels), dtype=bool)

# Marking the first nTraining entries (trainTestBool[range(nTraining)] = True)
# is not the right way to split because it does not randomise the data.
# Instead, draw nTraining distinct sample indices at random.
items2use4train = splitRng.choice(len(labels), size=nTraining, replace=False)
trainTestBool[items2use4train] = True

# Sanity check: sampling without replacement must mark exactly nTraining rows
assert trainTestBool.sum() == nTraining

print(trainTestBool)

[ True  True  True  True  True  True False  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True False
  True  True  True  True  True  True  True  True  True  True  True False
  True  True  True False  True False False False False False  True  True
 False  True  True False  True  True False  True  True  True False  True
 False  True  True  True  True  True False  True False  True  True  True
  True  True  True  True False  True  True  True  True  True  True False
 False  True  True  True  True  True  True  True  True  True False  True
  True False  True  True  True False  True  True False  True False False
  True  True False  True  True  True False  True  True  True  True  True
  True  True  True  True  True  True  True  True False  True  True False
  True  True  True  True  True  True  True False  True  True  True  True
  True  True  True  True  True  True]


In [None]:
# Let's check whether train and test are balanced or not.
# The mean of the integer class labels is computed for the full data set and
# for each side of the split, then the three values are printed for comparison.
fullLabelMean = torch.mean(labels.float())
trainLabelMean = torch.mean(labels[trainTestBool].float())
testLabelMean = torch.mean(labels[~trainTestBool].float())

print(f"Average of full data: {fullLabelMean}")
print(" ")
print(f"Average of training data: {trainLabelMean}")
print(" ")
print(f"Average of test data: {testLabelMean}")
print(" ")

Average of full data: 1.0
 
Average of training data: 1.008333444595337
 
Average of test data: 0.9666666984558105
 
