In [1]:
from torch.utils.data import DataLoader
import torch

In [2]:
t = torch.arange(7, dtype=torch.float32)
t

tensor([0., 1., 2., 3., 4., 5., 6.])

In [3]:
data_loader = DataLoader(t)

In [4]:
for item in data_loader:
    print(item)

tensor([0.])
tensor([1.])
tensor([2.])
tensor([3.])
tensor([4.])
tensor([5.])
tensor([6.])


In [5]:
data_loader = DataLoader(t, batch_size=3, drop_last=False)

for i, batch in enumerate(data_loader, 1):
    print(f'batch {i}:', batch)

batch 1: tensor([0., 1., 2.])
batch 2: tensor([3., 4., 5.])
batch 3: tensor([6.])


In [6]:
data_loader = DataLoader(t, batch_size=3, drop_last=True)

for i, batch in enumerate(data_loader, 1):
    print(f'batch {i}:', batch)

batch 1: tensor([0., 1., 2.])
batch 2: tensor([3., 4., 5.])


In [7]:
data_loader = DataLoader(t, batch_size=3, drop_last=False, shuffle=True)

for i, batch in enumerate(data_loader, 1):
    print(f'batch {i}:', batch)

batch 1: tensor([3., 2., 6.])
batch 2: tensor([1., 0., 5.])
batch 3: tensor([4.])


In [8]:
from torch.utils.data import Dataset

class JointDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y
    def __len__(self):
        return len(self.x)
    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

In [9]:
torch.manual_seed(1)

t_x = torch.rand([4, 3], dtype=torch.float32)
t_y = torch.arange(4)
joint_dataset = JointDataset(t_x, t_y)

#### Or use TensorDataset directly

In [10]:
from torch.utils.data import TensorDataset
joint_dataset = TensorDataset(t_x, t_y)

for example in joint_dataset:
    print('  x: ', example[0], 
          '  y: ', example[1])

  x:  tensor([0.7576, 0.2793, 0.4031])   y:  tensor(0)
  x:  tensor([0.7347, 0.0293, 0.7999])   y:  tensor(1)
  x:  tensor([0.3971, 0.7544, 0.5695])   y:  tensor(2)
  x:  tensor([0.4388, 0.6387, 0.5247])   y:  tensor(3)


In [11]:
torch.manual_seed(1)
data_loader = DataLoader(dataset=joint_dataset, batch_size=2, shuffle=True)

for i, batch in enumerate(data_loader, 1):
        print(f'batch {i}:', 'x:', batch[0], 
              '\n         y:', batch[1])
        
for epoch in range(2):
    print(f'epoch {epoch+1}')
    for i, batch in enumerate(data_loader, 1):
        print(f'batch {i}:', 'x:', batch[0], 
              '\n         y:', batch[1])

batch 1: x: tensor([[0.3971, 0.7544, 0.5695],
        [0.7576, 0.2793, 0.4031]]) 
         y: tensor([2, 0])
batch 2: x: tensor([[0.7347, 0.0293, 0.7999],
        [0.4388, 0.6387, 0.5247]]) 
         y: tensor([1, 3])
epoch 1
batch 1: x: tensor([[0.7576, 0.2793, 0.4031],
        [0.3971, 0.7544, 0.5695]]) 
         y: tensor([0, 2])
batch 2: x: tensor([[0.7347, 0.0293, 0.7999],
        [0.4388, 0.6387, 0.5247]]) 
         y: tensor([1, 3])
epoch 2
batch 1: x: tensor([[0.4388, 0.6387, 0.5247],
        [0.3971, 0.7544, 0.5695]]) 
         y: tensor([3, 2])
batch 2: x: tensor([[0.7576, 0.2793, 0.4031],
        [0.7347, 0.0293, 0.7999]]) 
         y: tensor([0, 1])


In [12]:
import pandas as pd
import numpy as np
import torch.nn as nn
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score, log_loss
from sklearn.preprocessing import MinMaxScaler

In [13]:
cancer = pd.read_csv("C:/Training/Academy/Statistics (Python)/Cases/Wisconsin/BreastCancer.csv",
                    index_col=0)
dum_cancer = pd.get_dummies(cancer,drop_first=True)
dum_cancer.head()

Unnamed: 0_level_0,Clump,UniCell_Size,Uni_CellShape,MargAdh,SEpith,BareN,BChromatin,NoemN,Mitoses,Class_Malignant
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
61634,5,4,3,1,2,2,2,3,1,0
63375,9,1,2,6,4,10,7,7,2,1
76389,10,4,7,2,2,8,6,1,1,1
95719,6,10,10,10,8,10,7,10,7,1
128059,1,1,1,1,2,5,5,1,1,0


In [14]:
X = dum_cancer.drop('Class_Malignant', axis=1).values
y = dum_cancer['Class_Malignant'].values

scaler = MinMaxScaler()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, 
                                                    random_state=24,stratify=y)
X_scl_trn = scaler.fit_transform(X_train) 
X_scl_tst = scaler.transform(X_test) 

In [15]:
X_torch = torch.from_numpy(X_scl_trn)
y_torch = torch.from_numpy(y_train)
print(X_torch.size())
print(y_torch.size())

torch.Size([489, 9])
torch.Size([489])


## Stochastic Gradient Descent

In [16]:
torch.manual_seed(24)
data_loader = DataLoader(y_torch, batch_size=1, drop_last=False)

#for i, batch in enumerate(data_loader, 1):
#    print(f'batch {i}:', batch)

In [17]:
from torch.utils.data import TensorDataset
joint_dataset = TensorDataset(X_torch.float(), y_torch.float())

In [18]:
type(joint_dataset)

torch.utils.data.dataset.TensorDataset

In [19]:
torch.manual_seed(24)
data_loader = DataLoader(dataset=joint_dataset, batch_size=1, shuffle=True)

#for i, batch in enumerate(data_loader, 1):
#        print(f'batch {i}:', 'x:', batch[0], 
#              '\n         y:', batch[1])

In [20]:
# Create a model
model = nn.Sequential(nn.Linear(in_features=X_scl_trn.shape[1], out_features=5),
                      nn.ReLU(),
                      nn.Linear(5, 3),
                      nn.ReLU(),
                      nn.Linear(3,1),
                      nn.Sigmoid())

In [21]:
criterion = torch.nn.BCELoss()
# Construct the optimizer (Adam in this case)
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
# optimizer

In [22]:
y_pred = model(X_torch.float())
y_torch = y_torch.unsqueeze(1)
print(y_torch.shape)
print(y_pred.shape)

torch.Size([489, 1])
torch.Size([489, 1])


for epoch in range(2):
    print(f'epoch {epoch+1}')
    for i, batch in enumerate(data_loader, 1):
        print(f'batch {i}:', 'x:', batch[0], 
              '\n         y:', batch[1].view(-1,1))
    print((batch[0].shape, batch[1].shape))

In [23]:
# Gradient Descent

for epoch in np.arange(0,50):
   for i, batch in enumerate(data_loader, 1):
      # Forward pass: Compute predicted y by passing x to the model
      y_pred_prob = model(batch[0].float())

      # Compute and print loss
      loss = criterion(y_pred_prob, batch[1].view(-1,1).float())
      

      # Zero gradients, perform a backward pass, and update the weights.
      optimizer.zero_grad()

      # perform a backward pass (backpropagation)
      loss.backward()

      # Update the parameters
      optimizer.step()
   if epoch%10 == 0:
       print('epoch: ', epoch+1,' loss: ', loss.item())
   
#print('epoch: ', epoch+1,' loss: ', loss.item())

epoch:  1  loss:  0.6894459128379822
epoch:  11  loss:  0.03602258861064911
epoch:  21  loss:  0.0032358013559132814
epoch:  31  loss:  0.026776880025863647
epoch:  41  loss:  0.017899008467793465


In [24]:
X_torch_test = torch.from_numpy(X_scl_tst)
y_pred_prob = model(X_torch_test.float()) # Equivalent predict_proba / predict
y_pred_prob = y_pred_prob.detach().numpy()
y_pred_prob = y_pred_prob.reshape(y_test.shape[0],)
y_pred = np.where(y_pred_prob >= 0.5,1,0)

In [25]:
print(accuracy_score(y_test,y_pred))

0.9761904761904762


In [26]:
log_loss(y_test, y_pred_prob)

0.08719668650023758